whispercpp 1.3.3 → 1.3.5
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +79 -25
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/addon.cpp +19 -19
- data/ext/sources/examples/addon.node/index.js +7 -5
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/bench/bench.cpp +26 -16
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +122 -111
- data/ext/sources/examples/command/command.cpp +26 -24
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/lsp/lsp.cpp +19 -17
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +34 -24
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +4 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +7 -3
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +113 -7
- data/ext/sources/examples/talk-llama/llama-adapter.h +13 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2136 -1491
- data/ext/sources/examples/talk-llama/llama-arch.h +125 -3
- data/ext/sources/examples/talk-llama/llama-batch.cpp +174 -100
- data/ext/sources/examples/talk-llama/llama-batch.h +46 -20
- data/ext/sources/examples/talk-llama/llama-chat.cpp +199 -8
- data/ext/sources/examples/talk-llama/llama-chat.h +11 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +1213 -413
- data/ext/sources/examples/talk-llama/llama-context.h +99 -36
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -4
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +883 -294
- data/ext/sources/examples/talk-llama/llama-graph.h +361 -161
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +144 -6
- data/ext/sources/examples/talk-llama/llama-hparams.h +100 -23
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +328 -0
- data/ext/sources/examples/talk-llama/{llama-kv-cache-unified-iswa.h → llama-kv-cache-iswa.h} +38 -29
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2100 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +373 -27
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +124 -30
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +63 -41
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +30 -29
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +77 -35
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +15 -16
- data/ext/sources/examples/talk-llama/llama-memory.h +16 -10
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +93 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +9 -2
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +3369 -10145
- data/ext/sources/examples/talk-llama/llama-model.h +104 -12
- data/ext/sources/examples/talk-llama/llama-quant.cpp +53 -30
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1520 -324
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +562 -39
- data/ext/sources/examples/talk-llama/llama-vocab.h +50 -0
- data/ext/sources/examples/talk-llama/llama.cpp +794 -12
- data/ext/sources/examples/talk-llama/llama.h +246 -190
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +9 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +309 -16
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +4 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +18 -17
- data/ext/sources/ggml/CMakeLists.txt +135 -79
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +21 -2
- data/ext/sources/ggml/include/ggml-cpu.h +2 -1
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +406 -23
- data/ext/sources/ggml/src/CMakeLists.txt +99 -13
- data/ext/sources/ggml/src/ggml-alloc.c +368 -161
- data/ext/sources/ggml/src/ggml-backend-impl.h +5 -5
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +55 -14
- data/ext/sources/ggml/src/ggml-backend.cpp +290 -57
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +10 -13
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +14 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +59 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2586 -1917
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +348 -309
- data/ext/sources/ggml/src/ggml-cann/common.h +350 -133
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +894 -625
- data/ext/sources/ggml/src/ggml-common.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +167 -75
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +5 -2
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +560 -622
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1002 -270
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +107 -587
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +162 -589
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +373 -486
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +3 -58
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +521 -353
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +54 -314
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +184 -675
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +4682 -1660
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +82 -4
- data/ext/sources/ggml/src/ggml-cpu/common.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +18 -9
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +263 -111
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +39 -28
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +683 -82
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +38 -43
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +435 -119
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1234 -1182
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +2167 -1480
- data/ext/sources/ggml/src/ggml-cpu/ops.h +10 -12
- data/ext/sources/ggml/src/ggml-cpu/quants.c +35 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1132 -81
- data/ext/sources/ggml/src/ggml-cpu/repack.h +36 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +120 -93
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/traits.cpp +2 -2
- data/ext/sources/ggml/src/ggml-cpu/traits.h +1 -1
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +294 -27
- data/ext/sources/ggml/src/ggml-cpu/vec.h +606 -48
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +92 -17
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +588 -128
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +95 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +25 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +335 -485
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +519 -378
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +750 -637
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1244 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +586 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +98 -61
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +230 -197
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1557 -294
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +57 -2
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +915 -69
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +171 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +835 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +109 -67
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1601 -733
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +802 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +286 -149
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +86 -32
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +163 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +207 -98
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +330 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +325 -61
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -12
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +291 -104
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +21 -4
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +40 -19
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +351 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +21 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +189 -5
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +44 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +248 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +8 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +70 -37
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +10 -3
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +16 -13
- data/ext/sources/ggml/src/ggml-impl.h +186 -15
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -7
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +609 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1743 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +273 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1686 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +356 -61
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +4161 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +724 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +4495 -1876
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +21 -9
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +29 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4005 -427
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +147 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +66 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +177 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +49 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +66 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +33 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +111 -16
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +497 -195
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +6 -5
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +50 -30
- data/ext/sources/ggml/src/ggml-sycl/conv.cpp +10 -4
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +200 -99
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +72 -309
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +67 -49
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +77 -34
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +397 -314
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +12 -2
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +14 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +9 -6
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +643 -413
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +80 -60
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +223 -132
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +230 -55
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +8 -9
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +65 -59
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +330 -165
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +12 -6
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +60 -6
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +16 -12
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +7398 -2635
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +43 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +15 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +56 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +5 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +67 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +158 -16
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +38 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +3 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +7 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +103 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +220 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +139 -45
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +113 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +75 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +19 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +21 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +28 -18
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +33 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +227 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +20 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -556
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +230 -51
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +566 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +90 -223
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +195 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +41 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +59 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +104 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -52
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -35
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -35
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +30 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +6 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +16 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +5 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +435 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +148 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +619 -177
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3087 -0
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +147 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +901 -129
- data/ext/sources/ggml/src/gguf.cpp +8 -1
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +124 -81
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +7 -5
- data/ext/sources/tests/test-vad.cpp +3 -3
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +126 -2
- data/test/test_params.rb +24 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +8 -1
- data/whispercpp.gemspec +1 -1
- metadata +439 -179
- data/ext/sources/build-xcframework.sh +0 -547
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +0 -279
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +0 -1841
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +0 -303
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +0 -506
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +0 -11
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -6280
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +0 -162
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +0 -118
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -99
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -58
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
data/ext/sources/ggml/src/ggml.c CHANGED
@@ -53,13 +53,15 @@
 
 #define UNUSED GGML_UNUSED
 
+// Needed for ggml_fp32_to_bf16_row()
+#if defined(__AVX512BF16__)
 #if defined(_MSC_VER)
-#define m512bh(p) p
 #define m512i(p) p
 #else
-#
+#include <immintrin.h>
 #define m512i(p) (__m512i)(p)
-#endif
+#endif // defined(_MSC_VER)
+#endif // defined(__AVX512BF16__)
 
 #if defined(__linux__) || \
     defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
@@ -124,6 +126,13 @@ static void ggml_print_backtrace_symbols(void) {
     int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
     backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
 }
+#elif defined(__APPLE__)
+#include <execinfo.h>
+static void ggml_print_backtrace_symbols(void) {
+    void * trace[100];
+    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
+    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
+}
 #else
 static void ggml_print_backtrace_symbols(void) {
     // platform not supported
@@ -135,6 +144,20 @@ void ggml_print_backtrace(void) {
     if (GGML_NO_BACKTRACE) {
         return;
     }
+#if defined(__APPLE__)
+    // On macOS, fork+debugger attachment is problematic due to:
+    // 1. libdispatch "poisons" forked child processes
+    // 2. lldb has issues attaching to parent from forked child
+    // Use simple backtrace() instead to avoid Terminal.app crashes
+    const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
+    if (!GGML_BACKTRACE_LLDB) {
+        fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
+        fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
+        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
+        ggml_print_backtrace_symbols();
+        return;
+    }
+#endif
 #if defined(__linux__)
     FILE * f = fopen("/proc/self/status", "r");
     size_t size = 0;
@@ -202,19 +225,34 @@ void ggml_print_backtrace(void) {
 }
 #endif
 
+static ggml_abort_callback_t g_abort_callback = NULL;
+
+// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
+GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
+    ggml_abort_callback_t ret_val = g_abort_callback;
+    g_abort_callback = callback;
+    return ret_val;
+}
+
 void ggml_abort(const char * file, int line, const char * fmt, ...) {
     fflush(stdout);
 
-
+    char message[2048];
+    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
 
     va_list args;
     va_start(args, fmt);
-
+    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
     va_end(args);
 
-
+    if (g_abort_callback) {
+        g_abort_callback(message);
+    } else {
+        // default: print error and backtrace to stderr
+        fprintf(stderr, "%s\n", message);
+        ggml_print_backtrace();
+    }
 
-    ggml_print_backtrace();
     abort();
 }
 
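Note: the new abort hook above can be installed from application code roughly as follows. This is a minimal sketch: the exact `ggml_abort_callback_t` typedef is not shown in this hunk (the code only invokes it with the formatted `file:line: ...` message), so the handler signature, name, and logging below are assumptions.

```c
// Sketch only: route ggml fatal errors into a custom logger instead of stderr.
// Assumes ggml_abort_callback_t is the exported callback type that receives the
// already-formatted "file:line: reason" message built by ggml_abort().
#include <stdio.h>
#include "ggml.h"

static void my_abort_handler(const char * message) {
    fprintf(stderr, "[fatal][ggml] %s\n", message);
}

static void install_abort_handler(void) {
    // the previous callback is returned; passing NULL later restores the default path
    ggml_abort_callback_t previous = ggml_set_abort_callback(my_abort_handler);
    (void) previous;
}
```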
@@ -458,6 +496,14 @@ bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
     return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
 }
 
+const char * ggml_version(void) {
+    return GGML_VERSION;
+}
+
+const char * ggml_commit(void) {
+    return GGML_COMMIT;
+}
+
 //
 // timing
 //
@@ -559,9 +605,6 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
 #endif
 
 }
-static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
-static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
-static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);
 
 static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
     [GGML_TYPE_I8] = {
@@ -667,6 +710,14 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
         .is_quantized = true,
         .from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref,
     },
+    [GGML_TYPE_MXFP4] = {
+        .type_name = "mxfp4",
+        .blck_size = QK_MXFP4,
+        .type_size = sizeof(block_mxfp4),
+        .is_quantized = true,
+        .to_float = (ggml_to_float_t) dequantize_row_mxfp4,
+        .from_float_ref = (ggml_from_float_t)quantize_row_mxfp4_ref,
+    },
     [GGML_TYPE_Q2_K] = {
         .type_name = "q2_K",
         .blck_size = QK_K,
@@ -894,6 +945,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
 
     "DUP",
     "ADD",
+    "ADD_ID",
     "ADD1",
     "ACC",
     "SUB",
@@ -906,6 +958,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "COS",
     "SUM",
     "SUM_ROWS",
+    "CUMSUM",
     "MEAN",
     "ARGMAX",
     "COUNT_EQUAL",
@@ -945,7 +998,9 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CONV_TRANSPOSE_1D",
     "IM2COL",
     "IM2COL_BACK",
+    "IM2COL_3D",
     "CONV_2D",
+    "CONV_3D",
     "CONV_2D_DW",
     "CONV_TRANSPOSE_2D",
     "POOL_1D",
@@ -958,7 +1013,10 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "ARANGE",
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
+    "TOP_K",
     "LEAKY_RELU",
+    "TRI",
+    "FILL",
 
     "FLASH_ATTN_EXT",
     "FLASH_ATTN_BACK",
@@ -971,6 +1029,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "RWKV_WKV6",
     "GATED_LINEAR_ATTN",
     "RWKV_WKV7",
+    "SOLVE_TRI",
 
     "UNARY",
 
@@ -983,17 +1042,19 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS",
     "CROSS_ENTROPY_LOSS_BACK",
     "OPT_STEP_ADAMW",
+    "OPT_STEP_SGD",
 
     "GLU",
 };
 
-static_assert(GGML_OP_COUNT ==
+static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
 
     "x",
     "x+y",
+    "x[i]+y",
     "x+y",
     "view(x,nb,offset)+=y->x",
     "x-y",
@@ -1006,6 +1067,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cos(x)",
     "Σx",
     "Σx_k",
+    "cumsum(x)",
     "Σx/n",
     "argmax(x)",
     "count_equal(x)",
@@ -1045,7 +1107,9 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "conv_transpose_1d(x)",
     "im2col(x)",
     "im2col_back(x)",
+    "im2col_3d(x)",
     "conv_2d(x)",
+    "conv_3d(x)",
     "conv_2d_dw(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",
@@ -1058,7 +1122,10 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "arange(start, stop, step)",
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
+    "top_k(x)",
     "leaky_relu(x)",
+    "tri(x)",
+    "fill(x, c)",
 
     "flash_attn_ext(x)",
     "flash_attn_back(x)",
@@ -1071,6 +1138,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "rwkv_wkv6(k, v, r, tf, td, s)",
     "gated_linear_attn(k, v, q, gate, s)",
     "rwkv_wkv7(r, w, k, v, a, b, s)",
+    "A X = B, A triangular, solve X",
 
     "unary(x)",
 
@@ -1083,15 +1151,15 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss(x,y)",
     "cross_entropy_loss_back(x,y)",
     "adamw(x)",
+    "sgd(x)",
 
     "glu(x)",
 };
 
-static_assert(GGML_OP_COUNT ==
+static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
-
 static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "ABS",
     "SGN",
@@ -1107,19 +1175,28 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
     "HARDSWISH",
     "HARDSIGMOID",
     "EXP",
+    "EXPM1",
+    "SOFTPLUS",
     "GELU_ERF",
+    "XIELU",
+    "FLOOR",
+    "CEIL",
+    "ROUND",
+    "TRUNC",
 };
 
-static_assert(GGML_UNARY_OP_COUNT ==
-
+static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
 
 static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
     "REGLU",
     "GEGLU",
     "SWIGLU",
+    "SWIGLU_OAI",
+    "GEGLU_ERF",
+    "GEGLU_QUICK",
 };
 
-static_assert(GGML_GLU_OP_COUNT ==
+static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
 
 
 static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
@@ -1287,6 +1364,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
         case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
         case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
+        case GGML_FTYPE_MOSTLY_MXFP4: wtype = GGML_TYPE_MXFP4; break;
         case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
         case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
         case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
@@ -1937,6 +2015,27 @@ struct ggml_tensor * ggml_add_cast(
     return ggml_add_cast_impl(ctx, a, b, type);
 }
 
+struct ggml_tensor * ggml_add_id(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        struct ggml_tensor  * ids) {
+
+    GGML_ASSERT(a->ne[0] == b->ne[0]);
+    GGML_ASSERT(a->ne[1] == ids->ne[0]);
+    GGML_ASSERT(a->ne[2] == ids->ne[1]);
+    GGML_ASSERT(ids->type == GGML_TYPE_I32);
+
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    result->op     = GGML_OP_ADD_ID;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = ids;
+
+    return result;
+}
+
 // ggml_add1
 
 static struct ggml_tensor * ggml_add1_impl(
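For orientation, the shape contract implied by the `ggml_add_id` asserts above can be exercised as in this hypothetical sketch (e.g. adding a bias row selected per token/expert by `ids`); all sizes are illustrative and only the relationships between them matter.

```c
// Hypothetical sketch of the ggml_add_id shape contract (see the asserts above).
#include "ggml.h"

struct ggml_tensor * add_selected_bias(struct ggml_context * ctx) {
    const int64_t n_embd = 64, n_tokens = 8, n_used = 2, n_bias = 4;

    struct ggml_tensor * cur  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd, n_tokens, n_used);
    struct ggml_tensor * bias = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_bias);
    struct ggml_tensor * ids  = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_tokens, n_used);

    // adds the bias row picked by ids to each row of cur; the result keeps cur's shape
    return ggml_add_id(ctx, cur, bias, ids);
}
```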
@@ -2194,6 +2293,30 @@ struct ggml_tensor * ggml_log_inplace(
     return ggml_log_impl(ctx, a, true);
 }
 
+struct ggml_tensor * ggml_expm1(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
+}
+
+struct ggml_tensor * ggml_expm1_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
+}
+
+struct ggml_tensor * ggml_softplus(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
+}
+
+struct ggml_tensor * ggml_softplus_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
+}
+
 // ggml_sin
 
 static struct ggml_tensor * ggml_sin_impl(
@@ -2277,6 +2400,21 @@ struct ggml_tensor * ggml_sum_rows(
     return result;
 }
 
+// ggml_cumsum
+
+struct ggml_tensor * ggml_cumsum(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    result->op     = GGML_OP_CUMSUM;
+    result->src[0] = a;
+
+    return result;
+}
+
 // ggml_mean
 
 struct ggml_tensor * ggml_mean(
@@ -2592,6 +2730,29 @@ struct ggml_tensor * ggml_silu_inplace(
     return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
 }
 
+// ggml_xielu
+
+struct ggml_tensor * ggml_xielu(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        float                 alpha_n,
+        float                 alpha_p,
+        float                 beta,
+        float                 eps) {
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
+    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
+    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
+    ggml_set_op_params_f32(result, 3, beta);
+    ggml_set_op_params_f32(result, 4, eps);
+
+    result->op     = GGML_OP_UNARY;
+    result->src[0] = a;
+
+    return result;
+}
+
 // ggml_silu_back
 
 struct ggml_tensor * ggml_silu_back(
@@ -2666,6 +2827,62 @@ static struct ggml_tensor * ggml_glu_impl(
     return result;
 }
 
+// ggml_floor
+
+struct ggml_tensor * ggml_floor(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
+}
+
+struct ggml_tensor * ggml_floor_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
+}
+
+// ggml_ceil
+
+struct ggml_tensor * ggml_ceil(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
+}
+
+struct ggml_tensor * ggml_ceil_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
+}
+
+//ggml_round
+
+struct ggml_tensor * ggml_round(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
+}
+
+struct ggml_tensor * ggml_round_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
+}
+
+//ggml_trunc
+
+struct ggml_tensor * ggml_trunc(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
+}
+
+struct ggml_tensor * ggml_trunc_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
+}
+
 struct ggml_tensor * ggml_glu(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
@@ -2745,6 +2962,61 @@ struct ggml_tensor * ggml_swiglu_split(
     return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
 }
 
+// ggml_geglu_erf
+
+struct ggml_tensor * ggml_geglu_erf(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
+}
+
+struct ggml_tensor * ggml_geglu_erf_swapped(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
+}
+
+struct ggml_tensor * ggml_geglu_erf_split(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b) {
+    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
+}
+
+// ggml_geglu_quick
+
+struct ggml_tensor * ggml_geglu_quick(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
+}
+
+struct ggml_tensor * ggml_geglu_quick_swapped(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
+}
+
+struct ggml_tensor * ggml_geglu_quick_split(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b) {
+    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
+}
+
+struct ggml_tensor * ggml_swiglu_oai(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        float                 alpha,
+        float                 limit) {
+    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
+    ggml_set_op_params_f32(result, 2, alpha);
+    ggml_set_op_params_f32(result, 3, limit);
+
+    return result;
+}
+
 // ggml_norm
 
 static struct ggml_tensor * ggml_norm_impl(
@@ -3002,12 +3274,14 @@ static struct ggml_tensor * ggml_scale_impl(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         float                 s,
+        float                 b,
         bool                  inplace) {
     GGML_ASSERT(ggml_is_padded_1d(a));
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
+    float params[2] = { s, b };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op     = GGML_OP_SCALE;
     result->src[0] = a;
@@ -3019,14 +3293,30 @@ struct ggml_tensor * ggml_scale(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         float                 s) {
-    return ggml_scale_impl(ctx, a, s, false);
+    return ggml_scale_impl(ctx, a, s, 0.0, false);
 }
 
 struct ggml_tensor * ggml_scale_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
        float                  s) {
-    return ggml_scale_impl(ctx, a, s, true);
+    return ggml_scale_impl(ctx, a, s, 0.0, true);
+}
+
+struct ggml_tensor * ggml_scale_bias(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        float                 s,
+        float                 b) {
+    return ggml_scale_impl(ctx, a, s, b, false);
+}
+
+struct ggml_tensor * ggml_scale_bias_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        float                 s,
+        float                 b) {
+    return ggml_scale_impl(ctx, a, s, b, true);
 }
 
 // ggml_set
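A short sketch of the new scale-with-bias entry points. The elementwise semantics (`x*s + b`) are inferred from the name and from the two floats recorded as op params above, so treat that reading as an assumption; the wrapper name is illustrative.

```c
// Minimal sketch, assuming GGML_OP_SCALE with a bias computes x*s + b elementwise.
#include "ggml.h"

struct ggml_tensor * scale_and_shift(struct ggml_context * ctx, struct ggml_tensor * x) {
    // ggml_scale(ctx, x, s) is now just the b == 0.0f special case of the same op
    return ggml_scale_bias(ctx, x, 0.5f, -1.0f);
}
```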
@@ -3490,6 +3780,7 @@ struct ggml_tensor * ggml_get_rows(
         struct ggml_tensor  * a,
         struct ggml_tensor  * b) {
     GGML_ASSERT(a->ne[2] == b->ne[1]);
+    GGML_ASSERT(a->ne[3] == b->ne[2]);
     GGML_ASSERT(b->ne[3] == 1);
     GGML_ASSERT(b->type == GGML_TYPE_I32);
 
@@ -3543,7 +3834,7 @@ struct ggml_tensor * ggml_set_rows(
     GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
     GGML_ASSERT(c->ne[3] == 1);
     GGML_ASSERT(b->type == GGML_TYPE_F32);
-    GGML_ASSERT(c->type == GGML_TYPE_I64);
+    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);
 
     GGML_ASSERT(ggml_is_contiguous_rows(a));
     GGML_ASSERT(ggml_is_contiguous_rows(b));
@@ -3553,6 +3844,7 @@ struct ggml_tensor * ggml_set_rows(
     result->op     = GGML_OP_SET_ROWS;
     result->src[0] = b;
     result->src[1] = c;
+    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
 
     return result;
 }
@@ -3651,9 +3943,10 @@ static struct ggml_tensor * ggml_soft_max_impl(
     if (mask) {
         GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
         GGML_ASSERT(ggml_is_contiguous(mask));
-        GGML_ASSERT(ggml_is_matrix(mask));
         GGML_ASSERT(mask->ne[0] == a->ne[0]);
         GGML_ASSERT(mask->ne[1] >= a->ne[1]);
+        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
+        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
     }
 
     if (max_bias > 0.0f) {
@@ -3693,6 +3986,31 @@ struct ggml_tensor * ggml_soft_max_ext(
     return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
 }
 
+struct ggml_tensor * ggml_soft_max_ext_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * mask,
+        float                 scale,
+        float                 max_bias) {
+    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
+}
+
+void ggml_soft_max_add_sinks(
+        struct ggml_tensor * a,
+        struct ggml_tensor * sinks) {
+    if (!sinks) {
+        a->src[2] = NULL;
+        return;
+    }
+
+    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
+    GGML_ASSERT(a->src[2] == NULL);
+    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
+    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
+
+    a->src[2] = sinks;
+}
+
 // ggml_soft_max_ext_back
 
 static struct ggml_tensor * ggml_soft_max_ext_back_impl(
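Sketch of attaching attention sinks to an existing soft_max node, following the asserts above (one F32 sink value per head, i.e. `sinks->ne[0] == a->src[0]->ne[2]`). Tensor shapes and the wrapper name are illustrative only.

```c
// Illustrative sketch: build a soft_max node, then attach sinks as its src[2].
#include "ggml.h"

struct ggml_tensor * softmax_with_sinks(struct ggml_context * ctx,
                                        struct ggml_tensor  * scores, // e.g. [n_kv, n_tokens, n_head, 1]
                                        struct ggml_tensor  * mask,   // F16/F32 mask, or NULL
                                        struct ggml_tensor  * sinks) {// F32, one value per head
    struct ggml_tensor * probs = ggml_soft_max_ext(ctx, scores, mask, 1.0f, 0.0f);
    ggml_soft_max_add_sinks(probs, sinks); // stored as probs->src[2]
    return probs;
}
```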
@@ -3740,6 +4058,7 @@ static struct ggml_tensor * ggml_rope_impl(
         struct ggml_tensor  * b,
         struct ggml_tensor  * c,
         int                   n_dims,
+        int                   sections[GGML_MROPE_SECTIONS],
         int                   mode,
         int                   n_ctx_orig,
         float                 freq_base,
@@ -3753,15 +4072,19 @@ static struct ggml_tensor * ggml_rope_impl(
 
     GGML_ASSERT(ggml_is_vector(b));
     GGML_ASSERT(b->type == GGML_TYPE_I32);
-
+
+    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
+    if (mrope_used) {
+        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
+    } else {
+        GGML_ASSERT(a->ne[2] == b->ne[0]);
+    }
 
     if (c) {
         GGML_ASSERT(c->type == GGML_TYPE_F32);
         GGML_ASSERT(c->ne[0] >= n_dims / 2);
     }
 
-    int sections[4] = {0, 0, 0, 0};
-
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
     int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
@@ -3771,7 +4094,11 @@ static struct ggml_tensor * ggml_rope_impl(
     memcpy(params + 8, &attn_factor, sizeof(float));
     memcpy(params + 9, &beta_fast, sizeof(float));
     memcpy(params + 10, &beta_slow, sizeof(float));
-
+    if (mrope_used && sections) {
+        memcpy(params + 11, sections, sizeof(int32_t) * GGML_MROPE_SECTIONS);
+    } else {
+        memset(params + 11, 0, sizeof(int32_t) * GGML_MROPE_SECTIONS);
+    }
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ROPE;
@@ -3789,7 +4116,7 @@ struct ggml_tensor * ggml_rope(
         int                   n_dims,
         int                   mode) {
     return ggml_rope_impl(
-        ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
+        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
     );
 }
 
@@ -3799,7 +4126,7 @@ struct ggml_tensor * ggml_rope_multi(
         struct ggml_tensor  * b,
         struct ggml_tensor  * c,
         int                   n_dims,
-        int                   sections[
+        int                   sections[GGML_MROPE_SECTIONS],
         int                   mode,
         int                   n_ctx_orig,
         float                 freq_base,
@@ -3808,36 +4135,31 @@ struct ggml_tensor * ggml_rope_multi(
         float                 attn_factor,
         float                 beta_fast,
         float                 beta_slow) {
-
-
-
-
-
-    GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
-
-    if (c) {
-        GGML_ASSERT(c->type == GGML_TYPE_F32);
-        GGML_ASSERT(c->ne[0] >= n_dims / 2);
-    }
-
-    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
-
-    int32_t params[11 + 4] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
-    memcpy(params + 5, &freq_base, sizeof(float));
-    memcpy(params + 6, &freq_scale, sizeof(float));
-    memcpy(params + 7, &ext_factor, sizeof(float));
-    memcpy(params + 8, &attn_factor, sizeof(float));
-    memcpy(params + 9, &beta_fast, sizeof(float));
-    memcpy(params + 10, &beta_slow, sizeof(float));
-    memcpy(&params[11], sections, sizeof(int)*4);
-    ggml_set_op_params(result, params, sizeof(params));
-
-    result->op     = GGML_OP_ROPE;
-    result->src[0] = a;
-    result->src[1] = b;
-    result->src[2] = c;
+    return ggml_rope_impl(
+        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
+        ext_factor, attn_factor, beta_fast, beta_slow, false
+    );
+}
 
-
+struct ggml_tensor * ggml_rope_multi_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        struct ggml_tensor  * c,
+        int                   n_dims,
+        int                   sections[GGML_MROPE_SECTIONS],
+        int                   mode,
+        int                   n_ctx_orig,
+        float                 freq_base,
+        float                 freq_scale,
+        float                 ext_factor,
+        float                 attn_factor,
+        float                 beta_fast,
+        float                 beta_slow) {
+    return ggml_rope_impl(
+        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
+        ext_factor, attn_factor, beta_fast, beta_slow, true
+    );
 }
 
 struct ggml_tensor * ggml_rope_inplace(
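With the refactor above, multi-section (M-RoPE) rotary embedding goes through the same `ggml_rope_impl` path as the regular variants. A caller sketch follows; the section split and all hyperparameters are purely illustrative, and per the assert above the position tensor must carry 4 position ids per token in M-RoPE mode.

```c
// Illustrative sketch of calling ggml_rope_multi after the refactor.
#include "ggml.h"

struct ggml_tensor * apply_mrope(struct ggml_context * ctx,
                                 struct ggml_tensor  * cur,   // e.g. [n_embd_head, n_head, n_tokens]
                                 struct ggml_tensor  * pos) { // I32, 4 position ids per token
    // rough split of the rotary dimensions across the position components (illustrative)
    int sections[GGML_MROPE_SECTIONS] = { 16, 24, 24, 0 };

    return ggml_rope_multi(ctx, cur, pos, NULL, 128, sections, GGML_ROPE_TYPE_MROPE,
                           0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f);
}
```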
@@ -3847,7 +4169,7 @@ struct ggml_tensor * ggml_rope_inplace(
         int                   n_dims,
         int                   mode) {
     return ggml_rope_impl(
-        ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
+        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
     );
 }
 
@@ -3866,7 +4188,7 @@ struct ggml_tensor * ggml_rope_ext(
         float                 beta_fast,
         float                 beta_slow) {
     return ggml_rope_impl(
-        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
+        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
         ext_factor, attn_factor, beta_fast, beta_slow, false
     );
 }
@@ -3886,7 +4208,7 @@ struct ggml_tensor * ggml_rope_ext_inplace(
         float                 beta_fast,
         float                 beta_slow) {
     return ggml_rope_impl(
-        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
+        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
         ext_factor, attn_factor, beta_fast, beta_slow, true
     );
 }
@@ -3905,7 +4227,7 @@ struct ggml_tensor * ggml_rope_custom(
         float                 beta_fast,
         float                 beta_slow) {
     return ggml_rope_impl(
-        ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
+        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
         ext_factor, attn_factor, beta_fast, beta_slow, false
     );
 }
@@ -3924,7 +4246,7 @@ struct ggml_tensor * ggml_rope_custom_inplace(
         float                 beta_fast,
         float                 beta_slow) {
     return ggml_rope_impl(
-        ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
+        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
         ext_factor, attn_factor, beta_fast, beta_slow, true
     );
 }
@@ -4122,14 +4444,13 @@ struct ggml_tensor * ggml_conv_1d_dw(
         int                   s0,
         int                   p0,
         int                   d0) {
-    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
     struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
 
-    struct ggml_tensor * im2col = ggml_im2col(ctx,
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
 
     struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
 
-    result = ggml_reshape_3d(ctx, result,
+    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
 
     return result;
 }
@@ -4210,6 +4531,91 @@ struct ggml_tensor * ggml_conv_2d(
     return result;
 }
 
+// a: [OC*IC, KD, KH, KW]
+// b: [N*IC, ID, IH, IW]
+// result: [N*OD, OH, OW, IC * KD * KH * KW]
+struct ggml_tensor * ggml_im2col_3d(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int64_t               IC,
+        int                   s0, // stride width
+        int                   s1, // stride height
+        int                   s2, // stride depth
+        int                   p0, // padding width
+        int                   p1, // padding height
+        int                   p2, // padding depth
+        int                   d0, // dilation width
+        int                   d1, // dilation height
+        int                   d2, // dilation depth
+        enum ggml_type        dst_type) {
+    const int64_t N  = b->ne[3] / IC;
+    const int64_t ID = b->ne[2];
+    const int64_t IH = b->ne[1];
+    const int64_t IW = b->ne[0];
+
+    const int64_t OC = a->ne[3] / IC;
+    UNUSED(OC);
+    const int64_t KD = a->ne[2];
+    const int64_t KH = a->ne[1];
+    const int64_t KW = a->ne[0];
+    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
+    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
+    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
+
+    GGML_ASSERT((OD > 0) && "b too small compared to a");
+    GGML_ASSERT((OH > 0) && "b too small compared to a");
+    GGML_ASSERT((OW > 0) && "b too small compared to a");
+
+
+    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
+    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
+    ggml_set_op_params(result, params, sizeof(params));
+
+    result->op     = GGML_OP_IM2COL_3D;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
+// a: [OC*IC, KD, KH, KW]
+// b: [N*IC, ID, IH, IW]
+// result: [N*OC, OD, OH, OW]
+struct ggml_tensor * ggml_conv_3d(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int64_t               IC,
+        int                   s0, // stride width
+        int                   s1, // stride height
+        int                   s2, // stride depth
+        int                   p0, // padding width
+        int                   p1, // padding height
+        int                   p2, // padding depth
+        int                   d0, // dilation width
+        int                   d1, // dilation height
+        int                   d2  // dilation depth
+        ) {
+    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
+
+    int64_t OC = a->ne[3] / IC;
+    int64_t N  = b->ne[3] / IC;
+    struct ggml_tensor * result =
+        ggml_mul_mat(ctx,
+                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
+                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                        // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
+
+    int64_t OD = im2col->ne[3] / N;
+    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
+    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
+    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
+
+    return result;
+}
+
 // ggml_conv_2d_sk_p0
 
 struct ggml_tensor * ggml_conv_2d_sk_p0(
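A sketch of driving the new `ggml_conv_3d` helper, following the layout comments above (in ggml `ne[]` order the kernel is `[KW, KH, KD, OC*IC]` and the input is `[IW, IH, ID, N*IC]`). All sizes, and the wrapper name, are illustrative.

```c
// Illustrative sketch: 3x3x3 kernel, stride 1, padding 1, dilation 1 ("same" output size).
#include "ggml.h"

struct ggml_tensor * conv3d_example(struct ggml_context * ctx) {
    const int64_t IC = 4, OC = 8, N = 1;

    struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3,  3, OC * IC);
    struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 16, 16, 16, N  * IC);

    return ggml_conv_3d(ctx, kernel, input, IC,
                        1, 1, 1,   // s0, s1, s2
                        1, 1, 1,   // p0, p1, p2
                        1, 1, 1);  // d0, d1, d2
}
```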
@@ -4331,6 +4737,56 @@ struct ggml_tensor * ggml_conv_2d_direct(
     return result;
 }
 
+// ggml_conv_3d_direct
+
+struct ggml_tensor * ggml_conv_3d_direct(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        int                   s0,
+        int                   s1,
+        int                   s2,
+        int                   p0,
+        int                   p1,
+        int                   p2,
+        int                   d0,
+        int                   d1,
+        int                   d2,
+        int                   c,
+        int                   n,
+        int                   oc) {
+
+    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
+    GGML_ASSERT(b->ne[3] == (int64_t) c * n);
+
+    int64_t ne[4];
+    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
+    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
+    ne[3] = (int64_t) oc * n;
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+
+    ggml_set_op_params_i32(result, 0,  s0);
+    ggml_set_op_params_i32(result, 1,  s1);
+    ggml_set_op_params_i32(result, 2,  s2);
+    ggml_set_op_params_i32(result, 3,  p0);
+    ggml_set_op_params_i32(result, 4,  p1);
+    ggml_set_op_params_i32(result, 5,  p2);
+    ggml_set_op_params_i32(result, 6,  d0);
+    ggml_set_op_params_i32(result, 7,  d1);
+    ggml_set_op_params_i32(result, 8,  d2);
+    ggml_set_op_params_i32(result, 9,  c);
+    ggml_set_op_params_i32(result, 10, n);
+    ggml_set_op_params_i32(result, 11, oc);
+
+    result->op     = GGML_OP_CONV_3D;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 // ggml_conv_transpose_2d_p0
 
 static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
@@ -4458,6 +4914,8 @@ static struct ggml_tensor * ggml_interpolate_impl(
         int64_t               ne3,
         uint32_t              mode) {
     GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
+    // TODO: implement antialias for modes other than bilinear
+    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
 
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
 
@@ -4509,11 +4967,49 @@ struct ggml_tensor * ggml_pad(
         int                   p1,
         int                   p2,
         int                   p3) {
+    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
+}
+
+// ggml_pad_circular
+
+struct ggml_tensor * ggml_pad_circular(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int                   p0,
+        int                   p1,
+        int                   p2,
+        int                   p3) {
+    return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
+}
+
+struct ggml_tensor * ggml_pad_ext(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int                   lp0,
+        int                   rp0,
+        int                   lp1,
+        int                   rp1,
+        int                   lp2,
+        int                   rp2,
+        int                   lp3,
+        int                   rp3
+        ) {
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
-            a->ne[0] +
-            a->ne[1] +
-            a->ne[2] +
-            a->ne[3] +
+            a->ne[0] + lp0 + rp0,
+            a->ne[1] + lp1 + rp1,
+            a->ne[2] + lp2 + rp2,
+            a->ne[3] + lp3 + rp3);
+
+    ggml_set_op_params_i32(result, 0, lp0);
+    ggml_set_op_params_i32(result, 1, rp0);
+    ggml_set_op_params_i32(result, 2, lp1);
+    ggml_set_op_params_i32(result, 3, rp1);
+    ggml_set_op_params_i32(result, 4, lp2);
+    ggml_set_op_params_i32(result, 5, rp2);
+    ggml_set_op_params_i32(result, 6, lp3);
+    ggml_set_op_params_i32(result, 7, rp3);
+    ggml_set_op_params_i32(result, 8, 0); // not circular by default
+
 
     result->op     = GGML_OP_PAD;
     result->src[0] = a;
@@ -4521,6 +5017,25 @@ struct ggml_tensor * ggml_pad(
     return result;
 }
 
+// ggml_pad_ext_circular
+
+struct ggml_tensor * ggml_pad_ext_circular(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int                   lp0,
+        int                   rp0,
+        int                   lp1,
+        int                   rp1,
+        int                   lp2,
+        int                   rp2,
+        int                   lp3,
+        int                   rp3
+        ) {
+    struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
+    ggml_set_op_params_i32(result, 8, 1); // circular
+    return result;
+}
+
 // ggml_pad_reflect_1d
 
 struct ggml_tensor * ggml_pad_reflect_1d(
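Sketch of the generalized padding API: `ggml_pad_ext` takes left/right pad amounts per dimension, `ggml_pad` is now the right-side-only special case, and the `_circular` variants flag wrap-around padding via op param 8. The pad amounts and wrapper name below are illustrative.

```c
// Illustrative sketch of the ggml_pad_ext / ggml_pad_ext_circular call shapes.
#include "ggml.h"

struct ggml_tensor * pad_example(struct ggml_context * ctx, struct ggml_tensor * x) {
    // dim 0: 2 elements of padding on the left, 3 on the right; other dims untouched
    struct ggml_tensor * padded = ggml_pad_ext(ctx, x, 2, 3, 0, 0, 0, 0, 0, 0);

    // equivalent of the old behaviour: right-side padding only
    struct ggml_tensor * right = ggml_pad(ctx, x, 3, 0, 0, 0);
    (void) right;

    // same pad amounts, but wrap-around (circular) padding
    return ggml_pad_ext_circular(ctx, padded, 2, 3, 0, 0, 0, 0, 0, 0);
}
```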
@@ -4580,28 +5095,6 @@ struct ggml_tensor * ggml_roll(
     return result;
 }
 
-// ggml_arange
-
-struct ggml_tensor * ggml_arange(
-        struct ggml_context * ctx,
-        float start,
-        float stop,
-        float step) {
-    GGML_ASSERT(stop > start);
-
-    const int64_t steps = (int64_t) ceilf((stop - start) / step);
-
-    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
-
-    ggml_set_op_params_f32(result, 0, start);
-    ggml_set_op_params_f32(result, 1, stop);
-    ggml_set_op_params_f32(result, 2, step);
-
-    result->op = GGML_OP_ARANGE;
-
-    return result;
-}
-
 // ggml_timestep_embedding
 
 struct ggml_tensor * ggml_timestep_embedding(
@@ -4609,12 +5102,8 @@ struct ggml_tensor * ggml_timestep_embedding(
         struct ggml_tensor  * timesteps,
         int                   dim,
         int                   max_period) {
-    int actual_dim = dim;
-    if (dim % 2 != 0) {
-        actual_dim = dim + 1;
-    }
 
-    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
+    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
 
     ggml_set_op_params_i32(result, 0, dim);
     ggml_set_op_params_i32(result, 1, max_period);
@@ -4625,6 +5114,61 @@ struct ggml_tensor * ggml_timestep_embedding(
     return result;
 }
 
+// ggml_tri
+
+struct ggml_tensor * ggml_tri(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        enum ggml_tri_type    type) {
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+
+    GGML_ASSERT(ggml_is_contiguous(a));
+    GGML_ASSERT(a->ne[0] == a->ne[1]);
+
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    ggml_set_op_params_i32(result, 0, type);
+
+    result->op = GGML_OP_TRI;
+    result->src[0] = a;
+
+    return result;
+}
+
+// ggml_fill
+
+static struct ggml_tensor * ggml_fill_impl(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        float                 c,
+        bool                  inplace) {
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+    GGML_ASSERT(ggml_is_contiguous(a));
+
+    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_set_op_params_f32(result, 0, c);
+
+    result->op = GGML_OP_FILL;
+    result->src[0] = a;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_fill(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        float                 c) {
+    return ggml_fill_impl(ctx, a, c, false);
+}
+
+struct ggml_tensor * ggml_fill_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        float                 c) {
+    return ggml_fill_impl(ctx, a, c, true);
+}
+
 // ggml_argsort
 
 struct ggml_tensor * ggml_argsort(
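
The additions above give `GGML_OP_TRI` and `GGML_OP_FILL` front-end builders: `ggml_tri` keeps a triangular part of a square F32 matrix selected by `enum ggml_tri_type`, and `ggml_fill`/`ggml_fill_inplace` write a constant into an F32 tensor. A hedged sketch follows; the enumerator name `GGML_TRI_TYPE_LOWER` is an assumption, since the enum's members are not shown in this diff:

```c
// Sketch only: GGML_TRI_TYPE_LOWER is assumed; the enum is declared in the header.
struct ggml_tensor * tril   = ggml_tri(ctx, scores, GGML_TRI_TYPE_LOWER); // triangular mask of `scores`
struct ggml_tensor * ones   = ggml_fill(ctx, scores, 1.0f);               // new node, every element 1.0
struct ggml_tensor * zeroed = ggml_fill_inplace(ctx, scores, 0.0f);       // view of `scores`, filled in place
```

As with every ggml builder, these only record graph nodes; the values are produced when the graph is computed.
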
@@ -4632,6 +5176,7 @@ struct ggml_tensor * ggml_argsort(
         struct ggml_tensor  * a,
         enum ggml_sort_order  order) {
     GGML_ASSERT(a->ne[0] <= INT32_MAX);
+
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
 
     ggml_set_op_params_i32(result, 0, (int32_t) order);
@@ -4642,9 +5187,9 @@ struct ggml_tensor * ggml_argsort(
     return result;
 }
 
-// ggml_top_k
+// ggml_argsort_top_k
 
-struct ggml_tensor * ggml_top_k(
+struct ggml_tensor * ggml_argsort_top_k(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         int                   k) {
@@ -4660,6 +5205,44 @@ struct ggml_tensor * ggml_top_k(
     return result;
 }
 
+// ggml_top_k
+
+struct ggml_tensor * ggml_top_k(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int                   k) {
+    GGML_ASSERT(a->ne[0] >= k);
+
+    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
+
+    result->op = GGML_OP_TOP_K;
+    result->src[0] = a;
+
+    return result;
+}
+
+// ggml_arange
+
+struct ggml_tensor * ggml_arange(
+        struct ggml_context * ctx,
+        float start,
+        float stop,
+        float step) {
+    GGML_ASSERT(stop > start);
+
+    const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
+
+    ggml_set_op_params_f32(result, 0, start);
+    ggml_set_op_params_f32(result, 1, stop);
+    ggml_set_op_params_f32(result, 2, step);
+
+    result->op = GGML_OP_ARANGE;
+
+    return result;
+}
+
 // ggml_flash_attn_ext
 
 struct ggml_tensor * ggml_flash_attn_ext(
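
`ggml_top_k` is now a dedicated `GGML_OP_TOP_K` node returning an I32 tensor of shape `[k, ne1, ne2, ne3]`, the previous argsort-based helper lives on as `ggml_argsort_top_k`, and `ggml_arange` was simply moved here unchanged. A short sketch of the two builders (the input tensors are assumed):

```c
// indices of the 4 largest values along dim0 of `logits`
struct ggml_tensor * best4 = ggml_top_k(ctx, logits, 4);

// 0.0, 0.5, ..., 9.5 -> F32 tensor with ceilf((10.0 - 0.0) / 0.5) = 20 elements
struct ggml_tensor * ramp  = ggml_arange(ctx, 0.0f, 10.0f, 0.5f);
```
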
@@ -4674,13 +5257,15 @@ struct ggml_tensor * ggml_flash_attn_ext(
     GGML_ASSERT(ggml_can_mul_mat(k, q));
     // TODO: check if vT can be multiplied by (k*qT)
 
+    GGML_ASSERT(q->ne[3] == k->ne[3]);
+    GGML_ASSERT(q->ne[3] == v->ne[3]);
+
     if (mask) {
         GGML_ASSERT(ggml_is_contiguous(mask));
-        GGML_ASSERT(mask->ne[2] == 1);
-        GGML_ASSERT(mask->ne[3] == 1);
-        GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
-                "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
         //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
+
+        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
+        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
     }
 
     if (max_bias > 0.0f) {
@@ -4722,6 +5307,22 @@ enum ggml_prec ggml_flash_attn_ext_get_prec(
     return (enum ggml_prec) prec_i32;
 }
 
+void ggml_flash_attn_ext_add_sinks(
+        struct ggml_tensor * a,
+        struct ggml_tensor * sinks) {
+    if (!sinks) {
+        a->src[4] = NULL;
+        return;
+    }
+
+    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
+    GGML_ASSERT(a->src[4] == NULL);
+    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
+    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
+
+    a->src[4] = sinks;
+}
+
 // ggml_flash_attn_back
 
 struct ggml_tensor * ggml_flash_attn_back(
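
`ggml_flash_attn_ext_add_sinks` attaches an optional attention-sink tensor as `src[4]` of an existing `GGML_OP_FLASH_ATTN_EXT` node; per the asserts it must be F32 with one value per head (`q->ne[2]`), and passing `NULL` clears the slot. A sketch, assuming the `attn` and `sinks` tensors were built earlier:

```c
// attn:  an existing GGML_OP_FLASH_ATTN_EXT node
// sinks: F32 vector with one entry per attention head (q->ne[2])
ggml_flash_attn_ext_add_sinks(attn, sinks);
```
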
@@ -4808,7 +5409,6 @@ struct ggml_tensor * ggml_ssm_conv(
     const int64_t n_s = sx->ne[2];
 
     // TODO: maybe support other strides than 1?
-    // FIXME: this is always true?
     GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
     GGML_ASSERT(sx->ne[1] == d_inner);
     GGML_ASSERT(n_t >= 0);
@@ -4831,36 +5431,49 @@ struct ggml_tensor * ggml_ssm_scan(
         struct ggml_tensor  * dt,
         struct ggml_tensor  * A,
         struct ggml_tensor  * B,
-        struct ggml_tensor  * C) {
+        struct ggml_tensor  * C,
+        struct ggml_tensor  * ids) {
     GGML_ASSERT(ggml_is_contiguous(s));
-    GGML_ASSERT(ggml_is_contiguous(x));
     GGML_ASSERT(ggml_is_contiguous(dt));
     GGML_ASSERT(ggml_is_contiguous(A));
-    GGML_ASSERT(
-    GGML_ASSERT(ggml_is_3d(B));
-    GGML_ASSERT(ggml_is_3d(s));
+    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
     GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
     GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
-    GGML_ASSERT(
+    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
+    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
+    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
     GGML_ASSERT(ggml_are_same_shape(B, C));
+    GGML_ASSERT(ids->type == GGML_TYPE_I32);
 
     {
        const int64_t d_state = s->ne[0];
-        const int64_t
-        const int64_t
-        const int64_t
-
-
-        GGML_ASSERT(
-        GGML_ASSERT(
-        GGML_ASSERT(
+        const int64_t head_dim = x->ne[0];
+        const int64_t n_head = x->ne[1];
+        const int64_t n_seq_tokens = x->ne[2];
+        const int64_t n_seqs = x->ne[3];
+
+        GGML_ASSERT(dt->ne[0] == n_head);
+        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
+        GGML_ASSERT(dt->ne[2] == n_seqs);
+        GGML_ASSERT(ggml_is_3d(dt));
+        GGML_ASSERT(s->ne[1] == head_dim);
+        GGML_ASSERT(s->ne[2] == n_head);
        GGML_ASSERT(B->ne[0] == d_state);
-        GGML_ASSERT(B->ne[
-        GGML_ASSERT(B->ne[
+        GGML_ASSERT(B->ne[2] == n_seq_tokens);
+        GGML_ASSERT(B->ne[3] == n_seqs);
+        GGML_ASSERT(ids->ne[0] == n_seqs);
+        GGML_ASSERT(ggml_is_vector(ids));
+        GGML_ASSERT(A->ne[1] == n_head);
+        GGML_ASSERT(ggml_is_matrix(A));
+
+        if (A->ne[0] != 1) {
+            // Mamba-1 has more granular decay factors
+            GGML_ASSERT(A->ne[0] == d_state);
+        }
     }
 
     // concatenated y + ssm_states
-    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) +
+    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
 
     result->op = GGML_OP_SSM_SCAN;
     result->src[0] = s;
@@ -4869,6 +5482,7 @@ struct ggml_tensor * ggml_ssm_scan(
     result->src[3] = A;
     result->src[4] = B;
     result->src[5] = C;
+    result->src[6] = ids;
 
     return result;
 }
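
`ggml_ssm_scan` gains a trailing `ids` argument, an I32 vector with one state-slot index per sequence, and the shape checks now treat `x` and `dt` as per-head tensors; the result still concatenates the scan output with the selected ssm states. A sketch of the updated call (all operand tensors assumed to exist):

```c
// new trailing argument: ids selects which cached state slot each of the n_seqs sequences uses
struct ggml_tensor * y_and_states = ggml_ssm_scan(ctx, s, x, dt, A, B, C, ids);
```
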
@@ -5424,6 +6038,63 @@ struct ggml_tensor * ggml_opt_step_adamw(
     return result;
 }
 
+// opt_step_sgd
+
+struct ggml_tensor * ggml_opt_step_sgd(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * grad,
+        struct ggml_tensor  * params) {
+    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
+    GGML_ASSERT(ggml_are_same_shape(a, grad));
+    GGML_ASSERT(params->type == GGML_TYPE_F32);
+    GGML_ASSERT(ggml_nelements(params) == 2);
+
+    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
+
+    result->op = GGML_OP_OPT_STEP_SGD;
+    result->src[0] = a;
+    result->src[1] = grad;
+    result->src[2] = params;
+
+    return result;
+}
+
+// solve_tri
+
+struct ggml_tensor * ggml_solve_tri(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b,
+        bool left,
+        bool lower,
+        bool uni) {
+    GGML_ASSERT(a->type == GGML_TYPE_F32);
+    GGML_ASSERT(b->type == GGML_TYPE_F32);
+
+    // A must be square and lower diagonal
+    GGML_ASSERT(a->ne[0] == a->ne[1]);
+    // B must have same outer dimension as A
+    GGML_ASSERT(a->ne[1] == b->ne[1]);
+
+    // batch dimensions must be equal
+    GGML_ASSERT(a->ne[2] == b->ne[2]);
+    GGML_ASSERT(a->ne[3] == b->ne[3]);
+
+    GGML_ASSERT(ggml_is_contiguous(a));
+    GGML_ASSERT(ggml_is_contiguous(b));
+
+    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
+
+    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
+
+    result->op = GGML_OP_SOLVE_TRI;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 struct ggml_hash_set ggml_hash_set_new(size_t size) {
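
Two new builders land here: `ggml_opt_step_sgd`, an in-place optimizer step whose `params` tensor must hold exactly two F32 values (their meaning is not spelled out in this hunk), and `ggml_solve_tri`, which solves a triangular system `A * X = B`, currently only for the left/lower/non-unit case. A hedged sketch with assumed operand tensors:

```c
// SGD update node for `weights` given `grads`; sgd_params is an F32 tensor with 2 hyperparameter values
struct ggml_tensor * stepped = ggml_opt_step_sgd(ctx, weights, grads, sgd_params);

// Solve A * X = B with A square lower-triangular (the only variant accepted so far)
struct ggml_tensor * X = ggml_solve_tri(ctx, A, B, /*left=*/true, /*lower=*/true, /*uni=*/false);
```
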
@@ -5692,7 +6363,7 @@ static void ggml_compute_backward(
         } break;
         case GGML_OP_MEAN: {
             if (src0_needs_grads) {
-                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
+                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
             }
         } break;
         case GGML_OP_REPEAT: {
@@ -5769,7 +6440,7 @@ static void ggml_compute_backward(
             if (src0_needs_grads) {
                 float s;
                 memcpy(&s, tensor->op_params, sizeof(float));
-                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, false));
+                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
             }
         } break;
         case GGML_OP_SET: {
@@ -5996,6 +6667,16 @@ static void ggml_compute_backward(
                         ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
                     }
                 } break;
+                case GGML_UNARY_OP_EXPM1: {
+                    if (src0_needs_grads) {
+                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
+                    }
+                } break;
+                case GGML_UNARY_OP_SOFTPLUS: {
+                    if (src0_needs_grads) {
+                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
+                    }
+                } break;
                 default: {
                     fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
                         __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
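
The two new unary backward cases encode the textbook derivatives: d/dx (e^x - 1) = e^x, so the EXPM1 gradient is grad * exp(src0); and d/dx log(1 + e^x) = sigmoid(x), so the SOFTPLUS gradient is grad * sigmoid(src0). That is exactly what the added ggml_mul(ctx, grad, ggml_exp/ggml_sigmoid(ctx, src0)) expressions build.
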
@@ -6009,13 +6690,28 @@ static void ggml_compute_backward(
             }
             GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
         } break;
+        case GGML_OP_GLU: {
+            switch (ggml_get_glu_op(tensor)) {
+                case GGML_GLU_OP_SWIGLU: {
+                    if (src0_needs_grads) {
+                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
+                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
+                    }
+                    if (src1_needs_grads) {
+                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
+                    }
+                } break;
+                default: {
+                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
+                } //break;
+            }
+        } break;
         case GGML_OP_NONE: {
             // noop
         } break;
         case GGML_OP_COUNT:
         default: {
-
-            GGML_ABORT("fatal error");
+            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
         } //break;
     }
 
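
The new GLU case covers split SwiGLU only, i.e. y = silu(src0) * src1: the src1 gradient is grad * silu(src0), and the src0 gradient is grad * src1 * silu'(src0), which the code obtains by feeding grad * src1 into ggml_silu_back. Fused (non-split) SwiGLU still aborts, as the assert on src1 makes explicit.
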
@@ -6491,6 +7187,78 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
     GGML_LOG_INFO("========================================\n");
 }
 
+static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
+                                      const int * idxs,
+                                      int count,
+                                      const struct ggml_tensor * tensor) {
+    GGML_ASSERT(cgraph && idxs);
+    for (int i = 0; i < count; ++i) {
+        const int node_idx = idxs[i];
+
+        if (node_idx >= cgraph->n_nodes) {
+            return -1;
+        }
+        if (cgraph->nodes[node_idx] == tensor) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
+                                const int * node_idxs,
+                                int count,
+                                const enum ggml_op * ops,
+                                const int * outputs,
+                                int num_outputs) {
+    GGML_ASSERT(outputs && num_outputs > 0);
+
+    for (int i = 0; i < count; ++i) {
+        if (node_idxs[i] >= cgraph->n_nodes) {
+            return false;
+        }
+
+        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
+
+        if (node->op != ops[i]) {
+            return false;
+        }
+
+        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
+            continue;
+        }
+
+        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
+            return false;
+        }
+
+        int subgraph_uses = 0;
+        for (int j = i + 1; j < count; ++j) {
+            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
+            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
+                if (other_node->src[src_idx] == node) {
+                    subgraph_uses++;
+                }
+            }
+        }
+
+        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
+            return false;
+        }
+
+        // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
+        struct ggml_tensor * view_src = node->view_src;
+        while (view_src) {
+            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
+                return false;
+            }
+            view_src = view_src->view_src;
+        }
+    }
+
+    return true;
+}
+
 // check if node is part of the graph
 static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
     if (cgraph == NULL) {
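
`ggml_can_fuse_subgraph_ext` checks whether a list of graph-node indices can be fused: each node must match the expected op, may only be consumed by later nodes in the list unless it appears in `outputs` (a node flagged `GGML_TENSOR_FLAG_OUTPUT` but not listed there rejects fusion), and any view chain must stay inside the subgraph. A purely illustrative sketch; the index values and op pattern below are made up for the example:

```c
// Hypothetical: nodes 12 (MUL_MAT) and 13 (ADD) form the candidate subgraph,
// and only node 13 is allowed to be read outside of it.
const int          idxs[] = { 12, 13 };
const enum ggml_op ops[]  = { GGML_OP_MUL_MAT, GGML_OP_ADD };
const int          outs[] = { 13 };

if (ggml_can_fuse_subgraph_ext(cgraph, idxs, 2, ops, outs, 1)) {
    // safe to replace the two nodes with a single fused kernel
}
```
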
@@ -6522,20 +7290,18 @@ static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgr
 static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
     struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
     struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
-    fprintf(fp, " \"%p\"
+    fprintf(fp, " \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
             gparent0 ? (void *) gparent0 : (void *) parent,
-            gparent0 ? "g" : "x",
             gparent ? (void *) gparent : (void *) node,
-            gparent ? "g" : "x",
             gparent ? "empty" : "vee",
             gparent ? "dashed" : "solid",
             label);
 }
 
 static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
-    fprintf(fp, " \"%p\"
-            (void *) parent,
-            (void *) node,
+    fprintf(fp, " \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
+            (void *) parent,
+            (void *) node,
             label);
 }
 
@@ -6756,6 +7522,7 @@ size_t ggml_quantize_chunk(
         case GGML_TYPE_Q5_0: result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_Q8_0: result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
+        case GGML_TYPE_MXFP4: result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_Q2_K: result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_Q3_K: result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_Q4_K: result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
@@ -6801,6 +7568,11 @@ size_t ggml_quantize_chunk(
 
 ////////////////////////////////////////////////////////////////////////////////
 
+void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
+    *log_callback = g_logger_state.log_callback;
+    *user_data = g_logger_state.log_callback_user_data;
+}
+
 void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
     g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
     g_logger_state.log_callback_user_data = user_data;
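
`ggml_log_get` is the read counterpart of `ggml_log_set`: it hands back the currently installed callback and its user data, so a caller can save the logger, swap in another one, and restore it later. Sketch (the replacement callback is an assumed user function):

```c
// Save the current logger, install a custom one, then restore the original.
ggml_log_callback prev_cb   = NULL;
void            * prev_data = NULL;
ggml_log_get(&prev_cb, &prev_data);

ggml_log_set(my_log_callback, NULL);   // my_log_callback: assumed user-supplied function
// ... noisy section ...
ggml_log_set(prev_cb, prev_data);
```
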