whispercpp 1.3.3 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/ruby_whisper_params.c +55 -25
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/build-xcframework.sh +24 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/addon.cpp +19 -19
- data/ext/sources/examples/addon.node/index.js +7 -5
- data/ext/sources/examples/bench/bench.cpp +26 -16
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +4 -2
- data/ext/sources/examples/command/command.cpp +26 -24
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/lsp/lsp.cpp +19 -17
- data/ext/sources/examples/server/server.cpp +24 -13
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +4 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +2 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +101 -4
- data/ext/sources/examples/talk-llama/llama-adapter.h +6 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +588 -15
- data/ext/sources/examples/talk-llama/llama-arch.h +58 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +103 -71
- data/ext/sources/examples/talk-llama/llama-batch.h +31 -18
- data/ext/sources/examples/talk-llama/llama-chat.cpp +120 -5
- data/ext/sources/examples/talk-llama/llama-chat.h +7 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +460 -357
- data/ext/sources/examples/talk-llama/llama-context.h +44 -29
- data/ext/sources/examples/talk-llama/llama-cparams.h +4 -4
- data/ext/sources/examples/talk-llama/llama-graph.cpp +543 -271
- data/ext/sources/examples/talk-llama/llama-graph.h +278 -168
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +118 -4
- data/ext/sources/examples/talk-llama/llama-hparams.h +61 -15
- data/ext/sources/examples/talk-llama/llama-impl.h +2 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +326 -0
- data/ext/sources/examples/talk-llama/{llama-kv-cache-unified-iswa.h → llama-kv-cache-iswa.h} +38 -29
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2020 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +358 -27
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +80 -28
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +56 -36
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +30 -29
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +48 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +13 -14
- data/ext/sources/examples/talk-llama/llama-memory.h +16 -10
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +2 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +3 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +7165 -2336
- data/ext/sources/examples/talk-llama/llama-model.h +60 -9
- data/ext/sources/examples/talk-llama/llama-quant.cpp +48 -10
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +226 -126
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +440 -13
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -0
- data/ext/sources/examples/talk-llama/llama.cpp +65 -10
- data/ext/sources/examples/talk-llama/llama.h +95 -177
- data/ext/sources/examples/talk-llama/talk-llama.cpp +9 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +207 -0
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +4 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +17 -16
- data/ext/sources/ggml/CMakeLists.txt +59 -31
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-backend.h +17 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -1
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml.h +221 -16
- data/ext/sources/ggml/src/CMakeLists.txt +17 -2
- data/ext/sources/ggml/src/ggml-alloc.c +265 -141
- data/ext/sources/ggml/src/ggml-backend-impl.h +4 -1
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +30 -13
- data/ext/sources/ggml/src/ggml-backend.cpp +221 -38
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -4
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +14 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +903 -717
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +143 -25
- data/ext/sources/ggml/src/ggml-cann/common.h +143 -1
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +488 -69
- data/ext/sources/ggml/src/ggml-common.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +40 -18
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +4 -2
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +132 -596
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +14 -286
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +103 -582
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +162 -589
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +265 -437
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +3 -58
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +521 -353
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +54 -314
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +184 -675
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +4679 -1657
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +32 -2
- data/ext/sources/ggml/src/ggml-cpu/common.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +13 -6
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +70 -42
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +35 -28
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +152 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +7 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +227 -97
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +474 -1116
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1587 -1177
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -8
- data/ext/sources/ggml/src/ggml-cpu/quants.c +35 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +458 -47
- data/ext/sources/ggml/src/ggml-cpu/repack.h +22 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +89 -60
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/traits.cpp +2 -2
- data/ext/sources/ggml/src/ggml-cpu/traits.h +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +170 -26
- data/ext/sources/ggml/src/ggml-cpu/vec.h +506 -63
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +20 -16
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +250 -63
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +95 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +64 -307
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +498 -367
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +137 -91
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +755 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +593 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +86 -50
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +185 -198
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +379 -107
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +56 -2
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +198 -45
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +123 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +496 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +206 -57
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1262 -721
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cu → mmvf.cu} +53 -53
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cuh → mmvf.cuh} +3 -3
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +64 -73
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +46 -23
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +21 -27
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +276 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +126 -59
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +322 -100
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +21 -4
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +21 -18
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +259 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +90 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +8 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +92 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +58 -36
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +4 -3
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +10 -2
- data/ext/sources/ggml/src/ggml-impl.h +119 -9
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -7
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +600 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1376 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +226 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1308 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +136 -63
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +3158 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +82 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +718 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +2854 -1503
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +18 -8
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +18 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +2510 -242
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +66 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +177 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +49 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +133 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +66 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +111 -16
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +67 -47
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +15 -5
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +25 -16
- data/ext/sources/ggml/src/ggml-sycl/conv.cpp +10 -4
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +166 -99
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +72 -306
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +67 -49
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +1 -31
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +79 -29
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +14 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +9 -6
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +328 -323
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +80 -60
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +201 -132
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +74 -55
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +8 -9
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +35 -42
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +12 -6
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +16 -12
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +3492 -883
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +41 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +13 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +39 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +349 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +66 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +2 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +6 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +4 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +69 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +60 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +98 -42
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +64 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +74 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +4 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +19 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +25 -15
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +18 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +126 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +65 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +11 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -531
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +206 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp +556 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +12 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +15 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +111 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +24 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +53 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +55 -11
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +29 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +38 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +4 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +101 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -77
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1558 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +57 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +48 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml.c +478 -98
- data/ext/sources/ggml/src/gguf.cpp +8 -1
- data/ext/sources/src/whisper.cpp +23 -46
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +3 -3
- data/ext/sources/tests/test-vad.cpp +2 -2
- data/lib/whisper/model/uri.rb +1 -1
- data/sig/whisper.rbs +7 -0
- data/test/test_params.rb +8 -0
- data/test/test_whisper.rb +1 -1
- data/whispercpp.gemspec +1 -1
- metadata +164 -157
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +0 -279
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +0 -1841
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +0 -303
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -6280
@@ -241,7 +241,16 @@
|
|
241
241
|
#define GGML_ROPE_TYPE_MROPE 8
|
242
242
|
#define GGML_ROPE_TYPE_VISION 24
|
243
243
|
|
244
|
+
#define GGML_MROPE_SECTIONS 4
|
245
|
+
|
244
246
|
#define GGML_UNUSED(x) (void)(x)
|
247
|
+
#ifdef __CUDACC__
|
248
|
+
template<typename... Args>
|
249
|
+
__host__ __device__ constexpr inline void ggml_unused_vars_impl(Args&&...) noexcept {}
|
250
|
+
#define GGML_UNUSED_VARS(...) ggml_unused_vars_impl(__VA_ARGS__)
|
251
|
+
#else
|
252
|
+
#define GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0)
|
253
|
+
#endif // __CUDACC__
|
245
254
|
|
246
255
|
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
|
247
256
|
|
@@ -275,19 +284,19 @@
|
|
275
284
|
// GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
276
285
|
//
|
277
286
|
#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
|
278
|
-
const type prefix##0 = (pointer)->array[0]; \
|
287
|
+
const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \
|
279
288
|
GGML_UNUSED(prefix##0);
|
280
289
|
#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
|
281
290
|
GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
|
282
|
-
const type prefix##1 = (pointer)->array[1]; \
|
291
|
+
const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \
|
283
292
|
GGML_UNUSED(prefix##1);
|
284
293
|
#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
|
285
294
|
GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
|
286
|
-
const type prefix##2 = (pointer)->array[2]; \
|
295
|
+
const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \
|
287
296
|
GGML_UNUSED(prefix##2);
|
288
297
|
#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
|
289
298
|
GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
|
290
|
-
const type prefix##3 = (pointer)->array[3]; \
|
299
|
+
const type prefix##3 = (pointer) ? (pointer)->array[3] : 0; \
|
291
300
|
GGML_UNUSED(prefix##3);
|
292
301
|
|
293
302
|
#define GGML_TENSOR_UNARY_OP_LOCALS \
|
@@ -304,6 +313,16 @@
|
|
304
313
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
305
314
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
306
315
|
|
316
|
+
#define GGML_TENSOR_TERNARY_OP_LOCALS \
|
317
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
318
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
319
|
+
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
320
|
+
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
|
321
|
+
GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \
|
322
|
+
GGML_TENSOR_LOCALS(size_t, nb2, src2, nb) \
|
323
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
324
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
325
|
+
|
307
326
|
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
|
308
327
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
309
328
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
@@ -314,6 +333,13 @@
|
|
314
333
|
extern "C" {
|
315
334
|
#endif
|
316
335
|
|
336
|
+
// Function type used in fatal error callbacks
|
337
|
+
typedef void (*ggml_abort_callback_t)(const char * error_message);
|
338
|
+
|
339
|
+
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
|
340
|
+
// Returns the old callback for chaining
|
341
|
+
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
|
342
|
+
|
317
343
|
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
318
344
|
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
319
345
|
|
@@ -388,7 +414,8 @@ extern "C" {
|
|
388
414
|
// GGML_TYPE_IQ4_NL_4_4 = 36,
|
389
415
|
// GGML_TYPE_IQ4_NL_4_8 = 37,
|
390
416
|
// GGML_TYPE_IQ4_NL_8_8 = 38,
|
391
|
-
|
417
|
+
GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
|
418
|
+
GGML_TYPE_COUNT = 40,
|
392
419
|
};
|
393
420
|
|
394
421
|
// precision
|
@@ -423,6 +450,7 @@ extern "C" {
|
|
423
450
|
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
424
451
|
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
425
452
|
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
|
453
|
+
GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
|
426
454
|
};
|
427
455
|
|
428
456
|
// available tensor operations:
|
@@ -431,6 +459,7 @@ extern "C" {
|
|
431
459
|
|
432
460
|
GGML_OP_DUP,
|
433
461
|
GGML_OP_ADD,
|
462
|
+
GGML_OP_ADD_ID,
|
434
463
|
GGML_OP_ADD1,
|
435
464
|
GGML_OP_ACC,
|
436
465
|
GGML_OP_SUB,
|
@@ -482,13 +511,15 @@ extern "C" {
|
|
482
511
|
GGML_OP_CONV_TRANSPOSE_1D,
|
483
512
|
GGML_OP_IM2COL,
|
484
513
|
GGML_OP_IM2COL_BACK,
|
514
|
+
GGML_OP_IM2COL_3D,
|
485
515
|
GGML_OP_CONV_2D,
|
516
|
+
GGML_OP_CONV_3D,
|
486
517
|
GGML_OP_CONV_2D_DW,
|
487
518
|
GGML_OP_CONV_TRANSPOSE_2D,
|
488
519
|
GGML_OP_POOL_1D,
|
489
520
|
GGML_OP_POOL_2D,
|
490
521
|
GGML_OP_POOL_2D_BACK,
|
491
|
-
GGML_OP_UPSCALE,
|
522
|
+
GGML_OP_UPSCALE,
|
492
523
|
GGML_OP_PAD,
|
493
524
|
GGML_OP_PAD_REFLECT_1D,
|
494
525
|
GGML_OP_ROLL,
|
@@ -520,6 +551,7 @@ extern "C" {
|
|
520
551
|
GGML_OP_CROSS_ENTROPY_LOSS,
|
521
552
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
522
553
|
GGML_OP_OPT_STEP_ADAMW,
|
554
|
+
GGML_OP_OPT_STEP_SGD,
|
523
555
|
|
524
556
|
GGML_OP_GLU,
|
525
557
|
|
@@ -550,6 +582,9 @@ extern "C" {
|
|
550
582
|
GGML_GLU_OP_REGLU,
|
551
583
|
GGML_GLU_OP_GEGLU,
|
552
584
|
GGML_GLU_OP_SWIGLU,
|
585
|
+
GGML_GLU_OP_SWIGLU_OAI,
|
586
|
+
GGML_GLU_OP_GEGLU_ERF,
|
587
|
+
GGML_GLU_OP_GEGLU_QUICK,
|
553
588
|
|
554
589
|
GGML_GLU_OP_COUNT,
|
555
590
|
};
|
@@ -639,6 +674,9 @@ extern "C" {
|
|
639
674
|
|
640
675
|
// misc
|
641
676
|
|
677
|
+
GGML_API const char * ggml_version(void);
|
678
|
+
GGML_API const char * ggml_commit(void);
|
679
|
+
|
642
680
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
643
681
|
GGML_API int64_t ggml_time_ms(void);
|
644
682
|
GGML_API int64_t ggml_time_us(void);
|
@@ -819,6 +857,13 @@ extern "C" {
|
|
819
857
|
struct ggml_tensor * b,
|
820
858
|
enum ggml_type type);
|
821
859
|
|
860
|
+
// dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
|
861
|
+
GGML_API struct ggml_tensor * ggml_add_id(
|
862
|
+
struct ggml_context * ctx,
|
863
|
+
struct ggml_tensor * a,
|
864
|
+
struct ggml_tensor * b,
|
865
|
+
struct ggml_tensor * ids);
|
866
|
+
|
822
867
|
GGML_API struct ggml_tensor * ggml_add1(
|
823
868
|
struct ggml_context * ctx,
|
824
869
|
struct ggml_tensor * a,
|
@@ -1137,6 +1182,22 @@ extern "C" {
|
|
1137
1182
|
struct ggml_context * ctx,
|
1138
1183
|
struct ggml_tensor * a);
|
1139
1184
|
|
1185
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf(
|
1186
|
+
struct ggml_context * ctx,
|
1187
|
+
struct ggml_tensor * a);
|
1188
|
+
|
1189
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
|
1190
|
+
struct ggml_context * ctx,
|
1191
|
+
struct ggml_tensor * a);
|
1192
|
+
|
1193
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick(
|
1194
|
+
struct ggml_context * ctx,
|
1195
|
+
struct ggml_tensor * a);
|
1196
|
+
|
1197
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
|
1198
|
+
struct ggml_context * ctx,
|
1199
|
+
struct ggml_tensor * a);
|
1200
|
+
|
1140
1201
|
// A: n columns, r rows,
|
1141
1202
|
// B: n columns, r rows,
|
1142
1203
|
GGML_API struct ggml_tensor * ggml_glu_split(
|
@@ -1160,6 +1221,23 @@ extern "C" {
|
|
1160
1221
|
struct ggml_tensor * a,
|
1161
1222
|
struct ggml_tensor * b);
|
1162
1223
|
|
1224
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
|
1225
|
+
struct ggml_context * ctx,
|
1226
|
+
struct ggml_tensor * a,
|
1227
|
+
struct ggml_tensor * b);
|
1228
|
+
|
1229
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
|
1230
|
+
struct ggml_context * ctx,
|
1231
|
+
struct ggml_tensor * a,
|
1232
|
+
struct ggml_tensor * b);
|
1233
|
+
|
1234
|
+
GGML_API struct ggml_tensor * ggml_swiglu_oai(
|
1235
|
+
struct ggml_context * ctx,
|
1236
|
+
struct ggml_tensor * a,
|
1237
|
+
struct ggml_tensor * b,
|
1238
|
+
float alpha,
|
1239
|
+
float limit);
|
1240
|
+
|
1163
1241
|
// normalize along rows
|
1164
1242
|
GGML_API struct ggml_tensor * ggml_norm(
|
1165
1243
|
struct ggml_context * ctx,
|
@@ -1259,6 +1337,19 @@ extern "C" {
|
|
1259
1337
|
struct ggml_tensor * a,
|
1260
1338
|
float s);
|
1261
1339
|
|
1340
|
+
// x = s * a + b
|
1341
|
+
GGML_API struct ggml_tensor * ggml_scale_bias(
|
1342
|
+
struct ggml_context * ctx,
|
1343
|
+
struct ggml_tensor * a,
|
1344
|
+
float s,
|
1345
|
+
float b);
|
1346
|
+
|
1347
|
+
GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
|
1348
|
+
struct ggml_context * ctx,
|
1349
|
+
struct ggml_tensor * a,
|
1350
|
+
float s,
|
1351
|
+
float b);
|
1352
|
+
|
1262
1353
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
1263
1354
|
GGML_API struct ggml_tensor * ggml_set(
|
1264
1355
|
struct ggml_context * ctx,
|
@@ -1313,6 +1404,7 @@ extern "C" {
|
|
1313
1404
|
struct ggml_tensor * a,
|
1314
1405
|
struct ggml_tensor * b);
|
1315
1406
|
|
1407
|
+
// note: casting from f32 to i32 will discard the fractional part
|
1316
1408
|
GGML_API struct ggml_tensor * ggml_cast(
|
1317
1409
|
struct ggml_context * ctx,
|
1318
1410
|
struct ggml_tensor * a,
|
@@ -1437,7 +1529,11 @@ extern "C" {
|
|
1437
1529
|
struct ggml_context * ctx,
|
1438
1530
|
struct ggml_tensor * a);
|
1439
1531
|
|
1440
|
-
// supports
|
1532
|
+
// supports 4D a:
|
1533
|
+
// a [n_embd, ne1, ne2, ne3]
|
1534
|
+
// b I32 [n_rows, ne2, ne3, 1]
|
1535
|
+
//
|
1536
|
+
// return [n_embd, n_rows, ne2, ne3]
|
1441
1537
|
GGML_API struct ggml_tensor * ggml_get_rows(
|
1442
1538
|
struct ggml_context * ctx,
|
1443
1539
|
struct ggml_tensor * a, // data
|
@@ -1503,8 +1599,14 @@ extern "C" {
|
|
1503
1599
|
struct ggml_context * ctx,
|
1504
1600
|
struct ggml_tensor * a);
|
1505
1601
|
|
1602
|
+
// a [ne0, ne01, ne02, ne03]
|
1603
|
+
// mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
|
1604
|
+
//
|
1605
|
+
// broadcast:
|
1606
|
+
// ne02 % ne12 == 0
|
1607
|
+
// ne03 % ne13 == 0
|
1608
|
+
//
|
1506
1609
|
// fused soft_max(a*scale + mask*(ALiBi slope))
|
1507
|
-
// mask is optional
|
1508
1610
|
// max_bias = 0.0f for no ALiBi
|
1509
1611
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
1510
1612
|
struct ggml_context * ctx,
|
@@ -1513,6 +1615,10 @@ extern "C" {
|
|
1513
1615
|
float scale,
|
1514
1616
|
float max_bias);
|
1515
1617
|
|
1618
|
+
GGML_API void ggml_soft_max_add_sinks(
|
1619
|
+
struct ggml_tensor * a,
|
1620
|
+
struct ggml_tensor * sinks);
|
1621
|
+
|
1516
1622
|
GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
|
1517
1623
|
struct ggml_context * ctx,
|
1518
1624
|
struct ggml_tensor * a,
|
@@ -1571,7 +1677,7 @@ extern "C" {
|
|
1571
1677
|
struct ggml_tensor * b,
|
1572
1678
|
struct ggml_tensor * c,
|
1573
1679
|
int n_dims,
|
1574
|
-
int sections[
|
1680
|
+
int sections[GGML_MROPE_SECTIONS],
|
1575
1681
|
int mode,
|
1576
1682
|
int n_ctx_orig,
|
1577
1683
|
float freq_base,
|
@@ -1597,6 +1703,22 @@ extern "C" {
|
|
1597
1703
|
float beta_fast,
|
1598
1704
|
float beta_slow);
|
1599
1705
|
|
1706
|
+
GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
|
1707
|
+
struct ggml_context * ctx,
|
1708
|
+
struct ggml_tensor * a,
|
1709
|
+
struct ggml_tensor * b,
|
1710
|
+
struct ggml_tensor * c,
|
1711
|
+
int n_dims,
|
1712
|
+
int sections[GGML_MROPE_SECTIONS],
|
1713
|
+
int mode,
|
1714
|
+
int n_ctx_orig,
|
1715
|
+
float freq_base,
|
1716
|
+
float freq_scale,
|
1717
|
+
float ext_factor,
|
1718
|
+
float attn_factor,
|
1719
|
+
float beta_fast,
|
1720
|
+
float beta_slow);
|
1721
|
+
|
1600
1722
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
|
1601
1723
|
struct ggml_context * ctx,
|
1602
1724
|
struct ggml_tensor * a,
|
@@ -1754,6 +1876,41 @@ extern "C" {
|
|
1754
1876
|
int d0, // dilation dimension 0
|
1755
1877
|
int d1); // dilation dimension 1
|
1756
1878
|
|
1879
|
+
GGML_API struct ggml_tensor * ggml_im2col_3d(
|
1880
|
+
struct ggml_context * ctx,
|
1881
|
+
struct ggml_tensor * a,
|
1882
|
+
struct ggml_tensor * b,
|
1883
|
+
int64_t IC,
|
1884
|
+
int s0, // stride width
|
1885
|
+
int s1, // stride height
|
1886
|
+
int s2, // stride depth
|
1887
|
+
int p0, // padding width
|
1888
|
+
int p1, // padding height
|
1889
|
+
int p2, // padding depth
|
1890
|
+
int d0, // dilation width
|
1891
|
+
int d1, // dilation height
|
1892
|
+
int d2, // dilation depth
|
1893
|
+
enum ggml_type dst_type);
|
1894
|
+
|
1895
|
+
// a: [OC*IC, KD, KH, KW]
|
1896
|
+
// b: [N*IC, ID, IH, IW]
|
1897
|
+
// result: [N*OC, OD, OH, OW]
|
1898
|
+
GGML_API struct ggml_tensor * ggml_conv_3d(
|
1899
|
+
struct ggml_context * ctx,
|
1900
|
+
struct ggml_tensor * a,
|
1901
|
+
struct ggml_tensor * b,
|
1902
|
+
int64_t IC,
|
1903
|
+
int s0, // stride width
|
1904
|
+
int s1, // stride height
|
1905
|
+
int s2, // stride depth
|
1906
|
+
int p0, // padding width
|
1907
|
+
int p1, // padding height
|
1908
|
+
int p2, // padding depth
|
1909
|
+
int d0, // dilation width
|
1910
|
+
int d1, // dilation height
|
1911
|
+
int d2 // dilation depth
|
1912
|
+
);
|
1913
|
+
|
1757
1914
|
// kernel size is a->ne[0] x a->ne[1]
|
1758
1915
|
// stride is equal to kernel size
|
1759
1916
|
// padding is zero
|
@@ -1825,6 +1982,23 @@ extern "C" {
|
|
1825
1982
|
int d0, // dilation dimension 0
|
1826
1983
|
int d1); // dilation dimension 1
|
1827
1984
|
|
1985
|
+
GGML_API struct ggml_tensor * ggml_conv_3d_direct(
|
1986
|
+
struct ggml_context * ctx,
|
1987
|
+
struct ggml_tensor * a, // kernel [KW, KH, KD, IC * OC]
|
1988
|
+
struct ggml_tensor * b, // input [W, H, D, C * N]
|
1989
|
+
int s0, // stride
|
1990
|
+
int s1,
|
1991
|
+
int s2,
|
1992
|
+
int p0, // padding
|
1993
|
+
int p1,
|
1994
|
+
int p2,
|
1995
|
+
int d0, // dilation
|
1996
|
+
int d1,
|
1997
|
+
int d2,
|
1998
|
+
int n_channels,
|
1999
|
+
int n_batch,
|
2000
|
+
int n_channels_out);
|
2001
|
+
|
1828
2002
|
enum ggml_op_pool {
|
1829
2003
|
GGML_OP_POOL_MAX,
|
1830
2004
|
GGML_OP_POOL_AVG,
|
@@ -1915,6 +2089,19 @@ extern "C" {
|
|
1915
2089
|
int p2,
|
1916
2090
|
int p3);
|
1917
2091
|
|
2092
|
+
GGML_API struct ggml_tensor * ggml_pad_ext(
|
2093
|
+
struct ggml_context * ctx,
|
2094
|
+
struct ggml_tensor * a,
|
2095
|
+
int lp0,
|
2096
|
+
int rp0,
|
2097
|
+
int lp1,
|
2098
|
+
int rp1,
|
2099
|
+
int lp2,
|
2100
|
+
int rp2,
|
2101
|
+
int lp3,
|
2102
|
+
int rp3
|
2103
|
+
);
|
2104
|
+
|
1918
2105
|
// pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
|
1919
2106
|
GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
|
1920
2107
|
struct ggml_context * ctx,
|
@@ -1967,11 +2154,17 @@ extern "C" {
|
|
1967
2154
|
|
1968
2155
|
#define GGML_KQ_MASK_PAD 64
|
1969
2156
|
|
1970
|
-
// q: [n_embd_k, n_batch, n_head,
|
1971
|
-
// k: [n_embd_k, n_kv, n_head_kv,
|
1972
|
-
// v: [n_embd_v, n_kv, n_head_kv,
|
1973
|
-
// mask: [n_kv, n_batch_pad,
|
1974
|
-
// res: [n_embd_v, n_head, n_batch,
|
2157
|
+
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
2158
|
+
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
|
2159
|
+
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
|
2160
|
+
// mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
2161
|
+
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
|
2162
|
+
//
|
2163
|
+
// broadcast:
|
2164
|
+
// n_head % n_head_kv == 0
|
2165
|
+
// n_head % ne32 == 0
|
2166
|
+
// ne3 % ne33 == 0
|
2167
|
+
//
|
1975
2168
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
1976
2169
|
struct ggml_context * ctx,
|
1977
2170
|
struct ggml_tensor * q,
|
@@ -1989,6 +2182,10 @@ extern "C" {
|
|
1989
2182
|
GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
|
1990
2183
|
const struct ggml_tensor * a);
|
1991
2184
|
|
2185
|
+
GGML_API void ggml_flash_attn_ext_add_sinks(
|
2186
|
+
struct ggml_tensor * a,
|
2187
|
+
struct ggml_tensor * sinks);
|
2188
|
+
|
1992
2189
|
// TODO: needs to be adapted to ggml_flash_attn_ext
|
1993
2190
|
GGML_API struct ggml_tensor * ggml_flash_attn_back(
|
1994
2191
|
struct ggml_context * ctx,
|
@@ -2010,7 +2207,8 @@ extern "C" {
|
|
2010
2207
|
struct ggml_tensor * dt,
|
2011
2208
|
struct ggml_tensor * A,
|
2012
2209
|
struct ggml_tensor * B,
|
2013
|
-
struct ggml_tensor * C
|
2210
|
+
struct ggml_tensor * C,
|
2211
|
+
struct ggml_tensor * ids);
|
2014
2212
|
|
2015
2213
|
// partition into non-overlapping windows with padding if needed
|
2016
2214
|
// example:
|
@@ -2193,7 +2391,14 @@ extern "C" {
|
|
2193
2391
|
struct ggml_tensor * grad,
|
2194
2392
|
struct ggml_tensor * m,
|
2195
2393
|
struct ggml_tensor * v,
|
2196
|
-
struct ggml_tensor * adamw_params); // parameters such
|
2394
|
+
struct ggml_tensor * adamw_params); // parameters such as the learning rate
|
2395
|
+
|
2396
|
+
// stochastic gradient descent step (with weight decay)
|
2397
|
+
GGML_API struct ggml_tensor * ggml_opt_step_sgd(
|
2398
|
+
struct ggml_context * ctx,
|
2399
|
+
struct ggml_tensor * a,
|
2400
|
+
struct ggml_tensor * grad,
|
2401
|
+
struct ggml_tensor * sgd_params); // alpha, weight decay
|
2197
2402
|
|
2198
2403
|
//
|
2199
2404
|
// automatic differentiation
|
@@ -114,6 +114,9 @@ message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}")
|
|
114
114
|
|
115
115
|
if (NOT MSVC)
|
116
116
|
if (GGML_STATIC)
|
117
|
+
if (UNIX AND NOT APPLE)
|
118
|
+
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a;.so")
|
119
|
+
endif()
|
117
120
|
add_link_options(-static)
|
118
121
|
if (MINGW)
|
119
122
|
add_link_options(-static-libgcc -static-libstdc++)
|
@@ -214,6 +217,13 @@ add_library(ggml
|
|
214
217
|
ggml-backend-reg.cpp)
|
215
218
|
add_library(ggml::ggml ALIAS ggml)
|
216
219
|
|
220
|
+
if (GGML_BACKEND_DIR)
|
221
|
+
if (NOT GGML_BACKEND_DL)
|
222
|
+
message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL")
|
223
|
+
endif()
|
224
|
+
target_compile_definitions(ggml PUBLIC GGML_BACKEND_DIR="${GGML_BACKEND_DIR}")
|
225
|
+
endif()
|
226
|
+
|
217
227
|
target_link_libraries(ggml PUBLIC ggml-base)
|
218
228
|
|
219
229
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
@@ -227,7 +237,11 @@ function(ggml_add_backend_library backend)
|
|
227
237
|
set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
228
238
|
target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
|
229
239
|
add_dependencies(ggml ${backend})
|
230
|
-
|
240
|
+
if (GGML_BACKEND_DIR)
|
241
|
+
install(TARGETS ${backend} LIBRARY DESTINATION ${GGML_BACKEND_DIR})
|
242
|
+
else()
|
243
|
+
install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
|
244
|
+
endif()
|
231
245
|
else()
|
232
246
|
add_library(${backend} ${ARGN})
|
233
247
|
target_link_libraries(ggml PUBLIC ${backend})
|
@@ -365,12 +379,13 @@ ggml_add_backend(BLAS)
|
|
365
379
|
ggml_add_backend(CANN)
|
366
380
|
ggml_add_backend(CUDA)
|
367
381
|
ggml_add_backend(HIP)
|
368
|
-
ggml_add_backend(Kompute)
|
369
382
|
ggml_add_backend(METAL)
|
370
383
|
ggml_add_backend(MUSA)
|
371
384
|
ggml_add_backend(RPC)
|
372
385
|
ggml_add_backend(SYCL)
|
373
386
|
ggml_add_backend(Vulkan)
|
387
|
+
ggml_add_backend(WebGPU)
|
388
|
+
ggml_add_backend(zDNN)
|
374
389
|
ggml_add_backend(OpenCL)
|
375
390
|
|
376
391
|
foreach (target ggml-base ggml)
|