whispercpp 1.3.2 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -3
- data/README.md +71 -14
- data/Rakefile +20 -7
- data/ext/.gitignore +4 -6
- data/ext/dependencies.rb +36 -24
- data/ext/extconf.rb +1 -1
- data/ext/options.rb +48 -184
- data/ext/ruby_whisper.c +18 -0
- data/ext/ruby_whisper_context.c +43 -12
- data/ext/ruby_whisper_model.c +1 -1
- data/ext/ruby_whisper_params.c +59 -27
- data/ext/ruby_whisper_segment.c +81 -4
- data/ext/ruby_whisper_transcribe.cpp +13 -7
- data/ext/ruby_whisper_vad_params.c +1 -1
- data/ext/sources/CMakeLists.txt +5 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/build-xcframework.sh +24 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
- data/ext/sources/examples/addon.node/addon.cpp +154 -35
- data/ext/sources/examples/addon.node/index.js +10 -5
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/bench.cpp +29 -18
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +7 -4
- data/ext/sources/examples/command/command.cpp +58 -32
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/common-whisper.cpp +14 -7
- data/ext/sources/examples/lsp/lsp.cpp +21 -17
- data/ext/sources/examples/quantize/quantize.cpp +3 -0
- data/ext/sources/examples/server/CMakeLists.txt +3 -0
- data/ext/sources/examples/server/server.cpp +193 -35
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +10 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +101 -4
- data/ext/sources/examples/talk-llama/llama-adapter.h +6 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +756 -15
- data/ext/sources/examples/talk-llama/llama-arch.h +85 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +773 -272
- data/ext/sources/examples/talk-llama/llama-batch.h +126 -55
- data/ext/sources/examples/talk-llama/llama-chat.cpp +150 -13
- data/ext/sources/examples/talk-llama/llama-chat.h +8 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +814 -542
- data/ext/sources/examples/talk-llama/llama-context.h +68 -32
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-cparams.h +4 -4
- data/ext/sources/examples/talk-llama/llama-graph.cpp +787 -440
- data/ext/sources/examples/talk-llama/llama-graph.h +333 -153
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +128 -6
- data/ext/sources/examples/talk-llama/llama-hparams.h +80 -17
- data/ext/sources/examples/talk-llama/llama-impl.h +2 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +326 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +137 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +1248 -1967
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +218 -345
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +164 -52
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +266 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +139 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1154 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +182 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +94 -4
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +44 -17
- data/ext/sources/examples/talk-llama/llama-model-loader.h +3 -2
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +11377 -5248
- data/ext/sources/examples/talk-llama/llama-model.h +87 -9
- data/ext/sources/examples/talk-llama/llama-quant.cpp +137 -16
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +226 -126
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +502 -38
- data/ext/sources/examples/talk-llama/llama-vocab.h +46 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -17
- data/ext/sources/examples/talk-llama/llama.h +176 -151
- data/ext/sources/examples/talk-llama/talk-llama.cpp +11 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +212 -0
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +6 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +17 -16
- data/ext/sources/ggml/CMakeLists.txt +106 -33
- data/ext/sources/ggml/cmake/common.cmake +24 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-backend.h +18 -2
- data/ext/sources/ggml/include/ggml-cpu.h +2 -0
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml.h +365 -21
- data/ext/sources/ggml/src/CMakeLists.txt +98 -25
- data/ext/sources/ggml/src/ggml-alloc.c +265 -141
- data/ext/sources/ggml/src/ggml-backend-impl.h +4 -1
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +35 -13
- data/ext/sources/ggml/src/ggml-backend.cpp +266 -60
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +4 -4
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -4
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +15 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +903 -717
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +143 -25
- data/ext/sources/ggml/src/ggml-cann/common.h +149 -2
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +521 -78
- data/ext/sources/ggml/src/ggml-common.h +21 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +165 -50
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +5 -3
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +3650 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1891 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2160 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1897 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +214 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +18 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +23 -7
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +179 -110
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +44 -33
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +152 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +7 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +228 -98
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +532 -1124
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +3374 -2081
- data/ext/sources/ggml/src/ggml-cpu/ops.h +13 -8
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1193 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +34 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1982 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +120 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +367 -46
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +3 -3
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +272 -35
- data/ext/sources/ggml/src/ggml-cpu/vec.h +794 -142
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +20 -16
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +291 -81
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +117 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +20 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +64 -307
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +499 -368
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +142 -93
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +755 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +593 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +90 -50
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +185 -198
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +636 -222
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +73 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +198 -45
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +123 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +496 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +206 -57
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1262 -721
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +506 -0
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cuh → mmvf.cuh} +4 -5
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +64 -73
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +46 -23
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +21 -27
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +276 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +126 -59
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +322 -98
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +23 -19
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +21 -18
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +259 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +179 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +92 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +58 -36
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +4 -3
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -2
- data/ext/sources/ggml/src/ggml-impl.h +229 -175
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +21 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +600 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1376 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +226 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1308 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +163 -63
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +3158 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +82 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +718 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3208 -1575
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +18 -8
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +32 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4430 -792
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +133 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +117 -24
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +85 -62
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +13 -17
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +21 -2
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +116 -211
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +700 -1041
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +20 -9
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +17 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +2 -96
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +393 -250
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +32 -8
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -11
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +125 -21
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +4 -3
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +105 -17
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4198 -1145
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +41 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +13 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +39 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +349 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +66 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +2 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +6 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +4 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +69 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +60 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +98 -42
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +64 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +74 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +4 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +19 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +25 -15
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +18 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +126 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +65 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +11 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -531
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +206 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp +556 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +12 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +15 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +111 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +24 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +53 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +64 -11
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +29 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +38 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +4 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +101 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +338 -71
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1558 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +57 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +48 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml.c +802 -142
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +32 -4
- data/ext/sources/include/whisper.h +2 -0
- data/ext/sources/src/CMakeLists.txt +2 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
- data/ext/sources/src/whisper.cpp +241 -215
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +3 -3
- data/ext/sources/tests/test-vad.cpp +2 -2
- data/extsources.rb +15 -9
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +57 -2
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +75 -38
- data/{tests → test}/helper.rb +1 -12
- data/{tests → test}/test_model.rb +9 -0
- data/test/test_package.rb +51 -0
- data/{tests → test}/test_params.rb +8 -0
- data/test/test_segment.rb +146 -0
- data/{tests → test}/test_whisper.rb +70 -0
- data/whispercpp.gemspec +2 -3
- metadata +246 -191
- data/ext/sources/.dockerignore +0 -3
- data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
- data/ext/sources/ci/run.sh +0 -336
- data/ext/sources/close-issue.yml +0 -28
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -6431
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +0 -336
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -5998
- data/tests/test_package.rb +0 -46
- data/tests/test_segment.rb +0 -74
- /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
- /data/{tests → test}/test_callback.rb +0 -0
- /data/{tests → test}/test_error.rb +0 -0
- /data/{tests → test}/test_vad.rb +0 -0
- /data/{tests → test}/test_vad_params.rb +0 -0
@@ -241,7 +241,16 @@
|
|
241
241
|
#define GGML_ROPE_TYPE_MROPE 8
|
242
242
|
#define GGML_ROPE_TYPE_VISION 24
|
243
243
|
|
244
|
+
#define GGML_MROPE_SECTIONS 4
|
245
|
+
|
244
246
|
#define GGML_UNUSED(x) (void)(x)
|
247
|
+
#ifdef __CUDACC__
|
248
|
+
template<typename... Args>
|
249
|
+
__host__ __device__ constexpr inline void ggml_unused_vars_impl(Args&&...) noexcept {}
|
250
|
+
#define GGML_UNUSED_VARS(...) ggml_unused_vars_impl(__VA_ARGS__)
|
251
|
+
#else
|
252
|
+
#define GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0)
|
253
|
+
#endif // __CUDACC__
|
245
254
|
|
246
255
|
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
|
247
256
|
|
@@ -275,19 +284,19 @@
|
|
275
284
|
// GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
276
285
|
//
|
277
286
|
#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
|
278
|
-
const type prefix##0 = (pointer)->array[0]; \
|
287
|
+
const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \
|
279
288
|
GGML_UNUSED(prefix##0);
|
280
289
|
#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
|
281
290
|
GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
|
282
|
-
const type prefix##1 = (pointer)->array[1]; \
|
291
|
+
const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \
|
283
292
|
GGML_UNUSED(prefix##1);
|
284
293
|
#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
|
285
294
|
GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
|
286
|
-
const type prefix##2 = (pointer)->array[2]; \
|
295
|
+
const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \
|
287
296
|
GGML_UNUSED(prefix##2);
|
288
297
|
#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
|
289
298
|
GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
|
290
|
-
const type prefix##3 = (pointer)->array[3]; \
|
299
|
+
const type prefix##3 = (pointer) ? (pointer)->array[3] : 0; \
|
291
300
|
GGML_UNUSED(prefix##3);
|
292
301
|
|
293
302
|
#define GGML_TENSOR_UNARY_OP_LOCALS \
|
@@ -304,6 +313,16 @@
|
|
304
313
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
305
314
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
306
315
|
|
316
|
+
#define GGML_TENSOR_TERNARY_OP_LOCALS \
|
317
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
318
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
319
|
+
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
320
|
+
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
|
321
|
+
GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \
|
322
|
+
GGML_TENSOR_LOCALS(size_t, nb2, src2, nb) \
|
323
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
324
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
325
|
+
|
307
326
|
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
|
308
327
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
309
328
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
@@ -314,6 +333,13 @@
|
|
314
333
|
extern "C" {
|
315
334
|
#endif
|
316
335
|
|
336
|
+
// Function type used in fatal error callbacks
|
337
|
+
typedef void (*ggml_abort_callback_t)(const char * error_message);
|
338
|
+
|
339
|
+
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
|
340
|
+
// Returns the old callback for chaining
|
341
|
+
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
|
342
|
+
|
317
343
|
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
318
344
|
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
319
345
|
|
@@ -388,7 +414,8 @@ extern "C" {
|
|
388
414
|
// GGML_TYPE_IQ4_NL_4_4 = 36,
|
389
415
|
// GGML_TYPE_IQ4_NL_4_8 = 37,
|
390
416
|
// GGML_TYPE_IQ4_NL_8_8 = 38,
|
391
|
-
GGML_TYPE_COUNT = 39,
|
417
|
+
GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
|
418
|
+
GGML_TYPE_COUNT = 40,
|
392
419
|
};
|
393
420
|
|
394
421
|
// precision
|
@@ -423,6 +450,7 @@ extern "C" {
|
|
423
450
|
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
424
451
|
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
425
452
|
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
|
453
|
+
GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
|
426
454
|
};
|
427
455
|
|
428
456
|
// available tensor operations:
|
@@ -431,6 +459,7 @@ extern "C" {
|
|
431
459
|
|
432
460
|
GGML_OP_DUP,
|
433
461
|
GGML_OP_ADD,
|
462
|
+
GGML_OP_ADD_ID,
|
434
463
|
GGML_OP_ADD1,
|
435
464
|
GGML_OP_ACC,
|
436
465
|
GGML_OP_SUB,
|
@@ -470,6 +499,7 @@ extern "C" {
|
|
470
499
|
GGML_OP_TRANSPOSE,
|
471
500
|
GGML_OP_GET_ROWS,
|
472
501
|
GGML_OP_GET_ROWS_BACK,
|
502
|
+
GGML_OP_SET_ROWS,
|
473
503
|
GGML_OP_DIAG,
|
474
504
|
GGML_OP_DIAG_MASK_INF,
|
475
505
|
GGML_OP_DIAG_MASK_ZERO,
|
@@ -481,14 +511,18 @@ extern "C" {
|
|
481
511
|
GGML_OP_CONV_TRANSPOSE_1D,
|
482
512
|
GGML_OP_IM2COL,
|
483
513
|
GGML_OP_IM2COL_BACK,
|
514
|
+
GGML_OP_IM2COL_3D,
|
515
|
+
GGML_OP_CONV_2D,
|
516
|
+
GGML_OP_CONV_3D,
|
484
517
|
GGML_OP_CONV_2D_DW,
|
485
518
|
GGML_OP_CONV_TRANSPOSE_2D,
|
486
519
|
GGML_OP_POOL_1D,
|
487
520
|
GGML_OP_POOL_2D,
|
488
521
|
GGML_OP_POOL_2D_BACK,
|
489
|
-
GGML_OP_UPSCALE, // nearest interpolate
|
522
|
+
GGML_OP_UPSCALE,
|
490
523
|
GGML_OP_PAD,
|
491
524
|
GGML_OP_PAD_REFLECT_1D,
|
525
|
+
GGML_OP_ROLL,
|
492
526
|
GGML_OP_ARANGE,
|
493
527
|
GGML_OP_TIMESTEP_EMBEDDING,
|
494
528
|
GGML_OP_ARGSORT,
|
@@ -517,6 +551,9 @@ extern "C" {
|
|
517
551
|
GGML_OP_CROSS_ENTROPY_LOSS,
|
518
552
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
519
553
|
GGML_OP_OPT_STEP_ADAMW,
|
554
|
+
GGML_OP_OPT_STEP_SGD,
|
555
|
+
|
556
|
+
GGML_OP_GLU,
|
520
557
|
|
521
558
|
GGML_OP_COUNT,
|
522
559
|
};
|
@@ -541,6 +578,17 @@ extern "C" {
|
|
541
578
|
GGML_UNARY_OP_COUNT,
|
542
579
|
};
|
543
580
|
|
581
|
+
enum ggml_glu_op {
|
582
|
+
GGML_GLU_OP_REGLU,
|
583
|
+
GGML_GLU_OP_GEGLU,
|
584
|
+
GGML_GLU_OP_SWIGLU,
|
585
|
+
GGML_GLU_OP_SWIGLU_OAI,
|
586
|
+
GGML_GLU_OP_GEGLU_ERF,
|
587
|
+
GGML_GLU_OP_GEGLU_QUICK,
|
588
|
+
|
589
|
+
GGML_GLU_OP_COUNT,
|
590
|
+
};
|
591
|
+
|
544
592
|
enum ggml_object_type {
|
545
593
|
GGML_OBJECT_TYPE_TENSOR,
|
546
594
|
GGML_OBJECT_TYPE_GRAPH,
|
@@ -626,6 +674,9 @@ extern "C" {
|
|
626
674
|
|
627
675
|
// misc
|
628
676
|
|
677
|
+
GGML_API const char * ggml_version(void);
|
678
|
+
GGML_API const char * ggml_commit(void);
|
679
|
+
|
629
680
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
630
681
|
GGML_API int64_t ggml_time_ms(void);
|
631
682
|
GGML_API int64_t ggml_time_us(void);
|
@@ -656,6 +707,7 @@ extern "C" {
|
|
656
707
|
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
657
708
|
|
658
709
|
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
710
|
+
GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
|
659
711
|
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
660
712
|
|
661
713
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
@@ -686,6 +738,9 @@ extern "C" {
|
|
686
738
|
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
687
739
|
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
688
740
|
|
741
|
+
// true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
|
742
|
+
GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
|
743
|
+
|
689
744
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
690
745
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
691
746
|
|
@@ -757,6 +812,7 @@ extern "C" {
|
|
757
812
|
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
758
813
|
|
759
814
|
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
815
|
+
GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
|
760
816
|
|
761
817
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
762
818
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
@@ -801,6 +857,13 @@ extern "C" {
|
|
801
857
|
struct ggml_tensor * b,
|
802
858
|
enum ggml_type type);
|
803
859
|
|
860
|
+
// dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
|
861
|
+
GGML_API struct ggml_tensor * ggml_add_id(
|
862
|
+
struct ggml_context * ctx,
|
863
|
+
struct ggml_tensor * a,
|
864
|
+
struct ggml_tensor * b,
|
865
|
+
struct ggml_tensor * ids);
|
866
|
+
|
804
867
|
GGML_API struct ggml_tensor * ggml_add1(
|
805
868
|
struct ggml_context * ctx,
|
806
869
|
struct ggml_tensor * a,
|
@@ -935,6 +998,15 @@ extern "C" {
|
|
935
998
|
struct ggml_tensor * a,
|
936
999
|
struct ggml_tensor * b);
|
937
1000
|
|
1001
|
+
// repeat a to the specified shape
|
1002
|
+
GGML_API struct ggml_tensor * ggml_repeat_4d(
|
1003
|
+
struct ggml_context * ctx,
|
1004
|
+
struct ggml_tensor * a,
|
1005
|
+
int64_t ne0,
|
1006
|
+
int64_t ne1,
|
1007
|
+
int64_t ne2,
|
1008
|
+
int64_t ne3);
|
1009
|
+
|
938
1010
|
// sums repetitions in a into shape of b
|
939
1011
|
GGML_API struct ggml_tensor * ggml_repeat_back(
|
940
1012
|
struct ggml_context * ctx,
|
@@ -1076,6 +1148,96 @@ extern "C" {
|
|
1076
1148
|
struct ggml_context * ctx,
|
1077
1149
|
struct ggml_tensor * a);
|
1078
1150
|
|
1151
|
+
// gated linear unit ops
|
1152
|
+
// A: n columns, r rows,
|
1153
|
+
// result is n / 2 columns, r rows,
|
1154
|
+
// expects gate in second half of row, unless swapped is true
|
1155
|
+
GGML_API struct ggml_tensor * ggml_glu(
|
1156
|
+
struct ggml_context * ctx,
|
1157
|
+
struct ggml_tensor * a,
|
1158
|
+
enum ggml_glu_op op,
|
1159
|
+
bool swapped);
|
1160
|
+
|
1161
|
+
GGML_API struct ggml_tensor * ggml_reglu(
|
1162
|
+
struct ggml_context * ctx,
|
1163
|
+
struct ggml_tensor * a);
|
1164
|
+
|
1165
|
+
GGML_API struct ggml_tensor * ggml_reglu_swapped(
|
1166
|
+
struct ggml_context * ctx,
|
1167
|
+
struct ggml_tensor * a);
|
1168
|
+
|
1169
|
+
GGML_API struct ggml_tensor * ggml_geglu(
|
1170
|
+
struct ggml_context * ctx,
|
1171
|
+
struct ggml_tensor * a);
|
1172
|
+
|
1173
|
+
GGML_API struct ggml_tensor * ggml_geglu_swapped(
|
1174
|
+
struct ggml_context * ctx,
|
1175
|
+
struct ggml_tensor * a);
|
1176
|
+
|
1177
|
+
GGML_API struct ggml_tensor * ggml_swiglu(
|
1178
|
+
struct ggml_context * ctx,
|
1179
|
+
struct ggml_tensor * a);
|
1180
|
+
|
1181
|
+
GGML_API struct ggml_tensor * ggml_swiglu_swapped(
|
1182
|
+
struct ggml_context * ctx,
|
1183
|
+
struct ggml_tensor * a);
|
1184
|
+
|
1185
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf(
|
1186
|
+
struct ggml_context * ctx,
|
1187
|
+
struct ggml_tensor * a);
|
1188
|
+
|
1189
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
|
1190
|
+
struct ggml_context * ctx,
|
1191
|
+
struct ggml_tensor * a);
|
1192
|
+
|
1193
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick(
|
1194
|
+
struct ggml_context * ctx,
|
1195
|
+
struct ggml_tensor * a);
|
1196
|
+
|
1197
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
|
1198
|
+
struct ggml_context * ctx,
|
1199
|
+
struct ggml_tensor * a);
|
1200
|
+
|
1201
|
+
// A: n columns, r rows,
|
1202
|
+
// B: n columns, r rows,
|
1203
|
+
GGML_API struct ggml_tensor * ggml_glu_split(
|
1204
|
+
struct ggml_context * ctx,
|
1205
|
+
struct ggml_tensor * a,
|
1206
|
+
struct ggml_tensor * b,
|
1207
|
+
enum ggml_glu_op op);
|
1208
|
+
|
1209
|
+
GGML_API struct ggml_tensor * ggml_reglu_split(
|
1210
|
+
struct ggml_context * ctx,
|
1211
|
+
struct ggml_tensor * a,
|
1212
|
+
struct ggml_tensor * b);
|
1213
|
+
|
1214
|
+
GGML_API struct ggml_tensor * ggml_geglu_split(
|
1215
|
+
struct ggml_context * ctx,
|
1216
|
+
struct ggml_tensor * a,
|
1217
|
+
struct ggml_tensor * b);
|
1218
|
+
|
1219
|
+
GGML_API struct ggml_tensor * ggml_swiglu_split(
|
1220
|
+
struct ggml_context * ctx,
|
1221
|
+
struct ggml_tensor * a,
|
1222
|
+
struct ggml_tensor * b);
|
1223
|
+
|
1224
|
+
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
|
1225
|
+
struct ggml_context * ctx,
|
1226
|
+
struct ggml_tensor * a,
|
1227
|
+
struct ggml_tensor * b);
|
1228
|
+
|
1229
|
+
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
|
1230
|
+
struct ggml_context * ctx,
|
1231
|
+
struct ggml_tensor * a,
|
1232
|
+
struct ggml_tensor * b);
|
1233
|
+
|
1234
|
+
GGML_API struct ggml_tensor * ggml_swiglu_oai(
|
1235
|
+
struct ggml_context * ctx,
|
1236
|
+
struct ggml_tensor * a,
|
1237
|
+
struct ggml_tensor * b,
|
1238
|
+
float alpha,
|
1239
|
+
float limit);
|
1240
|
+
|
1079
1241
|
// normalize along rows
|
1080
1242
|
GGML_API struct ggml_tensor * ggml_norm(
|
1081
1243
|
struct ggml_context * ctx,
|
@@ -1175,6 +1337,19 @@ extern "C" {
|
|
1175
1337
|
struct ggml_tensor * a,
|
1176
1338
|
float s);
|
1177
1339
|
|
1340
|
+
// x = s * a + b
|
1341
|
+
GGML_API struct ggml_tensor * ggml_scale_bias(
|
1342
|
+
struct ggml_context * ctx,
|
1343
|
+
struct ggml_tensor * a,
|
1344
|
+
float s,
|
1345
|
+
float b);
|
1346
|
+
|
1347
|
+
GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
|
1348
|
+
struct ggml_context * ctx,
|
1349
|
+
struct ggml_tensor * a,
|
1350
|
+
float s,
|
1351
|
+
float b);
|
1352
|
+
|
1178
1353
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
1179
1354
|
GGML_API struct ggml_tensor * ggml_set(
|
1180
1355
|
struct ggml_context * ctx,
|
@@ -1229,6 +1404,7 @@ extern "C" {
|
|
1229
1404
|
struct ggml_tensor * a,
|
1230
1405
|
struct ggml_tensor * b);
|
1231
1406
|
|
1407
|
+
// note: casting from f32 to i32 will discard the fractional part
|
1232
1408
|
GGML_API struct ggml_tensor * ggml_cast(
|
1233
1409
|
struct ggml_context * ctx,
|
1234
1410
|
struct ggml_tensor * a,
|
@@ -1353,7 +1529,11 @@ extern "C" {
|
|
1353
1529
|
struct ggml_context * ctx,
|
1354
1530
|
struct ggml_tensor * a);
|
1355
1531
|
|
1356
|
-
// supports 3D: a->ne[2] == b->ne[1]
|
1532
|
+
// supports 4D a:
|
1533
|
+
// a [n_embd, ne1, ne2, ne3]
|
1534
|
+
// b I32 [n_rows, ne2, ne3, 1]
|
1535
|
+
//
|
1536
|
+
// return [n_embd, n_rows, ne2, ne3]
|
1357
1537
|
GGML_API struct ggml_tensor * ggml_get_rows(
|
1358
1538
|
struct ggml_context * ctx,
|
1359
1539
|
struct ggml_tensor * a, // data
|
@@ -1365,6 +1545,23 @@ extern "C" {
|
|
1365
1545
|
struct ggml_tensor * b, // row indices
|
1366
1546
|
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
1367
1547
|
|
1548
|
+
// a TD [n_embd, ne1, ne2, ne3]
|
1549
|
+
// b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
|
1550
|
+
// c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
|
1551
|
+
//
|
1552
|
+
// undefined behavior if destination rows overlap
|
1553
|
+
//
|
1554
|
+
// broadcast:
|
1555
|
+
// ne2 % ne11 == 0
|
1556
|
+
// ne3 % ne12 == 0
|
1557
|
+
//
|
1558
|
+
// return view(a)
|
1559
|
+
GGML_API struct ggml_tensor * ggml_set_rows(
|
1560
|
+
struct ggml_context * ctx,
|
1561
|
+
struct ggml_tensor * a, // destination
|
1562
|
+
struct ggml_tensor * b, // source
|
1563
|
+
struct ggml_tensor * c); // row indices
|
1564
|
+
|
1368
1565
|
GGML_API struct ggml_tensor * ggml_diag(
|
1369
1566
|
struct ggml_context * ctx,
|
1370
1567
|
struct ggml_tensor * a);
|
@@ -1402,8 +1599,14 @@ extern "C" {
|
|
1402
1599
|
struct ggml_context * ctx,
|
1403
1600
|
struct ggml_tensor * a);
|
1404
1601
|
|
1602
|
+
// a [ne0, ne01, ne02, ne03]
|
1603
|
+
// mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
|
1604
|
+
//
|
1605
|
+
// broadcast:
|
1606
|
+
// ne02 % ne12 == 0
|
1607
|
+
// ne03 % ne13 == 0
|
1608
|
+
//
|
1405
1609
|
// fused soft_max(a*scale + mask*(ALiBi slope))
|
1406
|
-
// mask is optional
|
1407
1610
|
// max_bias = 0.0f for no ALiBi
|
1408
1611
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
1409
1612
|
struct ggml_context * ctx,
|
@@ -1412,6 +1615,10 @@ extern "C" {
|
|
1412
1615
|
float scale,
|
1413
1616
|
float max_bias);
|
1414
1617
|
|
1618
|
+
GGML_API void ggml_soft_max_add_sinks(
|
1619
|
+
struct ggml_tensor * a,
|
1620
|
+
struct ggml_tensor * sinks);
|
1621
|
+
|
1415
1622
|
GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
|
1416
1623
|
struct ggml_context * ctx,
|
1417
1624
|
struct ggml_tensor * a,
|
@@ -1470,7 +1677,7 @@ extern "C" {
|
|
1470
1677
|
struct ggml_tensor * b,
|
1471
1678
|
struct ggml_tensor * c,
|
1472
1679
|
int n_dims,
|
1473
|
-
int sections[4],
|
1680
|
+
int sections[GGML_MROPE_SECTIONS],
|
1474
1681
|
int mode,
|
1475
1682
|
int n_ctx_orig,
|
1476
1683
|
float freq_base,
|
@@ -1496,6 +1703,22 @@ extern "C" {
|
|
1496
1703
|
float beta_fast,
|
1497
1704
|
float beta_slow);
|
1498
1705
|
|
1706
|
+
GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
|
1707
|
+
struct ggml_context * ctx,
|
1708
|
+
struct ggml_tensor * a,
|
1709
|
+
struct ggml_tensor * b,
|
1710
|
+
struct ggml_tensor * c,
|
1711
|
+
int n_dims,
|
1712
|
+
int sections[GGML_MROPE_SECTIONS],
|
1713
|
+
int mode,
|
1714
|
+
int n_ctx_orig,
|
1715
|
+
float freq_base,
|
1716
|
+
float freq_scale,
|
1717
|
+
float ext_factor,
|
1718
|
+
float attn_factor,
|
1719
|
+
float beta_fast,
|
1720
|
+
float beta_slow);
|
1721
|
+
|
1499
1722
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
|
1500
1723
|
struct ggml_context * ctx,
|
1501
1724
|
struct ggml_tensor * a,
|
@@ -1653,6 +1876,41 @@ extern "C" {
|
|
1653
1876
|
int d0, // dilation dimension 0
|
1654
1877
|
int d1); // dilation dimension 1
|
1655
1878
|
|
1879
|
+
GGML_API struct ggml_tensor * ggml_im2col_3d(
|
1880
|
+
struct ggml_context * ctx,
|
1881
|
+
struct ggml_tensor * a,
|
1882
|
+
struct ggml_tensor * b,
|
1883
|
+
int64_t IC,
|
1884
|
+
int s0, // stride width
|
1885
|
+
int s1, // stride height
|
1886
|
+
int s2, // stride depth
|
1887
|
+
int p0, // padding width
|
1888
|
+
int p1, // padding height
|
1889
|
+
int p2, // padding depth
|
1890
|
+
int d0, // dilation width
|
1891
|
+
int d1, // dilation height
|
1892
|
+
int d2, // dilation depth
|
1893
|
+
enum ggml_type dst_type);
|
1894
|
+
|
1895
|
+
// a: [OC*IC, KD, KH, KW]
|
1896
|
+
// b: [N*IC, ID, IH, IW]
|
1897
|
+
// result: [N*OC, OD, OH, OW]
|
1898
|
+
GGML_API struct ggml_tensor * ggml_conv_3d(
|
1899
|
+
struct ggml_context * ctx,
|
1900
|
+
struct ggml_tensor * a,
|
1901
|
+
struct ggml_tensor * b,
|
1902
|
+
int64_t IC,
|
1903
|
+
int s0, // stride width
|
1904
|
+
int s1, // stride height
|
1905
|
+
int s2, // stride depth
|
1906
|
+
int p0, // padding width
|
1907
|
+
int p1, // padding height
|
1908
|
+
int p2, // padding depth
|
1909
|
+
int d0, // dilation width
|
1910
|
+
int d1, // dilation height
|
1911
|
+
int d2 // dilation depth
|
1912
|
+
);
|
1913
|
+
|
1656
1914
|
// kernel size is a->ne[0] x a->ne[1]
|
1657
1915
|
// stride is equal to kernel size
|
1658
1916
|
// padding is zero
|
@@ -1713,6 +1971,34 @@ extern "C" {
|
|
1713
1971
|
struct ggml_tensor * b,
|
1714
1972
|
int stride);
|
1715
1973
|
|
1974
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_direct(
|
1975
|
+
struct ggml_context * ctx,
|
1976
|
+
struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
|
1977
|
+
struct ggml_tensor * b, // input data [W, H, C, N]
|
1978
|
+
int s0, // stride dimension 0
|
1979
|
+
int s1, // stride dimension 1
|
1980
|
+
int p0, // padding dimension 0
|
1981
|
+
int p1, // padding dimension 1
|
1982
|
+
int d0, // dilation dimension 0
|
1983
|
+
int d1); // dilation dimension 1
|
1984
|
+
|
1985
|
+
GGML_API struct ggml_tensor * ggml_conv_3d_direct(
|
1986
|
+
struct ggml_context * ctx,
|
1987
|
+
struct ggml_tensor * a, // kernel [KW, KH, KD, IC * OC]
|
1988
|
+
struct ggml_tensor * b, // input [W, H, D, C * N]
|
1989
|
+
int s0, // stride
|
1990
|
+
int s1,
|
1991
|
+
int s2,
|
1992
|
+
int p0, // padding
|
1993
|
+
int p1,
|
1994
|
+
int p2,
|
1995
|
+
int d0, // dilation
|
1996
|
+
int d1,
|
1997
|
+
int d2,
|
1998
|
+
int n_channels,
|
1999
|
+
int n_batch,
|
2000
|
+
int n_channels_out);
|
2001
|
+
|
1716
2002
|
enum ggml_op_pool {
|
1717
2003
|
GGML_OP_POOL_MAX,
|
1718
2004
|
GGML_OP_POOL_AVG,
|
@@ -1755,6 +2041,12 @@ extern "C" {
|
|
1755
2041
|
enum ggml_scale_mode {
|
1756
2042
|
GGML_SCALE_MODE_NEAREST = 0,
|
1757
2043
|
GGML_SCALE_MODE_BILINEAR = 1,
|
2044
|
+
|
2045
|
+
GGML_SCALE_MODE_COUNT
|
2046
|
+
};
|
2047
|
+
|
2048
|
+
enum ggml_scale_flag {
|
2049
|
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
1758
2050
|
};
|
1759
2051
|
|
1760
2052
|
// interpolate
|
@@ -1767,14 +2059,26 @@ extern "C" {
|
|
1767
2059
|
|
1768
2060
|
// interpolate
|
1769
2061
|
// interpolate scale to specified dimensions
|
1770
|
-
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
2062
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
|
1771
2063
|
struct ggml_context * ctx,
|
1772
2064
|
struct ggml_tensor * a,
|
1773
2065
|
int ne0,
|
1774
2066
|
int ne1,
|
1775
2067
|
int ne2,
|
1776
2068
|
int ne3,
|
1777
|
-
enum ggml_scale_mode mode)
|
2069
|
+
enum ggml_scale_mode mode),
|
2070
|
+
"use ggml_interpolate instead");
|
2071
|
+
|
2072
|
+
// Up- or downsamples the input to the specified size.
|
2073
|
+
// 2D scale modes (eg. bilinear) are applied to the first two dimensions.
|
2074
|
+
GGML_API struct ggml_tensor * ggml_interpolate(
|
2075
|
+
struct ggml_context * ctx,
|
2076
|
+
struct ggml_tensor * a,
|
2077
|
+
int64_t ne0,
|
2078
|
+
int64_t ne1,
|
2079
|
+
int64_t ne2,
|
2080
|
+
int64_t ne3,
|
2081
|
+
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
|
1778
2082
|
|
1779
2083
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
1780
2084
|
GGML_API struct ggml_tensor * ggml_pad(
|
@@ -1785,6 +2089,19 @@ extern "C" {
|
|
1785
2089
|
int p2,
|
1786
2090
|
int p3);
|
1787
2091
|
|
2092
|
+
GGML_API struct ggml_tensor * ggml_pad_ext(
|
2093
|
+
struct ggml_context * ctx,
|
2094
|
+
struct ggml_tensor * a,
|
2095
|
+
int lp0,
|
2096
|
+
int rp0,
|
2097
|
+
int lp1,
|
2098
|
+
int rp1,
|
2099
|
+
int lp2,
|
2100
|
+
int rp2,
|
2101
|
+
int lp3,
|
2102
|
+
int rp3
|
2103
|
+
);
|
2104
|
+
|
1788
2105
|
// pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
|
1789
2106
|
GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
|
1790
2107
|
struct ggml_context * ctx,
|
@@ -1792,6 +2109,17 @@ extern "C" {
|
|
1792
2109
|
int p0,
|
1793
2110
|
int p1);
|
1794
2111
|
|
2112
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
2113
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
2114
|
+
GGML_API struct ggml_tensor * ggml_roll(
|
2115
|
+
struct ggml_context * ctx,
|
2116
|
+
struct ggml_tensor * a,
|
2117
|
+
int shift0,
|
2118
|
+
int shift1,
|
2119
|
+
int shift2,
|
2120
|
+
int shift3);
|
2121
|
+
|
2122
|
+
|
1795
2123
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
1796
2124
|
// timesteps: [N,]
|
1797
2125
|
// return: [N, dim]
|
@@ -1826,11 +2154,17 @@ extern "C" {
|
|
1826
2154
|
|
1827
2155
|
#define GGML_KQ_MASK_PAD 64
|
1828
2156
|
|
1829
|
-
// q: [n_embd_k, n_batch, n_head,
|
1830
|
-
// k: [n_embd_k, n_kv, n_head_kv,
|
1831
|
-
// v: [n_embd_v, n_kv, n_head_kv,
|
1832
|
-
// mask: [n_kv, n_batch_pad,
|
1833
|
-
// res: [n_embd_v, n_head, n_batch,
|
2157
|
+
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
2158
|
+
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
|
2159
|
+
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
|
2160
|
+
// mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
2161
|
+
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
|
2162
|
+
//
|
2163
|
+
// broadcast:
|
2164
|
+
// n_head % n_head_kv == 0
|
2165
|
+
// n_head % ne32 == 0
|
2166
|
+
// ne3 % ne33 == 0
|
2167
|
+
//
|
1834
2168
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
1835
2169
|
struct ggml_context * ctx,
|
1836
2170
|
struct ggml_tensor * q,
|
@@ -1848,6 +2182,10 @@ extern "C" {
|
|
1848
2182
|
GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
|
1849
2183
|
const struct ggml_tensor * a);
|
1850
2184
|
|
2185
|
+
GGML_API void ggml_flash_attn_ext_add_sinks(
|
2186
|
+
struct ggml_tensor * a,
|
2187
|
+
struct ggml_tensor * sinks);
|
2188
|
+
|
1851
2189
|
// TODO: needs to be adapted to ggml_flash_attn_ext
|
1852
2190
|
GGML_API struct ggml_tensor * ggml_flash_attn_back(
|
1853
2191
|
struct ggml_context * ctx,
|
@@ -1869,7 +2207,8 @@ extern "C" {
|
|
1869
2207
|
struct ggml_tensor * dt,
|
1870
2208
|
struct ggml_tensor * A,
|
1871
2209
|
struct ggml_tensor * B,
|
1872
|
-
struct ggml_tensor * C
|
2210
|
+
struct ggml_tensor * C,
|
2211
|
+
struct ggml_tensor * ids);
|
1873
2212
|
|
1874
2213
|
// partition into non-overlapping windows with padding if needed
|
1875
2214
|
// example:
|
@@ -2052,7 +2391,14 @@ extern "C" {
|
|
2052
2391
|
struct ggml_tensor * grad,
|
2053
2392
|
struct ggml_tensor * m,
|
2054
2393
|
struct ggml_tensor * v,
|
2055
|
-
struct ggml_tensor * adamw_params); // parameters such
|
2394
|
+
struct ggml_tensor * adamw_params); // parameters such as the learning rate
|
2395
|
+
|
2396
|
+
// stochastic gradient descent step (with weight decay)
|
2397
|
+
GGML_API struct ggml_tensor * ggml_opt_step_sgd(
|
2398
|
+
struct ggml_context * ctx,
|
2399
|
+
struct ggml_tensor * a,
|
2400
|
+
struct ggml_tensor * grad,
|
2401
|
+
struct ggml_tensor * sgd_params); // alpha, weight decay
|
2056
2402
|
|
2057
2403
|
//
|
2058
2404
|
// automatic differentiation
|
@@ -2086,9 +2432,6 @@ extern "C" {
|
|
2086
2432
|
GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
|
2087
2433
|
GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
|
2088
2434
|
|
2089
|
-
GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
|
2090
|
-
GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
|
2091
|
-
|
2092
2435
|
// print info and performance information for the graph
|
2093
2436
|
GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
|
2094
2437
|
|
@@ -2172,6 +2515,7 @@ extern "C" {
|
|
2172
2515
|
|
2173
2516
|
// scheduling priorities
|
2174
2517
|
enum ggml_sched_priority {
|
2518
|
+
GGML_SCHED_PRIO_LOW = -1,
|
2175
2519
|
GGML_SCHED_PRIO_NORMAL,
|
2176
2520
|
GGML_SCHED_PRIO_MEDIUM,
|
2177
2521
|
GGML_SCHED_PRIO_HIGH,
|