whispercpp 1.3.2 → 1.3.4
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -3
- data/README.md +71 -14
- data/Rakefile +20 -7
- data/ext/.gitignore +4 -6
- data/ext/dependencies.rb +36 -24
- data/ext/extconf.rb +1 -1
- data/ext/options.rb +48 -184
- data/ext/ruby_whisper.c +18 -0
- data/ext/ruby_whisper_context.c +43 -12
- data/ext/ruby_whisper_model.c +1 -1
- data/ext/ruby_whisper_params.c +59 -27
- data/ext/ruby_whisper_segment.c +81 -4
- data/ext/ruby_whisper_transcribe.cpp +13 -7
- data/ext/ruby_whisper_vad_params.c +1 -1
- data/ext/sources/CMakeLists.txt +5 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/build-xcframework.sh +24 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
- data/ext/sources/examples/addon.node/addon.cpp +154 -35
- data/ext/sources/examples/addon.node/index.js +10 -5
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/bench.cpp +29 -18
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +7 -4
- data/ext/sources/examples/command/command.cpp +58 -32
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/common-whisper.cpp +14 -7
- data/ext/sources/examples/lsp/lsp.cpp +21 -17
- data/ext/sources/examples/quantize/quantize.cpp +3 -0
- data/ext/sources/examples/server/CMakeLists.txt +3 -0
- data/ext/sources/examples/server/server.cpp +193 -35
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +10 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +101 -4
- data/ext/sources/examples/talk-llama/llama-adapter.h +6 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +756 -15
- data/ext/sources/examples/talk-llama/llama-arch.h +85 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +773 -272
- data/ext/sources/examples/talk-llama/llama-batch.h +126 -55
- data/ext/sources/examples/talk-llama/llama-chat.cpp +150 -13
- data/ext/sources/examples/talk-llama/llama-chat.h +8 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +814 -542
- data/ext/sources/examples/talk-llama/llama-context.h +68 -32
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-cparams.h +4 -4
- data/ext/sources/examples/talk-llama/llama-graph.cpp +787 -440
- data/ext/sources/examples/talk-llama/llama-graph.h +333 -153
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +128 -6
- data/ext/sources/examples/talk-llama/llama-hparams.h +80 -17
- data/ext/sources/examples/talk-llama/llama-impl.h +2 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +326 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +137 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +1248 -1967
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +218 -345
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +164 -52
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +266 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +139 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1154 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +182 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +94 -4
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +44 -17
- data/ext/sources/examples/talk-llama/llama-model-loader.h +3 -2
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +11377 -5248
- data/ext/sources/examples/talk-llama/llama-model.h +87 -9
- data/ext/sources/examples/talk-llama/llama-quant.cpp +137 -16
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +226 -126
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +502 -38
- data/ext/sources/examples/talk-llama/llama-vocab.h +46 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -17
- data/ext/sources/examples/talk-llama/llama.h +176 -151
- data/ext/sources/examples/talk-llama/talk-llama.cpp +11 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +212 -0
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +6 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +17 -16
- data/ext/sources/ggml/CMakeLists.txt +106 -33
- data/ext/sources/ggml/cmake/common.cmake +24 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-backend.h +18 -2
- data/ext/sources/ggml/include/ggml-cpu.h +2 -0
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml.h +365 -21
- data/ext/sources/ggml/src/CMakeLists.txt +98 -25
- data/ext/sources/ggml/src/ggml-alloc.c +265 -141
- data/ext/sources/ggml/src/ggml-backend-impl.h +4 -1
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +35 -13
- data/ext/sources/ggml/src/ggml-backend.cpp +266 -60
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +4 -4
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -4
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +15 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +903 -717
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +143 -25
- data/ext/sources/ggml/src/ggml-cann/common.h +149 -2
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +521 -78
- data/ext/sources/ggml/src/ggml-common.h +21 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +165 -50
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +5 -3
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +3650 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1891 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2160 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1897 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +214 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +18 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +23 -7
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +179 -110
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +44 -33
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +152 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +7 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +228 -98
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +532 -1124
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +3374 -2081
- data/ext/sources/ggml/src/ggml-cpu/ops.h +13 -8
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1193 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +34 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1982 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +120 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +367 -46
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +3 -3
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +272 -35
- data/ext/sources/ggml/src/ggml-cpu/vec.h +794 -142
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +20 -16
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +291 -81
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +117 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +20 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +64 -307
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +499 -368
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +142 -93
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +755 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +593 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +90 -50
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +185 -198
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +636 -222
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +73 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +198 -45
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +123 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +496 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +206 -57
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1262 -721
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +506 -0
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cuh → mmvf.cuh} +4 -5
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +64 -73
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +46 -23
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +21 -27
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +276 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +126 -59
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +322 -98
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +23 -19
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +21 -18
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +259 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +179 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +92 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +58 -36
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +4 -3
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -2
- data/ext/sources/ggml/src/ggml-impl.h +229 -175
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +21 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +600 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1376 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +226 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1308 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +163 -63
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +3158 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +82 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +718 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3208 -1575
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +18 -8
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +32 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4430 -792
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +133 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +117 -24
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +85 -62
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +13 -17
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +21 -2
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +116 -211
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +700 -1041
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +20 -9
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +17 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +2 -96
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +393 -250
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +32 -8
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -11
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +125 -21
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +4 -3
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +105 -17
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4198 -1145
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +41 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +13 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +39 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +349 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +66 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +2 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +6 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +4 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +69 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +60 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +98 -42
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +64 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +74 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +4 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +19 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +25 -15
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +18 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +126 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +65 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +11 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -531
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +206 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp +556 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +12 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +15 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +111 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +24 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +53 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +64 -11
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +29 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +38 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +4 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +101 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +338 -71
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1558 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +57 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +48 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml.c +802 -142
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +32 -4
- data/ext/sources/include/whisper.h +2 -0
- data/ext/sources/src/CMakeLists.txt +2 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
- data/ext/sources/src/whisper.cpp +241 -215
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +3 -3
- data/ext/sources/tests/test-vad.cpp +2 -2
- data/extsources.rb +15 -9
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +57 -2
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +75 -38
- data/{tests → test}/helper.rb +1 -12
- data/{tests → test}/test_model.rb +9 -0
- data/test/test_package.rb +51 -0
- data/{tests → test}/test_params.rb +8 -0
- data/test/test_segment.rb +146 -0
- data/{tests → test}/test_whisper.rb +70 -0
- data/whispercpp.gemspec +2 -3
- metadata +246 -191
- data/ext/sources/.dockerignore +0 -3
- data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
- data/ext/sources/ci/run.sh +0 -336
- data/ext/sources/close-issue.yml +0 -28
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -6431
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +0 -336
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -5998
- data/tests/test_package.rb +0 -46
- data/tests/test_segment.rb +0 -74
- /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
- /data/{tests → test}/test_callback.rb +0 -0
- /data/{tests → test}/test_error.rb +0 -0
- /data/{tests → test}/test_vad.rb +0 -0
- /data/{tests → test}/test_vad_params.rb +0 -0
data/ext/sources/src/whisper.cpp
CHANGED
@@ -206,15 +206,6 @@ static bool ggml_graph_compute_helper(
|
|
206
206
|
return t;
|
207
207
|
}
|
208
208
|
|
209
|
-
static void whisper_load_backends() {
|
210
|
-
#ifdef GGML_BACKEND_DL
|
211
|
-
static std::once_flag flag;
|
212
|
-
std::call_once(flag, []() {
|
213
|
-
ggml_backend_load_all();
|
214
|
-
});
|
215
|
-
#endif
|
216
|
-
}
|
217
|
-
|
218
209
|
// TODO: move these functions to ggml-base with support for ggml-backend?
|
219
210
|
|
220
211
|
static ggml_tensor * whisper_set_f32(struct ggml_tensor * t, float v) {
|
@@ -261,45 +252,6 @@ static void whisper_set_i32_nd(struct ggml_tensor * t, int64_t i0, int64_t i1, i
|
|
261
252
|
*(int32_t *) data = v;
|
262
253
|
}
|
263
254
|
|
264
|
-
// faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
|
265
|
-
// the idea is to represent the original matrix multiplication:
|
266
|
-
//
|
267
|
-
// Z = X @ Y
|
268
|
-
//
|
269
|
-
// with the sum of two matrix multiplications:
|
270
|
-
//
|
271
|
-
// Z = (X_0 @ Y_0) + (X_1 @ Y_1)
|
272
|
-
//
|
273
|
-
// here X_0 and Y_0 are views of X and Y that have dimension 0 divisible by "pad"
|
274
|
-
// and X_1 and Y_1 are the remaining views. X_1 and Y_1 end up being small matrices that can be processed with more
|
275
|
-
// general-purpose kernels
|
276
|
-
//
|
277
|
-
static struct ggml_tensor * ggml_mul_mat_pad(struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32) {
|
278
|
-
// use padding only if dimension 0 is at least 8 times larger than the padding
|
279
|
-
// else we won't get much benefit from the optimization
|
280
|
-
const int n_pad_req = 8;
|
281
|
-
|
282
|
-
if (x->ne[0] % pad == 0 || x->ne[0] / pad < n_pad_req) {
|
283
|
-
return ggml_mul_mat(ctx, x, y);
|
284
|
-
}
|
285
|
-
|
286
|
-
struct ggml_tensor * x_0 = ggml_view_3d(ctx, x, (x->ne[0]/pad)*pad, x->ne[1], x->ne[2], x->nb[1], x->nb[2], 0);
|
287
|
-
struct ggml_tensor * x_1 = ggml_view_3d(ctx, x, x->ne[0]%pad, x->ne[1], x->ne[2], x->nb[1], x->nb[2], x_0->ne[0]*x_0->nb[0]);
|
288
|
-
|
289
|
-
struct ggml_tensor * y_0 = ggml_view_3d(ctx, y, (y->ne[0]/pad)*pad, y->ne[1], y->ne[2], y->nb[1], y->nb[2], 0);
|
290
|
-
struct ggml_tensor * y_1 = ggml_view_3d(ctx, y, y->ne[0]%pad, y->ne[1], y->ne[2], y->nb[1], y->nb[2], y_0->ne[0]*y_0->nb[0]);
|
291
|
-
|
292
|
-
return ggml_add(ctx,
|
293
|
-
ggml_mul_mat(ctx, x_0, y_0),
|
294
|
-
ggml_mul_mat(ctx, x_1, y_1));
|
295
|
-
}
|
296
|
-
|
297
|
-
// TODO: check if other platforms can benefit from this optimization
|
298
|
-
// TODO: CUDA is currently broken - seems ggml_mul_mat does not handle views correctly
|
299
|
-
#if defined(GGML_USE_METAL)
|
300
|
-
#define ggml_mul_mat ggml_mul_mat_pad
|
301
|
-
#endif
|
302
|
-
|
303
255
|
// available whisper models
|
304
256
|
enum e_model {
|
305
257
|
MODEL_UNKNOWN,
|
@@ -868,6 +820,11 @@ struct whisper_aheads_masks {
|
|
868
820
|
ggml_backend_buffer_t buffer = nullptr;
|
869
821
|
};
|
870
822
|
|
823
|
+
struct vad_time_mapping {
|
824
|
+
int64_t processed_time; // Time in processed (VAD) audio
|
825
|
+
int64_t original_time; // Corresponding time in original audio
|
826
|
+
};
|
827
|
+
|
871
828
|
struct whisper_state {
|
872
829
|
int64_t t_sample_us = 0;
|
873
830
|
int64_t t_encode_us = 0;
|
@@ -957,13 +914,15 @@ struct whisper_state {
|
|
957
914
|
whisper_vad_context * vad_context = nullptr;
|
958
915
|
|
959
916
|
struct vad_segment_info {
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
917
|
+
int64_t orig_start;
|
918
|
+
int64_t orig_end;
|
919
|
+
int64_t vad_start;
|
920
|
+
int64_t vad_end;
|
964
921
|
};
|
965
922
|
std::vector<vad_segment_info> vad_segments;
|
966
923
|
bool has_vad_segments = false;
|
924
|
+
|
925
|
+
std::vector<vad_time_mapping> vad_mapping_table;
|
967
926
|
};
|
968
927
|
|
969
928
|
struct whisper_context {
|
@@ -1322,8 +1281,6 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) {
|
|
1322
1281
|
static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
|
1323
1282
|
ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);
|
1324
1283
|
|
1325
|
-
whisper_load_backends();
|
1326
|
-
|
1327
1284
|
ggml_backend_dev_t dev = nullptr;
|
1328
1285
|
|
1329
1286
|
int cnt = 0;
|
@@ -1331,7 +1288,7 @@ static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & pa
|
|
1331
1288
|
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
1332
1289
|
ggml_backend_dev_t dev_cur = ggml_backend_dev_get(i);
|
1333
1290
|
if (ggml_backend_dev_type(dev_cur) == GGML_BACKEND_DEVICE_TYPE_GPU) {
|
1334
|
-
if (cnt ==
|
1291
|
+
if (cnt == params.gpu_device) {
|
1335
1292
|
dev = dev_cur;
|
1336
1293
|
}
|
1337
1294
|
|
@@ -1400,7 +1357,7 @@ static buft_list_t make_buft_list(whisper_context_params & params) {
|
|
1400
1357
|
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
1401
1358
|
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
1402
1359
|
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
|
1403
|
-
if (cnt ==
|
1360
|
+
if (cnt == params.gpu_device) {
|
1404
1361
|
auto * buft = ggml_backend_dev_buffer_type(dev);
|
1405
1362
|
if (buft) {
|
1406
1363
|
buft_list.emplace_back(dev, buft);
|
@@ -1442,7 +1399,8 @@ static bool weight_buft_supported(const whisper_hparams & hparams, ggml_tensor *
|
|
1442
1399
|
op_supported = true;
|
1443
1400
|
} else {
|
1444
1401
|
switch (op) {
|
1445
|
-
// The current extra_buffer_type implementations only support GGML_OP_MUL_MAT
|
1402
|
+
// The current extra_buffer_type implementations only support GGML_OP_MUL_MAT and GGML_OP_GET_ROWS
|
1403
|
+
case GGML_OP_GET_ROWS:
|
1446
1404
|
case GGML_OP_MUL_MAT: {
|
1447
1405
|
ggml_init_params params = {
|
1448
1406
|
/*.mem_size =*/ 2 * ggml_tensor_overhead(),
|
@@ -1458,9 +1416,15 @@ static bool weight_buft_supported(const whisper_hparams & hparams, ggml_tensor *
|
|
1458
1416
|
|
1459
1417
|
ggml_tensor * op_tensor = nullptr;
|
1460
1418
|
|
1461
|
-
|
1462
|
-
|
1463
|
-
|
1419
|
+
if (op == GGML_OP_MUL_MAT) {
|
1420
|
+
int64_t n_ctx = hparams.n_audio_ctx;
|
1421
|
+
ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, w->ne[0], n_ctx, w->ne[2], w->ne[3]);
|
1422
|
+
op_tensor = ggml_mul_mat(ctx, w, b);
|
1423
|
+
} else if (op == GGML_OP_GET_ROWS) {
|
1424
|
+
int64_t num_indices = 8;
|
1425
|
+
ggml_tensor * indices = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, num_indices);
|
1426
|
+
op_tensor = ggml_get_rows(ctx, w, indices);
|
1427
|
+
}
|
1464
1428
|
|
1465
1429
|
// create a temporary dummy buffer for the weight so that supports_op can check the buffer type
|
1466
1430
|
GGML_ASSERT(w->buffer == nullptr);
|
@@ -2429,6 +2393,8 @@ static bool whisper_encode_internal(
|
|
2429
2393
|
return false;
|
2430
2394
|
}
|
2431
2395
|
} else {
|
2396
|
+
ggml_backend_sched_reset(sched);
|
2397
|
+
|
2432
2398
|
#if defined(WHISPER_USE_COREML)
|
2433
2399
|
whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data);
|
2434
2400
|
#elif defined(WHISPER_USE_OPENVINO)
|
@@ -3626,7 +3592,7 @@ int whisper_ctx_init_openvino_encoder(
|
|
3626
3592
|
struct whisper_context_params whisper_context_default_params() {
|
3627
3593
|
struct whisper_context_params result = {
|
3628
3594
|
/*.use_gpu =*/ true,
|
3629
|
-
/*.flash_attn =*/
|
3595
|
+
/*.flash_attn =*/ true,
|
3630
3596
|
/*.gpu_device =*/ 0,
|
3631
3597
|
|
3632
3598
|
/*.dtw_token_timestamps =*/ false,
|
@@ -4335,8 +4301,6 @@ static int whisper_has_openvino(void) {
|
|
4335
4301
|
const char * whisper_print_system_info(void) {
|
4336
4302
|
static std::string s;
|
4337
4303
|
|
4338
|
-
whisper_load_backends();
|
4339
|
-
|
4340
4304
|
s = "";
|
4341
4305
|
s += "WHISPER : ";
|
4342
4306
|
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
|
@@ -4420,8 +4384,8 @@ struct whisper_vad_model {
|
|
4420
4384
|
};
|
4421
4385
|
|
4422
4386
|
struct whisper_vad_segment {
|
4423
|
-
|
4424
|
-
|
4387
|
+
int64_t start;
|
4388
|
+
int64_t end;
|
4425
4389
|
};
|
4426
4390
|
|
4427
4391
|
struct whisper_vad_segments {
|
@@ -4469,6 +4433,15 @@ struct whisper_vad_params whisper_vad_default_params(void) {
|
|
4469
4433
|
return result;
|
4470
4434
|
}
|
4471
4435
|
|
4436
|
+
// Time conversion utility functions for whisper VAD
|
4437
|
+
static int cs_to_samples(int64_t cs) {
|
4438
|
+
return (int)((cs / 100.0) * WHISPER_SAMPLE_RATE + 0.5);
|
4439
|
+
}
|
4440
|
+
|
4441
|
+
static int64_t samples_to_cs(int samples) {
|
4442
|
+
return (int64_t)((samples / (double)WHISPER_SAMPLE_RATE) * 100.0 + 0.5);
|
4443
|
+
}
|
4444
|
+
|
4472
4445
|
static bool weight_buft_supported(const whisper_vad_hparams & hparams, ggml_tensor * w, ggml_op op, ggml_backend_buffer_type_t buft, ggml_backend_dev_t dev) {
|
4473
4446
|
bool op_supported = true;
|
4474
4447
|
|
@@ -4703,6 +4676,7 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) {
|
|
4703
4676
|
ggml_set_name(vctx->c_state, "c_state");
|
4704
4677
|
|
4705
4678
|
vctx->buffer = ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]);
|
4679
|
+
ggml_free(ctx);
|
4706
4680
|
if (!vctx->buffer) {
|
4707
4681
|
WHISPER_LOG_ERROR("%s: failed to allocate memory for the VAD state\n", __func__);
|
4708
4682
|
return false;
|
@@ -5413,12 +5387,12 @@ struct whisper_vad_segments * whisper_vad_segments_from_probs(
|
|
5413
5387
|
(speeches[i].end + speech_pad_samples) : audio_length_samples;
|
5414
5388
|
}
|
5415
5389
|
|
5416
|
-
// Convert from samples to
|
5417
|
-
segments[i].start = (
|
5418
|
-
segments[i].end = (
|
5390
|
+
// Convert from samples to centiseconds
|
5391
|
+
segments[i].start = samples_to_cs(speeches[i].start);
|
5392
|
+
segments[i].end = samples_to_cs(speeches[i].end);
|
5419
5393
|
|
5420
5394
|
WHISPER_LOG_INFO("%s: VAD segment %d: start = %.2f, end = %.2f (duration: %.2f)\n",
|
5421
|
-
__func__, i, segments[i].start, segments[i].end, segments[i].end - segments[i].start);
|
5395
|
+
__func__, i, segments[i].start/100.0, segments[i].end/100.0, (segments[i].end - segments[i].start)/100.0);
|
5422
5396
|
}
|
5423
5397
|
|
5424
5398
|
whisper_vad_segments * vad_segments = new whisper_vad_segments;
|
@@ -5447,6 +5421,9 @@ struct whisper_vad_segments * whisper_vad_segments_from_samples(
|
|
5447
5421
|
|
5448
5422
|
void whisper_vad_free(whisper_vad_context * ctx) {
|
5449
5423
|
if (ctx) {
|
5424
|
+
if (ctx->buffer) {
|
5425
|
+
ggml_backend_buffer_free(ctx->buffer);
|
5426
|
+
}
|
5450
5427
|
for (ggml_context * context : ctx->model.ctxs) {
|
5451
5428
|
ggml_free(context);
|
5452
5429
|
}
|
@@ -5461,6 +5438,9 @@ void whisper_vad_free(whisper_vad_context * ctx) {
|
|
5461
5438
|
ggml_backend_free(backend);
|
5462
5439
|
}
|
5463
5440
|
|
5441
|
+
delete[] ctx->model.hparams.encoder_in_channels;
|
5442
|
+
delete[] ctx->model.hparams.encoder_out_channels;
|
5443
|
+
delete[] ctx->model.hparams.kernel_sizes;
|
5464
5444
|
|
5465
5445
|
delete ctx;
|
5466
5446
|
}
|
@@ -6615,10 +6595,13 @@ static bool whisper_vad(
|
|
6615
6595
|
struct whisper_full_params params,
|
6616
6596
|
const float * samples,
|
6617
6597
|
int n_samples,
|
6618
|
-
std::vector<float> & filtered_samples
|
6619
|
-
|
6620
|
-
|
6621
|
-
|
6598
|
+
std::vector<float> & filtered_samples) {
|
6599
|
+
WHISPER_LOG_INFO("%s: VAD is enabled, processing speech segments only\n", __func__);
|
6600
|
+
int filtered_n_samples = 0;
|
6601
|
+
|
6602
|
+
// Clear any existing mapping table
|
6603
|
+
state->vad_mapping_table.clear();
|
6604
|
+
state->has_vad_segments = false;
|
6622
6605
|
|
6623
6606
|
if (state->vad_context == nullptr) {
|
6624
6607
|
struct whisper_vad_context_params vad_ctx_params = whisper_vad_default_context_params();
|
@@ -6640,13 +6623,17 @@ static bool whisper_vad(
|
|
6640
6623
|
ctx->state->vad_segments.clear();
|
6641
6624
|
ctx->state->vad_segments.reserve(vad_segments->data.size());
|
6642
6625
|
|
6626
|
+
// Initialize the time mapping table
|
6627
|
+
state->vad_mapping_table.clear();
|
6628
|
+
state->vad_mapping_table.reserve(vad_segments->data.size() * 4);
|
6629
|
+
|
6643
6630
|
WHISPER_LOG_INFO("%s: detected %d speech segments\n", __func__, (int)vad_segments->data.size());
|
6644
6631
|
float overlap_seconds = vad_params.samples_overlap;
|
6645
6632
|
int overlap_samples = overlap_seconds * WHISPER_SAMPLE_RATE;
|
6646
6633
|
|
6647
6634
|
for (int i = 0; i < (int)vad_segments->data.size(); i++) {
|
6648
|
-
int segment_start_samples = vad_segments->data[i].start
|
6649
|
-
int segment_end_samples = vad_segments->data[i].end
|
6635
|
+
int segment_start_samples = cs_to_samples(vad_segments->data[i].start);
|
6636
|
+
int segment_end_samples = cs_to_samples(vad_segments->data[i].end);
|
6650
6637
|
|
6651
6638
|
if (i < (int)vad_segments->data.size() - 1) {
|
6652
6639
|
segment_end_samples += overlap_samples;
|
@@ -6655,9 +6642,9 @@ static bool whisper_vad(
|
|
6655
6642
|
filtered_n_samples += (segment_end_samples - segment_start_samples);
|
6656
6643
|
|
6657
6644
|
WHISPER_LOG_INFO("%s: Including segment %d: %.2f - %.2f (duration: %.2f)\n",
|
6658
|
-
__func__, i, vad_segments->data[i].start,
|
6659
|
-
vad_segments->data[i].end + (i < (int)vad_segments->data.size() - 1 ? overlap_seconds : 0),
|
6660
|
-
(vad_segments->data[i].end - vad_segments->data[i].start) +
|
6645
|
+
__func__, i, vad_segments->data[i].start/100.0,
|
6646
|
+
(vad_segments->data[i].end/100.0 + (i < (int)vad_segments->data.size() - 1 ? overlap_seconds : 0)),
|
6647
|
+
(vad_segments->data[i].end - vad_segments->data[i].start)/100.0 +
|
6661
6648
|
(i < (int)vad_segments->data.size() - 1 ? overlap_seconds : 0));
|
6662
6649
|
}
|
6663
6650
|
|
@@ -6679,8 +6666,8 @@ static bool whisper_vad(
|
|
6679
6666
|
|
6680
6667
|
int offset = 0;
|
6681
6668
|
for (int i = 0; i < (int)vad_segments->data.size(); i++) {
|
6682
|
-
int segment_start_samples = vad_segments->data[i].start
|
6683
|
-
int segment_end_samples = vad_segments->data[i].end
|
6669
|
+
int segment_start_samples = cs_to_samples(vad_segments->data[i].start);
|
6670
|
+
int segment_end_samples = cs_to_samples(vad_segments->data[i].end);
|
6684
6671
|
|
6685
6672
|
if (i < (int)vad_segments->data.size() - 1) {
|
6686
6673
|
segment_end_samples += overlap_samples;
|
@@ -6689,18 +6676,47 @@ static bool whisper_vad(
|
|
6689
6676
|
segment_start_samples = std::min(segment_start_samples, n_samples - 1);
|
6690
6677
|
segment_end_samples = std::min(segment_end_samples, n_samples);
|
6691
6678
|
int segment_length = segment_end_samples - segment_start_samples;
|
6692
|
-
|
6693
6679
|
if (segment_length > 0) {
|
6694
6680
|
whisper_state::vad_segment_info segment;
|
6695
6681
|
|
6696
6682
|
segment.orig_start = vad_segments->data[i].start;
|
6697
6683
|
segment.orig_end = vad_segments->data[i].end;
|
6698
6684
|
|
6699
|
-
segment.vad_start = offset
|
6700
|
-
segment.vad_end = (offset + segment_length)
|
6685
|
+
segment.vad_start = samples_to_cs(offset);
|
6686
|
+
segment.vad_end = samples_to_cs(offset + segment_length);
|
6687
|
+
|
6688
|
+
// Add segment boundaries to mapping table
|
6689
|
+
vad_time_mapping start_mapping = {segment.vad_start, segment.orig_start};
|
6690
|
+
vad_time_mapping end_mapping = {segment.vad_end, segment.orig_end};
|
6691
|
+
|
6692
|
+
state->vad_mapping_table.push_back(start_mapping);
|
6693
|
+
state->vad_mapping_table.push_back(end_mapping);
|
6694
|
+
|
6695
|
+
// Add intermediate points for longer segments to improve interpolation accuracy
|
6696
|
+
const int64_t min_segment_length = 100; // 1 second
|
6697
|
+
const int64_t point_interval = 20; // Add a point every 200ms
|
6698
|
+
|
6699
|
+
if (segment.vad_end - segment.vad_start > min_segment_length) {
|
6700
|
+
int64_t segment_duration = segment.vad_end - segment.vad_start;
|
6701
|
+
int num_points = (int)(segment_duration / point_interval) - 1;
|
6702
|
+
|
6703
|
+
for (int j = 1; j <= num_points; j++) {
|
6704
|
+
int64_t vad_time = segment.vad_start + j * point_interval;
|
6705
|
+
|
6706
|
+
if (vad_time >= segment.vad_end) continue;
|
6707
|
+
|
6708
|
+
int64_t vad_elapsed = vad_time - segment.vad_start;
|
6709
|
+
int64_t vad_total = segment.vad_end - segment.vad_start;
|
6710
|
+
int64_t orig_total = segment.orig_end - segment.orig_start;
|
6711
|
+
int64_t orig_time = segment.orig_start + (vad_elapsed * orig_total) / vad_total;
|
6712
|
+
|
6713
|
+
vad_time_mapping intermediate_mapping = {vad_time, orig_time};
|
6714
|
+
state->vad_mapping_table.push_back(intermediate_mapping);
|
6715
|
+
}
|
6716
|
+
}
|
6701
6717
|
|
6702
6718
|
WHISPER_LOG_INFO("%s: vad_segment_info: orig_start: %.2f, orig_end: %.2f, vad_start: %.2f, vad_end: %.2f\n",
|
6703
|
-
__func__, segment.orig_start, segment.orig_end, segment.vad_start, segment.vad_end);
|
6719
|
+
__func__, segment.orig_start/100.0, segment.orig_end/100.0, segment.vad_start/100.0, segment.vad_end/100.0);
|
6704
6720
|
ctx->state->vad_segments.push_back(segment);
|
6705
6721
|
|
6706
6722
|
// Copy this speech segment
|
@@ -6709,6 +6725,17 @@ static bool whisper_vad(
|
|
6709
6725
|
|
6710
6726
|
// Add silence after this segment (except after the last segment)
|
6711
6727
|
if (i < (int)vad_segments->data.size() - 1) {
|
6728
|
+
// Calculate the start and end time of the silence gap in processed audio
|
6729
|
+
int64_t silence_start_vad = samples_to_cs(offset);
|
6730
|
+
int64_t silence_end_vad = samples_to_cs(offset + silence_samples);
|
6731
|
+
// Calculate the corresponding original times
|
6732
|
+
int64_t orig_silence_start = segment.orig_end;
|
6733
|
+
int64_t orig_silence_end = vad_segments->data[i+1].start;
|
6734
|
+
|
6735
|
+
// Add mapping points for silence boundaries
|
6736
|
+
state->vad_mapping_table.push_back({silence_start_vad, orig_silence_start});
|
6737
|
+
state->vad_mapping_table.push_back({silence_end_vad, orig_silence_end});
|
6738
|
+
|
6712
6739
|
// Fill with zeros (silence)
|
6713
6740
|
memset(filtered_samples.data() + offset, 0, silence_samples * sizeof(float));
|
6714
6741
|
offset += silence_samples;
|
@@ -6716,6 +6743,24 @@ static bool whisper_vad(
|
|
6716
6743
|
}
|
6717
6744
|
}
|
6718
6745
|
|
6746
|
+
// Sort the mapping table by processed time
|
6747
|
+
std::sort(state->vad_mapping_table.begin(), state->vad_mapping_table.end(),
|
6748
|
+
[](const vad_time_mapping& a, const vad_time_mapping& b) {
|
6749
|
+
return a.processed_time < b.processed_time;
|
6750
|
+
});
|
6751
|
+
|
6752
|
+
// Remove any duplicate processed times to ensure monotonicity which is
|
6753
|
+
// needed for binary search and interpolation later.
|
6754
|
+
if (!state->vad_mapping_table.empty()) {
|
6755
|
+
auto last = std::unique(state->vad_mapping_table.begin(), state->vad_mapping_table.end(),
|
6756
|
+
[](const vad_time_mapping& a, const vad_time_mapping& b) {
|
6757
|
+
return a.processed_time == b.processed_time;
|
6758
|
+
});
|
6759
|
+
state->vad_mapping_table.erase(last, state->vad_mapping_table.end());
|
6760
|
+
}
|
6761
|
+
|
6762
|
+
WHISPER_LOG_INFO("%s: Created time mapping table with %d points\n", __func__, (int)state->vad_mapping_table.size());
|
6763
|
+
|
6719
6764
|
filtered_n_samples = offset;
|
6720
6765
|
WHISPER_LOG_INFO("%s: Reduced audio from %d to %d samples (%.1f%% reduction)\n",
|
6721
6766
|
__func__, n_samples, filtered_n_samples, 100.0f * (1.0f - (float)filtered_n_samples / n_samples));
|
@@ -6735,27 +6780,9 @@ int whisper_full_with_state(
|
|
6735
6780
|
|
6736
6781
|
result_all.clear();
|
6737
6782
|
|
6738
|
-
|
6739
|
-
int n_process_samples = n_samples;
|
6740
|
-
std::vector<float> vad_samples;
|
6741
|
-
|
6742
|
-
if (params.vad) {
|
6743
|
-
WHISPER_LOG_INFO("%s: VAD is enabled, processing speech segments only\n", __func__);
|
6744
|
-
int vad_n_samples;
|
6745
|
-
if (!whisper_vad(ctx, state, params, samples, n_samples, vad_samples, vad_n_samples)) {
|
6746
|
-
WHISPER_LOG_ERROR("%s: failed to compute VAD\n", __func__);
|
6747
|
-
return -1;
|
6748
|
-
}
|
6749
|
-
if (vad_n_samples == 0) {
|
6750
|
-
return 0;
|
6751
|
-
}
|
6752
|
-
process_samples = vad_samples.data();
|
6753
|
-
n_process_samples = vad_n_samples;
|
6754
|
-
}
|
6755
|
-
|
6756
|
-
if (n_process_samples > 0) {
|
6783
|
+
if (n_samples > 0) {
|
6757
6784
|
// compute log mel spectrogram
|
6758
|
-
if (whisper_pcm_to_mel_with_state(ctx, state,
|
6785
|
+
if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
|
6759
6786
|
WHISPER_LOG_ERROR("%s: failed to compute log mel spectrogram\n", __func__);
|
6760
6787
|
return -2;
|
6761
6788
|
}
|
@@ -7665,6 +7692,21 @@ int whisper_full(
|
|
7665
7692
|
struct whisper_full_params params,
|
7666
7693
|
const float * samples,
|
7667
7694
|
int n_samples) {
|
7695
|
+
|
7696
|
+
std::vector<float> vad_samples;
|
7697
|
+
if (params.vad) {
|
7698
|
+
WHISPER_LOG_INFO("%s: VAD is enabled, processing speech segments only\n", __func__);
|
7699
|
+
if (!whisper_vad(ctx, ctx->state, params, samples, n_samples, vad_samples)) {
|
7700
|
+
WHISPER_LOG_ERROR("%s: failed to compute VAD\n", __func__);
|
7701
|
+
return -1;
|
7702
|
+
}
|
7703
|
+
if (vad_samples.empty()) {
|
7704
|
+
ctx->state->result_all.clear();
|
7705
|
+
return 0;
|
7706
|
+
}
|
7707
|
+
samples = vad_samples.data();
|
7708
|
+
n_samples = vad_samples.size();
|
7709
|
+
}
|
7668
7710
|
return whisper_full_with_state(ctx, ctx->state, params, samples, n_samples);
|
7669
7711
|
}
|
7670
7712
|
|
@@ -7674,9 +7716,24 @@ int whisper_full_parallel(
|
|
7674
7716
|
const float * samples,
|
7675
7717
|
int n_samples,
|
7676
7718
|
int n_processors) {
|
7719
|
+
|
7677
7720
|
if (n_processors == 1) {
|
7678
7721
|
return whisper_full(ctx, params, samples, n_samples);
|
7679
7722
|
}
|
7723
|
+
|
7724
|
+
std::vector<float> vad_samples;
|
7725
|
+
if (params.vad) {
|
7726
|
+
WHISPER_LOG_INFO("%s: VAD is enabled, processing speech segments only\n", __func__);
|
7727
|
+
if (!whisper_vad(ctx, ctx->state, params, samples, n_samples, vad_samples)) {
|
7728
|
+
WHISPER_LOG_ERROR("%s: failed to compute VAD\n", __func__);
|
7729
|
+
return -1;
|
7730
|
+
}
|
7731
|
+
if (vad_samples.empty()) {
|
7732
|
+
return 0;
|
7733
|
+
}
|
7734
|
+
samples = vad_samples.data();
|
7735
|
+
n_samples = vad_samples.size();
|
7736
|
+
}
|
7680
7737
|
int ret = 0;
|
7681
7738
|
|
7682
7739
|
// prepare separate states for each thread
|
@@ -7799,130 +7856,89 @@ int whisper_full_lang_id(struct whisper_context * ctx) {
|
|
7799
7856
|
return ctx->state->lang_id;
|
7800
7857
|
}
|
7801
7858
|
|
7802
|
-
int64_t
|
7803
|
-
|
7804
|
-
|
7805
|
-
return state->result_all[i_segment].t0;
|
7859
|
+
static int64_t map_processed_to_original_time(int64_t processed_time, const std::vector<vad_time_mapping> & mapping_table) {
|
7860
|
+
if (mapping_table.empty()) {
|
7861
|
+
return processed_time;
|
7806
7862
|
}
|
7807
7863
|
|
7808
|
-
|
7809
|
-
|
7810
|
-
|
7811
|
-
float t0 = state->result_all[i_segment].t0 / 100.0f;
|
7864
|
+
if (processed_time <= mapping_table.front().processed_time) {
|
7865
|
+
return mapping_table.front().original_time; // Before first mapping point
|
7866
|
+
}
|
7812
7867
|
|
7813
|
-
|
7814
|
-
|
7815
|
-
|
7816
|
-
// the access pattern is sequential and optimized for that too.
|
7817
|
-
for (size_t i = 0; i < state->vad_segments.size(); i++) {
|
7818
|
-
const auto & segment = state->vad_segments[i];
|
7868
|
+
if (processed_time >= mapping_table.back().processed_time) {
|
7869
|
+
return mapping_table.back().original_time; // After last mapping point
|
7870
|
+
}
|
7819
7871
|
|
7820
|
-
|
7821
|
-
|
7822
|
-
|
7823
|
-
|
7824
|
-
|
7825
|
-
}
|
7826
|
-
float orig_t0 = segment.orig_start + proportion * (segment.orig_end - segment.orig_start);
|
7827
|
-
return (int64_t)(orig_t0 * 100);
|
7872
|
+
// Binary search over the time map that finds the first entry that has a
|
7873
|
+
// processed time greater than or equal to the current processed time.
|
7874
|
+
auto upper = std::lower_bound(mapping_table.begin(), mapping_table.end(), processed_time,
|
7875
|
+
[](const vad_time_mapping & entry, int64_t time) {
|
7876
|
+
return entry.processed_time < time;
|
7828
7877
|
}
|
7878
|
+
);
|
7879
|
+
|
7880
|
+
// If exact match found
|
7881
|
+
if (upper->processed_time == processed_time) {
|
7882
|
+
return upper->original_time;
|
7829
7883
|
}
|
7830
7884
|
|
7831
|
-
//
|
7832
|
-
|
7833
|
-
const auto & curr = state->vad_segments[i];
|
7834
|
-
const auto & next = state->vad_segments[i + 1];
|
7885
|
+
// Need to interpolate between two points
|
7886
|
+
auto lower = upper - 1;
|
7835
7887
|
|
7836
|
-
|
7837
|
-
|
7838
|
-
|
7839
|
-
if (next.vad_start > curr.vad_end) {
|
7840
|
-
gap_proportion = (t0 - curr.vad_end) / (next.vad_start - curr.vad_end);
|
7841
|
-
}
|
7842
|
-
float orig_t0 = curr.orig_end + gap_proportion * (next.orig_start - curr.orig_end);
|
7843
|
-
return (int64_t)(orig_t0 * 100);
|
7844
|
-
}
|
7845
|
-
}
|
7888
|
+
int64_t processed_diff = upper->processed_time - lower->processed_time;
|
7889
|
+
int64_t original_diff = upper->original_time - lower->original_time;
|
7890
|
+
int64_t offset = processed_time - lower->processed_time;
|
7846
7891
|
|
7847
|
-
|
7848
|
-
|
7849
|
-
// For timestamps after the last segment, add the extra time to the end of the last segment
|
7850
|
-
const auto& last = state->vad_segments.back();
|
7851
|
-
// Calculate how far beyond the last segment
|
7852
|
-
float extra_time = t0 - last.vad_end;
|
7853
|
-
// Add this extra time to the original end time
|
7854
|
-
float orig_t0 = last.orig_end + extra_time;
|
7855
|
-
return (int64_t)(orig_t0 * 100);
|
7892
|
+
if (processed_diff == 0) {
|
7893
|
+
return lower->original_time;
|
7856
7894
|
}
|
7857
7895
|
|
7858
|
-
|
7859
|
-
return
|
7896
|
+
// Perform linear interpolation
|
7897
|
+
return lower->original_time + (offset * original_diff) / processed_diff;
|
7860
7898
|
}
|
7861
7899
|
|
7862
|
-
|
7863
|
-
|
7900
|
+
// Function to get the starting timestamp of a segment
|
7901
|
+
int64_t whisper_full_get_segment_t0_from_state(struct whisper_state * state, int i_segment) {
|
7902
|
+
// If VAD wasn't used, return the original timestamp
|
7903
|
+
if (!state->has_vad_segments || state->vad_mapping_table.empty()) {
|
7904
|
+
return state->result_all[i_segment].t0;
|
7905
|
+
}
|
7906
|
+
|
7907
|
+
// Get the processed timestamp
|
7908
|
+
int64_t t0 = state->result_all[i_segment].t0;
|
7909
|
+
|
7910
|
+
// Map to original time using the mapping table
|
7911
|
+
return map_processed_to_original_time(t0, state->vad_mapping_table);
|
7864
7912
|
}
|
7865
7913
|
|
7914
|
+
// Function to get the ending timestamp of a segment
|
7866
7915
|
int64_t whisper_full_get_segment_t1_from_state(struct whisper_state * state, int i_segment) {
|
7867
7916
|
// If VAD wasn't used, return the original timestamp
|
7868
|
-
if (!state->has_vad_segments || state->
|
7917
|
+
if (!state->has_vad_segments || state->vad_mapping_table.empty()) {
|
7869
7918
|
return state->result_all[i_segment].t1;
|
7870
7919
|
}
|
7871
7920
|
|
7872
|
-
// Get the
|
7873
|
-
|
7874
|
-
// back to the original audio.
|
7875
|
-
float t1 = state->result_all[i_segment].t1 / 100.0f;
|
7876
|
-
|
7877
|
-
// Find which VAD segment this timestamp belongs.
|
7878
|
-
// TODO(danbev) This could be optimized by using a binary search if the number
|
7879
|
-
// of segments exceed a certain limit. Also we might be able to assume that
|
7880
|
-
// the access pattern is sequential and optimized for that too.
|
7881
|
-
for (size_t i = 0; i < state->vad_segments.size(); i++) {
|
7882
|
-
const auto& segment = state->vad_segments[i];
|
7883
|
-
|
7884
|
-
// Check if the timestamp falls within this segment.
|
7885
|
-
if (t1 >= segment.vad_start && t1 <= segment.vad_end) {
|
7886
|
-
// Calculate the proportion through the filtered segment.
|
7887
|
-
float proportion = 0.0f;
|
7888
|
-
if (segment.vad_end > segment.vad_start) {
|
7889
|
-
proportion = (t1 - segment.vad_start) / (segment.vad_end - segment.vad_start);
|
7890
|
-
}
|
7891
|
-
float orig_t1 = segment.orig_start + proportion * (segment.orig_end - segment.orig_start);
|
7892
|
-
return (int64_t)(orig_t1 * 100);
|
7893
|
-
}
|
7894
|
-
}
|
7921
|
+
// Get the processed timestamp
|
7922
|
+
int64_t t1 = state->result_all[i_segment].t1;
|
7895
7923
|
|
7896
|
-
//
|
7897
|
-
|
7898
|
-
const auto & curr = state->vad_segments[i];
|
7899
|
-
const auto & next = state->vad_segments[i + 1];
|
7924
|
+
// Map to original time using the mapping table
|
7925
|
+
int64_t orig_t1 = map_processed_to_original_time(t1, state->vad_mapping_table);
|
7900
7926
|
|
7901
|
-
|
7902
|
-
|
7903
|
-
float gap_proportion = 0.0f;
|
7904
|
-
if (next.vad_start > curr.vad_end) {
|
7905
|
-
gap_proportion = (t1 - curr.vad_end) / (next.vad_start - curr.vad_end);
|
7906
|
-
}
|
7907
|
-
// Map to the corresponding position in the original gap
|
7908
|
-
float orig_t1 = curr.orig_end + gap_proportion * (next.orig_start - curr.orig_end);
|
7909
|
-
return (int64_t)(orig_t1 * 100);
|
7910
|
-
}
|
7911
|
-
}
|
7927
|
+
// Get the corresponding t0 for this segment
|
7928
|
+
int64_t orig_t0 = whisper_full_get_segment_t0_from_state(state, i_segment);
|
7912
7929
|
|
7913
|
-
//
|
7914
|
-
|
7915
|
-
|
7916
|
-
|
7917
|
-
// Calculate how far beyond the last segment
|
7918
|
-
float extra_time = t1 - last.vad_end;
|
7919
|
-
// Add this extra time to the original end time
|
7920
|
-
float orig_t1 = last.orig_end + extra_time;
|
7921
|
-
return (int64_t)(orig_t1 * 100);
|
7930
|
+
// Ensure minimum duration to prevent zero-length segments
|
7931
|
+
const int64_t min_duration = 10; // 10ms minimum
|
7932
|
+
if (orig_t1 - orig_t0 < min_duration) {
|
7933
|
+
orig_t1 = orig_t0 + min_duration;
|
7922
7934
|
}
|
7923
7935
|
|
7924
|
-
|
7925
|
-
|
7936
|
+
return orig_t1;
|
7937
|
+
}
|
7938
|
+
|
7939
|
+
|
7940
|
+
int64_t whisper_full_get_segment_t0(struct whisper_context * ctx, int i_segment) {
|
7941
|
+
return whisper_full_get_segment_t0_from_state(ctx->state, i_segment);
|
7926
7942
|
}
|
7927
7943
|
|
7928
7944
|
int64_t whisper_full_get_segment_t1(struct whisper_context * ctx, int i_segment) {
|
@@ -8154,8 +8170,6 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
|
|
8154
8170
|
}
|
8155
8171
|
|
8156
8172
|
WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
8157
|
-
whisper_load_backends();
|
8158
|
-
|
8159
8173
|
static std::string s;
|
8160
8174
|
s = "";
|
8161
8175
|
char strbuf[256];
|
@@ -8289,10 +8303,6 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
|
8289
8303
|
// token-level timestamps
|
8290
8304
|
//
|
8291
8305
|
|
8292
|
-
static int timestamp_to_sample(int64_t t, int n_samples) {
|
8293
|
-
return std::max(0, std::min((int) n_samples - 1, (int) ((t*WHISPER_SAMPLE_RATE)/100)));
|
8294
|
-
}
|
8295
|
-
|
8296
8306
|
static int64_t sample_to_timestamp(int i_sample) {
|
8297
8307
|
return (100ll*i_sample)/WHISPER_SAMPLE_RATE;
|
8298
8308
|
}
|
@@ -8342,6 +8352,18 @@ static std::vector<float> get_signal_energy(const float * signal, int n_samples,
|
|
8342
8352
|
return result;
|
8343
8353
|
}
|
8344
8354
|
|
8355
|
+
static int timestamp_to_sample(int64_t t, int64_t segment_t0, int n_samples) {
|
8356
|
+
// Convert absolute timestamp to segment-relative timestamp
|
8357
|
+
int64_t relative_t = t - segment_t0;
|
8358
|
+
int sample = (int)((relative_t * WHISPER_SAMPLE_RATE) / 100);
|
8359
|
+
return std::max(0, std::min(n_samples - 1, sample));
|
8360
|
+
}
|
8361
|
+
|
8362
|
+
static int64_t sample_to_timestamp(int i_sample, int64_t segment_t0) {
|
8363
|
+
int64_t relative_timestamp = (100ll * i_sample) / WHISPER_SAMPLE_RATE;
|
8364
|
+
return relative_timestamp + segment_t0;
|
8365
|
+
}
|
8366
|
+
|
8345
8367
|
static void whisper_exp_compute_token_level_timestamps(
|
8346
8368
|
struct whisper_context & ctx,
|
8347
8369
|
struct whisper_state & state,
|
@@ -8482,8 +8504,8 @@ static void whisper_exp_compute_token_level_timestamps(
|
|
8482
8504
|
continue;
|
8483
8505
|
}
|
8484
8506
|
|
8485
|
-
int s0 = timestamp_to_sample(tokens[j].t0, n_samples);
|
8486
|
-
int s1 = timestamp_to_sample(tokens[j].t1, n_samples);
|
8507
|
+
int s0 = timestamp_to_sample(tokens[j].t0, segment.t0, n_samples);
|
8508
|
+
int s1 = timestamp_to_sample(tokens[j].t1, segment.t0, n_samples);
|
8487
8509
|
|
8488
8510
|
const int ss0 = std::max(s0 - hw, 0);
|
8489
8511
|
const int ss1 = std::min(s1 + hw, n_samples);
|
@@ -8504,7 +8526,7 @@ static void whisper_exp_compute_token_level_timestamps(
|
|
8504
8526
|
while (k > 0 && state.energy[k] > thold) {
|
8505
8527
|
k--;
|
8506
8528
|
}
|
8507
|
-
tokens[j].t0 = sample_to_timestamp(k);
|
8529
|
+
tokens[j].t0 = sample_to_timestamp(k, segment.t0);
|
8508
8530
|
if (tokens[j].t0 < tokens[j - 1].t1) {
|
8509
8531
|
tokens[j].t0 = tokens[j - 1].t1;
|
8510
8532
|
} else {
|
@@ -8515,7 +8537,7 @@ static void whisper_exp_compute_token_level_timestamps(
|
|
8515
8537
|
k++;
|
8516
8538
|
}
|
8517
8539
|
s0 = k;
|
8518
|
-
tokens[j].t0 = sample_to_timestamp(k);
|
8540
|
+
tokens[j].t0 = sample_to_timestamp(k, segment.t0);
|
8519
8541
|
}
|
8520
8542
|
}
|
8521
8543
|
|
@@ -8525,7 +8547,7 @@ static void whisper_exp_compute_token_level_timestamps(
|
|
8525
8547
|
while (k < n_samples - 1 && state.energy[k] > thold) {
|
8526
8548
|
k++;
|
8527
8549
|
}
|
8528
|
-
tokens[j].t1 = sample_to_timestamp(k);
|
8550
|
+
tokens[j].t1 = sample_to_timestamp(k, segment.t0);
|
8529
8551
|
if (j < n - 1 && tokens[j].t1 > tokens[j + 1].t0) {
|
8530
8552
|
tokens[j].t1 = tokens[j + 1].t0;
|
8531
8553
|
} else {
|
@@ -8536,7 +8558,7 @@ static void whisper_exp_compute_token_level_timestamps(
|
|
8536
8558
|
k--;
|
8537
8559
|
}
|
8538
8560
|
s1 = k;
|
8539
|
-
tokens[j].t1 = sample_to_timestamp(k);
|
8561
|
+
tokens[j].t1 = sample_to_timestamp(k, segment.t0);
|
8540
8562
|
}
|
8541
8563
|
}
|
8542
8564
|
}
|
@@ -8893,6 +8915,10 @@ void whisper_log_set(ggml_log_callback log_callback, void * user_data) {
|
|
8893
8915
|
ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);
|
8894
8916
|
}
|
8895
8917
|
|
8918
|
+
const char * whisper_version(void) {
|
8919
|
+
return WHISPER_VERSION;
|
8920
|
+
}
|
8921
|
+
|
8896
8922
|
GGML_ATTRIBUTE_FORMAT(2, 3)
|
8897
8923
|
static void whisper_log_internal(ggml_log_level level, const char * format, ...) {
|
8898
8924
|
va_list args;
|