whispercpp 1.3.4 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +47 -23
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/cli/cli.cpp +121 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +10 -11
- data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
- data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
- data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
- data/ext/sources/examples/talk-llama/llama-context.h +57 -9
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
- data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
- data/ext/sources/examples/talk-llama/llama-model.h +44 -3
- data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
- data/ext/sources/examples/talk-llama/llama.cpp +729 -2
- data/ext/sources/examples/talk-llama/llama.h +152 -14
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +82 -54
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +4 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +190 -12
- data/ext/sources/ggml/src/CMakeLists.txt +82 -11
- data/ext/sources/ggml/src/ggml-alloc.c +124 -41
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
- data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
- data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
- data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- data/ext/sources/ggml/src/ggml-impl.h +67 -6
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +425 -33
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +101 -35
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +119 -2
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +7 -0
- data/whispercpp.gemspec +1 -1
- metadata +287 -34
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
data/ext/sources/src/whisper.cpp
CHANGED
|
@@ -21,14 +21,12 @@
|
|
|
21
21
|
#define _USE_MATH_DEFINES
|
|
22
22
|
#include <cmath>
|
|
23
23
|
#include <climits>
|
|
24
|
-
#include <codecvt>
|
|
25
24
|
#include <cstdarg>
|
|
26
25
|
#include <cstdio>
|
|
27
26
|
#include <cstring>
|
|
28
27
|
#include <fstream>
|
|
29
28
|
#include <functional>
|
|
30
29
|
#include <map>
|
|
31
|
-
#include <mutex>
|
|
32
30
|
#include <random>
|
|
33
31
|
#include <regex>
|
|
34
32
|
#include <set>
|
|
@@ -36,6 +34,10 @@
|
|
|
36
34
|
#include <thread>
|
|
37
35
|
#include <vector>
|
|
38
36
|
|
|
37
|
+
#ifdef _MSC_VER
|
|
38
|
+
#include <codecvt>
|
|
39
|
+
#endif
|
|
40
|
+
|
|
39
41
|
#if defined(WHISPER_BIG_ENDIAN)
|
|
40
42
|
template<typename T>
|
|
41
43
|
static T byteswap(T value) {
|
|
@@ -138,6 +140,10 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text
|
|
|
138
140
|
} while (0)
|
|
139
141
|
|
|
140
142
|
#define WHISPER_MAX_DECODERS 8
|
|
143
|
+
|
|
144
|
+
// temperature below which we condition on past text history
|
|
145
|
+
static constexpr float WHISPER_HISTORY_CONDITIONING_TEMP_CUTOFF = 0.5f;
|
|
146
|
+
|
|
141
147
|
#define WHISPER_MAX_NODES 4096
|
|
142
148
|
|
|
143
149
|
static std::string format(const char * fmt, ...) {
|
|
@@ -880,7 +886,10 @@ struct whisper_state {
|
|
|
880
886
|
std::vector<float> logits;
|
|
881
887
|
|
|
882
888
|
std::vector<whisper_segment> result_all;
|
|
883
|
-
|
|
889
|
+
|
|
890
|
+
// prompt history split into static prefix (prompt_past0) and dynamic rolling context (prompt_past1)
|
|
891
|
+
std::vector<whisper_token> prompt_past0; // static carried initial prompt (if enabled)
|
|
892
|
+
std::vector<whisper_token> prompt_past1; // dynamic context from decoded output
|
|
884
893
|
|
|
885
894
|
int lang_id = 0; // english by default
|
|
886
895
|
|
|
@@ -1287,7 +1296,11 @@ static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & pa
|
|
|
1287
1296
|
if (params.use_gpu) {
|
|
1288
1297
|
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
|
1289
1298
|
ggml_backend_dev_t dev_cur = ggml_backend_dev_get(i);
|
|
1290
|
-
|
|
1299
|
+
enum ggml_backend_dev_type dev_type = ggml_backend_dev_type(dev_cur);
|
|
1300
|
+
const char * dev_name = ggml_backend_dev_name(dev_cur);
|
|
1301
|
+
WHISPER_LOG_INFO("%s: device %zu: %s (type: %d)\n", __func__, i, dev_name, dev_type);
|
|
1302
|
+
if (dev_type == GGML_BACKEND_DEVICE_TYPE_GPU || dev_type == GGML_BACKEND_DEVICE_TYPE_IGPU) {
|
|
1303
|
+
WHISPER_LOG_INFO("%s: found GPU device %zu: %s (type: %d, cnt: %d)\n", __func__, i, dev_name, dev_type, cnt);
|
|
1291
1304
|
if (cnt == params.gpu_device) {
|
|
1292
1305
|
dev = dev_cur;
|
|
1293
1306
|
}
|
|
@@ -1356,7 +1369,7 @@ static buft_list_t make_buft_list(whisper_context_params & params) {
|
|
|
1356
1369
|
int cnt = 0;
|
|
1357
1370
|
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
|
1358
1371
|
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
|
1359
|
-
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
|
|
1372
|
+
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU || ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_IGPU) {
|
|
1360
1373
|
if (cnt == params.gpu_device) {
|
|
1361
1374
|
auto * buft = ggml_backend_dev_buffer_type(dev);
|
|
1362
1375
|
if (buft) {
|
|
@@ -1394,6 +1407,7 @@ static bool weight_buft_supported(const whisper_hparams & hparams, ggml_tensor *
|
|
|
1394
1407
|
bool op_supported = true;
|
|
1395
1408
|
|
|
1396
1409
|
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU ||
|
|
1410
|
+
ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_IGPU ||
|
|
1397
1411
|
(ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU && buft == ggml_backend_cpu_buffer_type())) {
|
|
1398
1412
|
// GPU and default CPU backend support all operators
|
|
1399
1413
|
op_supported = true;
|
|
@@ -2491,7 +2505,7 @@ static struct ggml_cgraph * whisper_build_graph_decoder(
|
|
|
2491
2505
|
|
|
2492
2506
|
const float KQscale = pow(float(n_state_head), -0.25);
|
|
2493
2507
|
|
|
2494
|
-
struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv,
|
|
2508
|
+
struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_tokens, 1);
|
|
2495
2509
|
ggml_set_name(KQ_mask, "KQ_mask");
|
|
2496
2510
|
ggml_set_input(KQ_mask);
|
|
2497
2511
|
|
|
@@ -2915,7 +2929,7 @@ static bool whisper_decode_internal(
|
|
|
2915
2929
|
}
|
|
2916
2930
|
}
|
|
2917
2931
|
|
|
2918
|
-
for (int i = n_tokens; i <
|
|
2932
|
+
for (int i = n_tokens; i < n_tokens; ++i) {
|
|
2919
2933
|
for (int j = 0; j < n_kv; ++j) {
|
|
2920
2934
|
data[h*(n_kv*n_tokens) + i*n_kv + j] = -INFINITY;
|
|
2921
2935
|
}
|
|
@@ -4446,6 +4460,7 @@ static bool weight_buft_supported(const whisper_vad_hparams & hparams, ggml_tens
|
|
|
4446
4460
|
bool op_supported = true;
|
|
4447
4461
|
|
|
4448
4462
|
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU ||
|
|
4463
|
+
ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_IGPU ||
|
|
4449
4464
|
(ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU && buft == ggml_backend_cpu_buffer_type())) {
|
|
4450
4465
|
// GPU and default CPU backend support all operators
|
|
4451
4466
|
op_supported = true;
|
|
@@ -5920,9 +5935,10 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
|
|
|
5920
5935
|
|
|
5921
5936
|
/* suppress_regex =*/ nullptr,
|
|
5922
5937
|
|
|
5923
|
-
/*.initial_prompt
|
|
5924
|
-
/*.
|
|
5925
|
-
/*.
|
|
5938
|
+
/*.initial_prompt =*/ nullptr,
|
|
5939
|
+
/*.carry_initial_prompt =*/ false,
|
|
5940
|
+
/*.prompt_tokens =*/ nullptr,
|
|
5941
|
+
/*.prompt_n_tokens =*/ 0,
|
|
5926
5942
|
|
|
5927
5943
|
/*.language =*/ "en",
|
|
5928
5944
|
/*.detect_language =*/ false,
|
|
@@ -6010,6 +6026,19 @@ static inline bool should_split_on_word(const char * txt, bool split_on_word) {
|
|
|
6010
6026
|
return txt[0] == ' ';
|
|
6011
6027
|
}
|
|
6012
6028
|
|
|
6029
|
+
// Count UTF-8 characters (not bytes) in a string
|
|
6030
|
+
static int utf8_len(const char * str) {
|
|
6031
|
+
int count = 0;
|
|
6032
|
+
while (*str) {
|
|
6033
|
+
// Skip continuation bytes (10xxxxxx)
|
|
6034
|
+
if ((*str & 0xC0) != 0x80) {
|
|
6035
|
+
count++;
|
|
6036
|
+
}
|
|
6037
|
+
str++;
|
|
6038
|
+
}
|
|
6039
|
+
return count;
|
|
6040
|
+
}
|
|
6041
|
+
|
|
6013
6042
|
static void whisper_exp_compute_token_level_timestamps_dtw(
|
|
6014
6043
|
struct whisper_context * ctx,
|
|
6015
6044
|
struct whisper_state * state,
|
|
@@ -6038,7 +6067,7 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
|
|
|
6038
6067
|
}
|
|
6039
6068
|
|
|
6040
6069
|
const auto txt = whisper_token_to_str(&ctx, token.id);
|
|
6041
|
-
const int cur =
|
|
6070
|
+
const int cur = utf8_len(txt); // Use UTF-8 character count instead of byte count
|
|
6042
6071
|
|
|
6043
6072
|
if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
|
|
6044
6073
|
state.result_all.back().text = std::move(text);
|
|
@@ -6618,6 +6647,10 @@ static bool whisper_vad(
|
|
|
6618
6647
|
|
|
6619
6648
|
whisper_vad_segments * vad_segments = whisper_vad_segments_from_samples(vctx, vad_params, samples, n_samples);
|
|
6620
6649
|
|
|
6650
|
+
if (!vad_segments) {
|
|
6651
|
+
return false;
|
|
6652
|
+
}
|
|
6653
|
+
|
|
6621
6654
|
if (vad_segments->data.size() > 0) {
|
|
6622
6655
|
state->has_vad_segments = true;
|
|
6623
6656
|
ctx->state->vad_segments.clear();
|
|
@@ -6660,7 +6693,6 @@ static bool whisper_vad(
|
|
|
6660
6693
|
} catch (const std::bad_alloc & /* e */) {
|
|
6661
6694
|
WHISPER_LOG_ERROR("%s: failed to allocate memory for filtered samples\n", __func__);
|
|
6662
6695
|
whisper_vad_free_segments(vad_segments);
|
|
6663
|
-
whisper_vad_free(vctx);
|
|
6664
6696
|
return false;
|
|
6665
6697
|
}
|
|
6666
6698
|
|
|
@@ -6674,7 +6706,7 @@ static bool whisper_vad(
|
|
|
6674
6706
|
}
|
|
6675
6707
|
|
|
6676
6708
|
segment_start_samples = std::min(segment_start_samples, n_samples - 1);
|
|
6677
|
-
segment_end_samples = std::min(segment_end_samples, n_samples);
|
|
6709
|
+
segment_end_samples = std::min(segment_end_samples, n_samples - 1);
|
|
6678
6710
|
int segment_length = segment_end_samples - segment_start_samples;
|
|
6679
6711
|
if (segment_length > 0) {
|
|
6680
6712
|
whisper_state::vad_segment_info segment;
|
|
@@ -6766,6 +6798,7 @@ static bool whisper_vad(
|
|
|
6766
6798
|
__func__, n_samples, filtered_n_samples, 100.0f * (1.0f - (float)filtered_n_samples / n_samples));
|
|
6767
6799
|
}
|
|
6768
6800
|
|
|
6801
|
+
whisper_vad_free_segments(vad_segments);
|
|
6769
6802
|
return true;
|
|
6770
6803
|
}
|
|
6771
6804
|
|
|
@@ -6874,17 +6907,22 @@ int whisper_full_with_state(
|
|
|
6874
6907
|
decoder.rng = std::mt19937(j);
|
|
6875
6908
|
}
|
|
6876
6909
|
|
|
6877
|
-
// the accumulated text context
|
|
6878
|
-
auto &
|
|
6910
|
+
// the accumulated text context split into static (prompt_past0) and dynamic (prompt_past1)
|
|
6911
|
+
auto & prompt_past0 = state->prompt_past0;
|
|
6912
|
+
auto & prompt_past1 = state->prompt_past1;
|
|
6879
6913
|
if (params.no_context) {
|
|
6880
|
-
|
|
6914
|
+
prompt_past0.clear();
|
|
6915
|
+
prompt_past1.clear();
|
|
6881
6916
|
}
|
|
6882
6917
|
|
|
6918
|
+
// calculate the maximum context budget for prompt history
|
|
6919
|
+
const int max_prompt_ctx = std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2);
|
|
6920
|
+
|
|
6883
6921
|
// prepare prompt
|
|
6884
6922
|
{
|
|
6885
6923
|
std::vector<whisper_token> prompt_tokens;
|
|
6886
6924
|
|
|
6887
|
-
// initial prompt
|
|
6925
|
+
// tokenize the initial prompt
|
|
6888
6926
|
if (!params.prompt_tokens && params.initial_prompt) {
|
|
6889
6927
|
prompt_tokens.resize(1024);
|
|
6890
6928
|
int n_needed = whisper_tokenize(ctx, params.initial_prompt, prompt_tokens.data(), prompt_tokens.size());
|
|
@@ -6896,14 +6934,25 @@ int whisper_full_with_state(
|
|
|
6896
6934
|
params.prompt_tokens = prompt_tokens.data();
|
|
6897
6935
|
params.prompt_n_tokens = prompt_tokens.size();
|
|
6898
6936
|
}
|
|
6899
|
-
|
|
6900
|
-
// prepend the prompt tokens to the prompt_past
|
|
6901
6937
|
if (params.prompt_tokens && params.prompt_n_tokens > 0) {
|
|
6902
|
-
|
|
6903
|
-
|
|
6904
|
-
|
|
6938
|
+
if (params.carry_initial_prompt) {
|
|
6939
|
+
if (prompt_past0.empty()) {
|
|
6940
|
+
const int max_tokens = std::max(1, max_prompt_ctx - 1);
|
|
6941
|
+
|
|
6942
|
+
if (params.prompt_n_tokens > max_tokens) {
|
|
6943
|
+
WHISPER_LOG_WARN("%s: initial prompt is too long (%d tokens), will use only the last %d tokens\n",
|
|
6944
|
+
__func__, params.prompt_n_tokens, max_tokens);
|
|
6945
|
+
}
|
|
6946
|
+
|
|
6947
|
+
const int n_tokens = std::min(params.prompt_n_tokens, max_tokens);
|
|
6948
|
+
prompt_past0.assign(params.prompt_tokens + (params.prompt_n_tokens - n_tokens), params.prompt_tokens + params.prompt_n_tokens);
|
|
6949
|
+
}
|
|
6950
|
+
} else {
|
|
6951
|
+
for (int i = 0; i < params.prompt_n_tokens; ++i) {
|
|
6952
|
+
prompt_past1.push_back(params.prompt_tokens[i]);
|
|
6953
|
+
}
|
|
6954
|
+
std::rotate(prompt_past1.begin(), prompt_past1.end() - params.prompt_n_tokens, prompt_past1.end());
|
|
6905
6955
|
}
|
|
6906
|
-
std::rotate(prompt_past.begin(), prompt_past.end() - params.prompt_n_tokens, prompt_past.end());
|
|
6907
6956
|
}
|
|
6908
6957
|
}
|
|
6909
6958
|
|
|
@@ -6989,7 +7038,8 @@ int whisper_full_with_state(
|
|
|
6989
7038
|
// if there is a very short audio segment left to process, we remove any past prompt since it tends
|
|
6990
7039
|
// to confuse the decoder and often make it repeat or hallucinate stuff
|
|
6991
7040
|
if (seek > seek_start && seek + 500 >= seek_end) {
|
|
6992
|
-
|
|
7041
|
+
prompt_past0.clear();
|
|
7042
|
+
prompt_past1.clear();
|
|
6993
7043
|
}
|
|
6994
7044
|
|
|
6995
7045
|
int best_decoder_id = 0;
|
|
@@ -7050,12 +7100,25 @@ int whisper_full_with_state(
|
|
|
7050
7100
|
{
|
|
7051
7101
|
prompt.clear();
|
|
7052
7102
|
|
|
7053
|
-
|
|
7054
|
-
|
|
7055
|
-
|
|
7103
|
+
if (params.n_max_text_ctx > 0 && t_cur < WHISPER_HISTORY_CONDITIONING_TEMP_CUTOFF) {
|
|
7104
|
+
const bool can_take0 = params.carry_initial_prompt && !prompt_past0.empty();
|
|
7105
|
+
const bool can_take1 = !prompt_past1.empty();
|
|
7056
7106
|
|
|
7057
|
-
|
|
7058
|
-
|
|
7107
|
+
if (max_prompt_ctx > 0 && (can_take0 || can_take1)) {
|
|
7108
|
+
// Always start with previous token marker to connect continuity
|
|
7109
|
+
prompt.push_back(whisper_token_prev(ctx));
|
|
7110
|
+
|
|
7111
|
+
// Take static tokens (initial prompt) first
|
|
7112
|
+
int n_take0 = 0;
|
|
7113
|
+
if (can_take0) {
|
|
7114
|
+
n_take0 = prompt_past0.size();
|
|
7115
|
+
prompt.insert(prompt.end(), prompt_past0.end() - n_take0, prompt_past0.end());
|
|
7116
|
+
}
|
|
7117
|
+
|
|
7118
|
+
// Fill remaining budget with dynamic tokens (rolling context)
|
|
7119
|
+
const int n_take1 = std::min<int>(max_prompt_ctx - n_take0 - 1, prompt_past1.size());
|
|
7120
|
+
prompt.insert(prompt.end(), prompt_past1.end() - n_take1, prompt_past1.end());
|
|
7121
|
+
}
|
|
7059
7122
|
}
|
|
7060
7123
|
|
|
7061
7124
|
// init new transcription with sot, language (opt) and task tokens
|
|
@@ -7537,14 +7600,17 @@ int whisper_full_with_state(
|
|
|
7537
7600
|
|
|
7538
7601
|
//WHISPER_LOG_DEBUG("prompt_init.size() = %d, prompt.size() = %d, result_len = %d, seek_delta = %d\n", prompt_init.size(), prompt.size(), result_len, seek_delta);
|
|
7539
7602
|
|
|
7540
|
-
// update
|
|
7541
|
-
|
|
7542
|
-
if (prompt.front() == whisper_token_prev(ctx)) {
|
|
7543
|
-
|
|
7603
|
+
// update prompt_past1
|
|
7604
|
+
prompt_past1.clear();
|
|
7605
|
+
if (!params.carry_initial_prompt && !prompt.empty() && prompt.front() == whisper_token_prev(ctx)) {
|
|
7606
|
+
prompt_past1.insert(prompt_past1.end(), prompt.begin() + 1, prompt.end() - prompt_init.size());
|
|
7544
7607
|
}
|
|
7545
7608
|
|
|
7546
|
-
|
|
7547
|
-
|
|
7609
|
+
// Add newly decoded tokens to the rolling context
|
|
7610
|
+
if (!is_no_speech) {
|
|
7611
|
+
for (int i = 0; i < result_len; ++i) {
|
|
7612
|
+
prompt_past1.push_back(tokens_cur[i].id);
|
|
7613
|
+
}
|
|
7548
7614
|
}
|
|
7549
7615
|
|
|
7550
7616
|
if (!tokens_cur.empty() && ctx->model.n_loaded > 0 && !is_no_speech) {
|
|
@@ -94,7 +94,7 @@ add_executable(${VAD_TEST} ${VAD_TEST}.cpp)
|
|
|
94
94
|
target_include_directories(${VAD_TEST} PRIVATE ../include ../ggml/include ../examples)
|
|
95
95
|
target_link_libraries(${VAD_TEST} PRIVATE common)
|
|
96
96
|
target_compile_definitions(${VAD_TEST} PRIVATE
|
|
97
|
-
VAD_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/for-tests-silero-
|
|
97
|
+
VAD_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/for-tests-silero-v6.2.0-ggml.bin"
|
|
98
98
|
SAMPLE_PATH="${PROJECT_SOURCE_DIR}/samples/jfk.wav")
|
|
99
99
|
add_test(NAME ${VAD_TEST} COMMAND ${VAD_TEST})
|
|
100
100
|
set_tests_properties(${VAD_TEST} PROPERTIES LABELS "unit")
|
|
@@ -106,7 +106,7 @@ target_include_directories(${VAD_TEST} PRIVATE ../include ../ggml/include ../exa
|
|
|
106
106
|
target_link_libraries(${VAD_TEST} PRIVATE common)
|
|
107
107
|
target_compile_definitions(${VAD_TEST} PRIVATE
|
|
108
108
|
WHISPER_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/ggml-base.en.bin"
|
|
109
|
-
VAD_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/for-tests-silero-
|
|
109
|
+
VAD_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/for-tests-silero-v6.2.0-ggml.bin"
|
|
110
110
|
SAMPLE_PATH="${PROJECT_SOURCE_DIR}/samples/jfk.wav")
|
|
111
111
|
add_test(NAME ${VAD_TEST} COMMAND ${VAD_TEST})
|
|
112
112
|
set_tests_properties(${VAD_TEST} PROPERTIES LABELS "base;en")
|
|
@@ -42,11 +42,13 @@ int main() {
|
|
|
42
42
|
const int n_segments = whisper_full_n_segments(wctx);
|
|
43
43
|
assert(n_segments == 1);
|
|
44
44
|
|
|
45
|
+
|
|
46
|
+
printf("Segment text:\n%s", whisper_full_get_segment_text(wctx, 0));
|
|
45
47
|
assert(strcmp(" And so my fellow Americans, ask not what your country can do for you,"
|
|
46
48
|
" ask what you can do for your country.",
|
|
47
49
|
whisper_full_get_segment_text(wctx, 0)) == 0);
|
|
48
|
-
assert(whisper_full_get_segment_t0(wctx, 0) ==
|
|
49
|
-
assert(whisper_full_get_segment_t1(wctx, 0) ==
|
|
50
|
+
assert(whisper_full_get_segment_t0(wctx, 0) == 32);
|
|
51
|
+
assert(whisper_full_get_segment_t1(wctx, 0) == 1051);
|
|
50
52
|
|
|
51
53
|
whisper_free(wctx);
|
|
52
54
|
|
|
@@ -36,7 +36,7 @@ struct whisper_vad_segments * test_detect_timestamps(
|
|
|
36
36
|
struct whisper_vad_context * vctx,
|
|
37
37
|
struct whisper_vad_params params) {
|
|
38
38
|
struct whisper_vad_segments * timestamps = whisper_vad_segments_from_probs(vctx, params);
|
|
39
|
-
assert(whisper_vad_segments_n_segments(timestamps) ==
|
|
39
|
+
assert(whisper_vad_segments_n_segments(timestamps) == 4);
|
|
40
40
|
|
|
41
41
|
for (int i = 0; i < whisper_vad_segments_n_segments(timestamps); ++i) {
|
|
42
42
|
printf("VAD segment %d: start = %.2f, end = %.2f\n", i,
|
data/extsources.rb
CHANGED
data/lib/whisper/model/uri.rb
CHANGED
|
@@ -94,7 +94,8 @@ module Whisper
|
|
|
94
94
|
end
|
|
95
95
|
|
|
96
96
|
def show_progress(current, size)
|
|
97
|
-
|
|
97
|
+
line_size = 47
|
|
98
|
+
progress_rate_available = size && $stderr.tty? && $stderr.winsize[1] >= line_size
|
|
98
99
|
|
|
99
100
|
unless @prev
|
|
100
101
|
@prev = Time.now
|
|
@@ -181,7 +182,6 @@ module Whisper
|
|
|
181
182
|
base-q8_0
|
|
182
183
|
small
|
|
183
184
|
small.en
|
|
184
|
-
small.en-tdrz
|
|
185
185
|
small-q5_1
|
|
186
186
|
small.en-q5_1
|
|
187
187
|
small-q8_0
|
|
@@ -203,28 +203,27 @@ module Whisper
|
|
|
203
203
|
models[name] = URI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}.bin")
|
|
204
204
|
}
|
|
205
205
|
|
|
206
|
+
%w[
|
|
207
|
+
small.en-tdrz
|
|
208
|
+
].each do |name|
|
|
209
|
+
@pre_converted_models[name] = URI.new("https://huggingface.co/akashmjn/tinydiarize-whisper.cpp/resolve/main/ggml-#{name}.bin")
|
|
210
|
+
end
|
|
211
|
+
|
|
206
212
|
%w[
|
|
207
213
|
silero-v5.1.2
|
|
214
|
+
silero-v6.2.0
|
|
208
215
|
].each do |name|
|
|
209
216
|
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
|
|
210
217
|
end
|
|
211
218
|
|
|
212
|
-
@coreml_compiled_models =
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
medium.en
|
|
221
|
-
large-v1
|
|
222
|
-
large-v2
|
|
223
|
-
large-v3
|
|
224
|
-
large-v3-turbo
|
|
225
|
-
].each_with_object({}) do |name, models|
|
|
226
|
-
models[@pre_converted_models[name]] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
|
|
227
|
-
end
|
|
219
|
+
@coreml_compiled_models = @pre_converted_models.each_with_object({}) {|(name, uri), models|
|
|
220
|
+
next if name.end_with?("-tdrz") || name.start_with?("silero-")
|
|
221
|
+
|
|
222
|
+
if matched = name.match(/\A(?<name>.*)-q\d_\d\z/)
|
|
223
|
+
name = matched[:name]
|
|
224
|
+
end
|
|
225
|
+
models[uri] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
|
|
226
|
+
}
|
|
228
227
|
|
|
229
228
|
class << self
|
|
230
229
|
attr_reader :pre_converted_models, :coreml_compiled_models
|
data/sig/whisper.rbs
CHANGED
|
@@ -138,6 +138,7 @@ module Whisper
|
|
|
138
138
|
?max_len: Integer,
|
|
139
139
|
?split_on_word: boolish,
|
|
140
140
|
?initial_prompt: string | nil,
|
|
141
|
+
?carry_initial_prompt: boolish,
|
|
141
142
|
?diarize: boolish,
|
|
142
143
|
?offset: Integer,
|
|
143
144
|
?duration: Integer,
|
|
@@ -236,6 +237,7 @@ module Whisper
|
|
|
236
237
|
def split_on_word: () -> (true | false)
|
|
237
238
|
|
|
238
239
|
def initial_prompt=: (_ToS) -> _ToS
|
|
240
|
+
def carry_initial_prompt=: (boolish) -> boolish
|
|
239
241
|
|
|
240
242
|
# Tokens to provide to the whisper decoder as initial prompt
|
|
241
243
|
# these are prepended to any existing text context from a previous call
|
|
@@ -243,6 +245,7 @@ module Whisper
|
|
|
243
245
|
# Maximum of whisper_n_text_ctx()/2 tokens are used (typically 224).
|
|
244
246
|
#
|
|
245
247
|
def initial_prompt: () -> (String | nil)
|
|
248
|
+
def carry_initial_prompt: () -> (true | false)
|
|
246
249
|
|
|
247
250
|
def diarize=: (boolish) -> boolish
|
|
248
251
|
|
|
@@ -431,7 +434,8 @@ module Whisper
|
|
|
431
434
|
end_time: (Integer | nil),
|
|
432
435
|
text: (String | nil),
|
|
433
436
|
no_speech_prob: (Float | nil),
|
|
434
|
-
speaker_turn_next: (true | false | nil)
|
|
437
|
+
speaker_turn_next: (true | false | nil),
|
|
438
|
+
n_tokens: (Integer | nil)
|
|
435
439
|
}
|
|
436
440
|
|
|
437
441
|
# Start time in milliseconds.
|
|
@@ -443,13 +447,32 @@ module Whisper
|
|
|
443
447
|
def end_time: () -> Integer
|
|
444
448
|
|
|
445
449
|
# Whether the next segment is predicted as a speaker turn.
|
|
450
|
+
#
|
|
446
451
|
def speaker_turn_next?: () -> (true | false)
|
|
447
452
|
|
|
448
453
|
def text: () -> String
|
|
449
454
|
def no_speech_prob: () -> Float
|
|
455
|
+
|
|
456
|
+
# Get number of tokens in the segment
|
|
457
|
+
#
|
|
458
|
+
def n_tokens: () -> Integer
|
|
459
|
+
|
|
460
|
+
# Yields each Whisper::Token:
|
|
461
|
+
#
|
|
462
|
+
# whisper.each_segment.first.each_token do |token|
|
|
463
|
+
# p token
|
|
464
|
+
# end
|
|
465
|
+
#
|
|
466
|
+
# Returns an Enumerator if no block is given:
|
|
467
|
+
#
|
|
468
|
+
# whisper.each_segment.first.each_token.to_a # => [#<Whisper::Token>, ...]
|
|
469
|
+
#
|
|
470
|
+
def each_token: { (Token) -> void } -> void
|
|
471
|
+
| () -> Enumerator[Token]
|
|
450
472
|
def to_srt_cue: () -> String
|
|
451
473
|
def to_webvtt_cue: () -> String
|
|
452
474
|
|
|
475
|
+
|
|
453
476
|
# Possible keys: :start_time, :end_time, :text, :no_speech_prob, :speaker_turn_next
|
|
454
477
|
#
|
|
455
478
|
# whisper.each_segment do |segment|
|
|
@@ -457,7 +480,77 @@ module Whisper
|
|
|
457
480
|
#
|
|
458
481
|
# puts "[#{start_time} --> #{end_time}] #{text} (no speech prob: #{no_speech_prob}#{speaker_turn_next ? ', speaker turns next' : ''})"
|
|
459
482
|
# end
|
|
460
|
-
def deconstruct_keys: (Array[:start_time | :end_time | :text | :no_speech_prob | :speaker_turn_next] | nil) -> deconstructed_keys
|
|
483
|
+
def deconstruct_keys: (Array[:start_time | :end_time | :text | :no_speech_prob | :speaker_turn_next | :n_tokens] | nil) -> deconstructed_keys
|
|
484
|
+
end
|
|
485
|
+
|
|
486
|
+
module Token
|
|
487
|
+
type deconstructed_keys = {
|
|
488
|
+
id: (Integer | nil),
|
|
489
|
+
tid: (Integer | nil),
|
|
490
|
+
probability: (Float | nil),
|
|
491
|
+
log_probability: (Float | nil),
|
|
492
|
+
pt: (Float | nil),
|
|
493
|
+
ptsum: (Float | nil),
|
|
494
|
+
t_dtw: (Integer | nil),
|
|
495
|
+
voice_length: (Float | nil),
|
|
496
|
+
text: (String | nil),
|
|
497
|
+
start_time: (Integer | nil),
|
|
498
|
+
end_time: (Integer | nil),
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
# Token ID.
|
|
502
|
+
#
|
|
503
|
+
def id: () -> Integer
|
|
504
|
+
|
|
505
|
+
# Forced timestamp token ID.
|
|
506
|
+
#
|
|
507
|
+
def tid: () -> Integer
|
|
508
|
+
|
|
509
|
+
# Probability of the token.
|
|
510
|
+
#
|
|
511
|
+
def probability: () -> Float
|
|
512
|
+
|
|
513
|
+
# Log probability of the token.
|
|
514
|
+
#
|
|
515
|
+
def log_probability: () -> Float
|
|
516
|
+
|
|
517
|
+
# Probability of the timestamp token.
|
|
518
|
+
#
|
|
519
|
+
def pt: () -> Float
|
|
520
|
+
|
|
521
|
+
# Sum of probability of all timestamp tokens.
|
|
522
|
+
#
|
|
523
|
+
def ptsum: () -> Float
|
|
524
|
+
|
|
525
|
+
# [EXPERIMENTAL] Token-level timestamps with DTW
|
|
526
|
+
#
|
|
527
|
+
# Do not use if you haven't computed token-level timestamps with dtw.
|
|
528
|
+
# Roughly corresponds to the moment in audio in which the token was output.
|
|
529
|
+
#
|
|
530
|
+
def t_dtw: () -> Integer
|
|
531
|
+
|
|
532
|
+
# Voice length of the token.
|
|
533
|
+
#
|
|
534
|
+
def voice_length: () -> Float
|
|
535
|
+
|
|
536
|
+
# Start time of the token.
|
|
537
|
+
#
|
|
538
|
+
# Token-level timestamp data.
|
|
539
|
+
# Do not use if you haven't computed token-level timestamps.
|
|
540
|
+
#
|
|
541
|
+
def start_time: () -> Integer
|
|
542
|
+
|
|
543
|
+
# End time of the token.
|
|
544
|
+
#
|
|
545
|
+
# Token-level timestamp data.
|
|
546
|
+
# Do not use if you haven't computed token-level timestamps.
|
|
547
|
+
#
|
|
548
|
+
def end_time: () -> Integer
|
|
549
|
+
|
|
550
|
+
# Get the token text of the token.
|
|
551
|
+
#
|
|
552
|
+
def text: () -> String
|
|
553
|
+
def deconstruct_keys: (Array[:id | :tid | :probability | :log_probability | :pt | :ptsum | :t_dtw | :voice_length | :start_time | :end_time | :text] | nil) -> deconstructed_keys
|
|
461
554
|
end
|
|
462
555
|
|
|
463
556
|
module VAD
|
|
@@ -507,6 +600,30 @@ module Whisper
|
|
|
507
600
|
def samples_overlap: () -> Float
|
|
508
601
|
def ==: (Params) -> (true | false)
|
|
509
602
|
end
|
|
603
|
+
|
|
604
|
+
class Context
|
|
605
|
+
def self.new: (String | path | ::URI::HTTP model_name_or_path) -> instance
|
|
606
|
+
def detect: (path wav_file_path, Params) -> Segments
|
|
607
|
+
end
|
|
608
|
+
|
|
609
|
+
class Segments
|
|
610
|
+
include Enumerable[Segment]
|
|
611
|
+
|
|
612
|
+
def each: { (Segment) -> void } -> void
|
|
613
|
+
| () -> Enumerator[Segment]
|
|
614
|
+
def length: -> Integer
|
|
615
|
+
end
|
|
616
|
+
|
|
617
|
+
class Segment
|
|
618
|
+
type deconstructed_keys = {
|
|
619
|
+
start_time: (Integer | nil),
|
|
620
|
+
end_time: (Integer | nil),
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
def start_time: () -> Integer
|
|
624
|
+
def end_time: () -> Integer
|
|
625
|
+
def deconstruct_keys: (Array[:start_time | :end_time] | nil) -> deconstructed_keys
|
|
626
|
+
end
|
|
510
627
|
end
|
|
511
628
|
|
|
512
629
|
class Error < StandardError
|
data/test/test_params.rb
CHANGED
|
@@ -16,6 +16,7 @@ class TestParams < TestBase
|
|
|
16
16
|
:max_len,
|
|
17
17
|
:split_on_word,
|
|
18
18
|
:initial_prompt,
|
|
19
|
+
:carry_initial_prompt,
|
|
19
20
|
:diarize,
|
|
20
21
|
:offset,
|
|
21
22
|
:duration,
|
|
@@ -119,6 +120,13 @@ class TestParams < TestBase
|
|
|
119
120
|
assert !@params.print_timestamps
|
|
120
121
|
end
|
|
121
122
|
|
|
123
|
+
def test_carry_initial_prompt
|
|
124
|
+
@params.carry_initial_prompt = true
|
|
125
|
+
assert @params.carry_initial_prompt
|
|
126
|
+
@params.carry_initial_prompt = false
|
|
127
|
+
assert !@params.carry_initial_prompt
|
|
128
|
+
end
|
|
129
|
+
|
|
122
130
|
def test_suppress_blank
|
|
123
131
|
@params.suppress_blank = true
|
|
124
132
|
assert @params.suppress_blank
|
|
@@ -210,12 +218,12 @@ class TestParams < TestBase
|
|
|
210
218
|
|
|
211
219
|
def test_vad_model_path
|
|
212
220
|
assert_nil @params.vad_model_path
|
|
213
|
-
@params.vad_model_path = "silero-v5.1.2"
|
|
214
|
-
assert_equal Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path, @params.vad_model_path
|
|
221
|
+
@params.vad_model_path = "silero-v6.2.0"
|
|
222
|
+
assert_equal Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path, @params.vad_model_path
|
|
215
223
|
end
|
|
216
224
|
|
|
217
225
|
def test_vad_model_path_with_nil
|
|
218
|
-
@params.vad_model_path = "silero-v5.1.2"
|
|
226
|
+
@params.vad_model_path = "silero-v6.2.0"
|
|
219
227
|
@params.vad_model_path = nil
|
|
220
228
|
assert_nil @params.vad_model_path
|
|
221
229
|
end
|
|
@@ -227,13 +235,13 @@ class TestParams < TestBase
|
|
|
227
235
|
end
|
|
228
236
|
|
|
229
237
|
def test_vad_model_path_with_URI_string
|
|
230
|
-
@params.vad_model_path = "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin"
|
|
231
|
-
assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
|
|
238
|
+
@params.vad_model_path = "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin"
|
|
239
|
+
assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path
|
|
232
240
|
end
|
|
233
241
|
|
|
234
242
|
def test_vad_model_path_with_URI
|
|
235
|
-
@params.vad_model_path = URI("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin")
|
|
236
|
-
assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
|
|
243
|
+
@params.vad_model_path = URI("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin")
|
|
244
|
+
assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path
|
|
237
245
|
end
|
|
238
246
|
|
|
239
247
|
def test_vad_params
|
|
@@ -281,7 +289,7 @@ class TestParams < TestBase
|
|
|
281
289
|
in [/_user_data\Z/, *]
|
|
282
290
|
Object.new
|
|
283
291
|
in [:vad_model_path, *]
|
|
284
|
-
Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
|
|
292
|
+
Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path
|
|
285
293
|
in [:vad_params, *]
|
|
286
294
|
Whisper::VAD::Params.new
|
|
287
295
|
end
|
data/test/test_segment.rb
CHANGED
|
@@ -73,7 +73,6 @@ class TestSegment < TestBase
|
|
|
73
73
|
end
|
|
74
74
|
|
|
75
75
|
def test_transcription_after_segment_retrieved
|
|
76
|
-
params = Whisper::Params.new
|
|
77
76
|
segment = whisper.each_segment.first
|
|
78
77
|
assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
|
|
79
78
|
|