whispercpp 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +99 -2
- data/ext/extconf.rb +1 -0
- data/ext/ruby_whisper.c +20 -4
- data/ext/ruby_whisper.h +30 -2
- data/ext/ruby_whisper_context.c +216 -124
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +0 -1
- data/ext/ruby_whisper_params.c +0 -1
- data/ext/ruby_whisper_segment.c +0 -1
- data/ext/ruby_whisper_token.c +29 -9
- data/ext/ruby_whisper_transcribe.cpp +4 -1
- data/ext/ruby_whisper_vad_context.c +48 -1
- data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +0 -1
- data/ext/ruby_whisper_vad_segments.c +0 -1
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +8 -0
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/server/server.cpp +18 -4
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
- data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
- data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
- data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
- data/ext/sources/examples/talk-llama/llama-context.h +27 -28
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
- data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
- data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
- data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
- data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
- data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
- data/ext/sources/examples/talk-llama/llama-model.h +72 -19
- data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
- data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
- data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -22
- data/ext/sources/examples/talk-llama/llama.h +63 -30
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
- data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
- data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
- data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
- data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
- data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/models.h +181 -46
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
- data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
- data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
- data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
- data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
- data/ext/sources/ggml/CMakeLists.txt +9 -3
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +5 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +6 -1
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml.h +56 -9
- data/ext/sources/ggml/src/CMakeLists.txt +3 -0
- data/ext/sources/ggml/src/ggml-alloc.c +4 -9
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
- data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
- data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
- data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
- data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
- data/ext/sources/ggml/src/ggml-impl.h +62 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
- data/ext/sources/ggml/src/ggml.c +167 -33
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/src/whisper.cpp +6 -28
- data/sig/whisper.rbs +43 -2
- data/test/test_context_params.rb +82 -0
- data/test/test_token.rb +11 -0
- data/test/test_vad_context.rb +58 -8
- data/test/test_whisper.rb +20 -0
- data/whispercpp.gemspec +1 -1
- metadata +240 -28
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
|
@@ -15,6 +15,17 @@
|
|
|
15
15
|
#include <string>
|
|
16
16
|
#include <vector>
|
|
17
17
|
|
|
18
|
+
#define GGUF_MAX_STRING_LENGTH (1024*1024*1024)
|
|
19
|
+
#define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024)
|
|
20
|
+
|
|
21
|
+
#ifdef _WIN32
|
|
22
|
+
# define gguf_ftell _ftelli64
|
|
23
|
+
# define gguf_fseek _fseeki64
|
|
24
|
+
#else
|
|
25
|
+
# define gguf_ftell ftello
|
|
26
|
+
# define gguf_fseek fseeko
|
|
27
|
+
#endif
|
|
28
|
+
|
|
18
29
|
template <typename T>
|
|
19
30
|
struct type_to_gguf_type;
|
|
20
31
|
|
|
@@ -217,17 +228,64 @@ struct gguf_context {
|
|
|
217
228
|
};
|
|
218
229
|
|
|
219
230
|
struct gguf_reader {
|
|
220
|
-
FILE * file
|
|
231
|
+
gguf_reader(FILE * file) : file(file) {
|
|
232
|
+
// read the remaining bytes once and update on each read
|
|
233
|
+
nbytes_remain = file_remain(file);
|
|
234
|
+
}
|
|
221
235
|
|
|
222
|
-
|
|
236
|
+
// helper for remaining bytes in a file
|
|
237
|
+
static uint64_t file_remain(FILE * file) {
|
|
238
|
+
const int64_t cur = gguf_ftell(file);
|
|
239
|
+
if (cur < 0) {
|
|
240
|
+
return 0;
|
|
241
|
+
}
|
|
242
|
+
if (gguf_fseek(file, 0, SEEK_END) != 0) {
|
|
243
|
+
gguf_fseek(file, cur, SEEK_SET);
|
|
244
|
+
|
|
245
|
+
return 0;
|
|
246
|
+
}
|
|
247
|
+
const int64_t end = gguf_ftell(file);
|
|
248
|
+
if (end < 0) {
|
|
249
|
+
gguf_fseek(file, cur, SEEK_SET);
|
|
250
|
+
|
|
251
|
+
return 0;
|
|
252
|
+
}
|
|
253
|
+
gguf_fseek(file, cur, SEEK_SET);
|
|
254
|
+
return static_cast<uint64_t>(end - cur);
|
|
255
|
+
}
|
|
223
256
|
|
|
224
257
|
template <typename T>
|
|
225
258
|
bool read(T & dst) const {
|
|
226
|
-
|
|
259
|
+
const size_t size = sizeof(dst);
|
|
260
|
+
if (nbytes_remain < size) {
|
|
261
|
+
return false;
|
|
262
|
+
}
|
|
263
|
+
const size_t nread = fread(&dst, 1, size, file);
|
|
264
|
+
nbytes_remain -= nread;
|
|
265
|
+
return nread == size;
|
|
227
266
|
}
|
|
228
267
|
|
|
229
268
|
template <typename T>
|
|
230
269
|
bool read(std::vector<T> & dst, const size_t n) const {
|
|
270
|
+
if (n > GGUF_MAX_ARRAY_ELEMENTS) {
|
|
271
|
+
return false;
|
|
272
|
+
}
|
|
273
|
+
if constexpr (std::is_same<T, std::string>::value) {
|
|
274
|
+
// strings are prefixed with their length, so we need to account for that
|
|
275
|
+
if (n > SIZE_MAX / sizeof(uint64_t)) {
|
|
276
|
+
return false;
|
|
277
|
+
}
|
|
278
|
+
if (nbytes_remain < n * sizeof(uint64_t)) {
|
|
279
|
+
return false;
|
|
280
|
+
}
|
|
281
|
+
} else {
|
|
282
|
+
if (n > SIZE_MAX / sizeof(T)) {
|
|
283
|
+
return false;
|
|
284
|
+
}
|
|
285
|
+
if (nbytes_remain < n * sizeof(T)) {
|
|
286
|
+
return false;
|
|
287
|
+
}
|
|
288
|
+
}
|
|
231
289
|
dst.resize(n);
|
|
232
290
|
for (size_t i = 0; i < dst.size(); ++i) {
|
|
233
291
|
if constexpr (std::is_same<T, bool>::value) {
|
|
@@ -273,17 +331,37 @@ struct gguf_reader {
|
|
|
273
331
|
}
|
|
274
332
|
|
|
275
333
|
bool read(std::string & dst) const {
|
|
276
|
-
uint64_t size =
|
|
334
|
+
uint64_t size = 0;
|
|
277
335
|
if (!read(size)) {
|
|
278
336
|
return false;
|
|
279
337
|
}
|
|
280
|
-
|
|
281
|
-
|
|
338
|
+
if (size > GGUF_MAX_STRING_LENGTH) {
|
|
339
|
+
GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds maximum %" PRIu64 "\n", __func__, size, (uint64_t) GGUF_MAX_STRING_LENGTH);
|
|
340
|
+
return false;
|
|
341
|
+
}
|
|
342
|
+
if (size > nbytes_remain) {
|
|
343
|
+
GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds remaining file size %" PRIu64 " bytes\n", __func__, size, nbytes_remain);
|
|
344
|
+
return false;
|
|
345
|
+
}
|
|
346
|
+
dst.resize(static_cast<size_t>(size));
|
|
347
|
+
const size_t nread = fread(dst.data(), 1, size, file);
|
|
348
|
+
nbytes_remain -= nread;
|
|
349
|
+
return nread == size;
|
|
282
350
|
}
|
|
283
351
|
|
|
284
352
|
bool read(void * dst, const size_t size) const {
|
|
285
|
-
|
|
353
|
+
if (size > nbytes_remain) {
|
|
354
|
+
return false;
|
|
355
|
+
}
|
|
356
|
+
const size_t nread = fread(dst, 1, size, file);
|
|
357
|
+
nbytes_remain -= nread;
|
|
358
|
+
return nread == size;
|
|
286
359
|
}
|
|
360
|
+
|
|
361
|
+
private:
|
|
362
|
+
FILE * file;
|
|
363
|
+
|
|
364
|
+
mutable uint64_t nbytes_remain;
|
|
287
365
|
};
|
|
288
366
|
|
|
289
367
|
struct gguf_context * gguf_init_empty(void) {
|
|
@@ -523,7 +601,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
523
601
|
|
|
524
602
|
// tensor shape
|
|
525
603
|
{
|
|
526
|
-
uint32_t n_dims =
|
|
604
|
+
uint32_t n_dims = 0;
|
|
527
605
|
ok = ok && gr.read(n_dims);
|
|
528
606
|
if (n_dims > GGML_MAX_DIMS) {
|
|
529
607
|
GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
|
|
@@ -568,8 +646,8 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
568
646
|
|
|
569
647
|
// check that tensor type is within defined range
|
|
570
648
|
if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
|
|
571
|
-
GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d
|
|
572
|
-
__func__, info.t.name, info.t.type,
|
|
649
|
+
GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d. should be in [0, %d)\n",
|
|
650
|
+
__func__, info.t.name, info.t.type, GGML_TYPE_COUNT);
|
|
573
651
|
ok = false;
|
|
574
652
|
break;
|
|
575
653
|
}
|
|
@@ -585,6 +663,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
585
663
|
break;
|
|
586
664
|
}
|
|
587
665
|
|
|
666
|
+
// check that the size of the tensor in bytes is representable
|
|
667
|
+
if (ok && uint64_t(ggml_nelements(&info.t)/ggml_blck_size(info.t.type)) > SIZE_MAX/ggml_type_size(info.t.type)) {
|
|
668
|
+
GGML_LOG_ERROR("%s: tensor '%s' with shape (%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") has a size in bytes > %zu\n",
|
|
669
|
+
__func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], SIZE_MAX);
|
|
670
|
+
ok = false;
|
|
671
|
+
break;
|
|
672
|
+
}
|
|
673
|
+
|
|
588
674
|
// calculate byte offsets given the tensor shape and type
|
|
589
675
|
info.t.nb[0] = type_size;
|
|
590
676
|
info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);
|
|
@@ -610,14 +696,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
610
696
|
GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
|
|
611
697
|
|
|
612
698
|
// we require the data section to be aligned, so take into account any padding
|
|
613
|
-
if (
|
|
699
|
+
if (gguf_fseek(file, GGML_PAD(gguf_ftell(file), ctx->alignment), SEEK_SET) != 0) {
|
|
614
700
|
GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
|
|
615
701
|
gguf_free(ctx);
|
|
616
702
|
return nullptr;
|
|
617
703
|
}
|
|
618
704
|
|
|
619
705
|
// store the current file offset - this is where the data section starts
|
|
620
|
-
ctx->offset =
|
|
706
|
+
ctx->offset = gguf_ftell(file);
|
|
621
707
|
|
|
622
708
|
// compute the total size of the data section, taking into account the alignment
|
|
623
709
|
{
|
|
@@ -649,10 +735,34 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
649
735
|
// the ggml_tensor structs to the appropriate locations in the binary blob
|
|
650
736
|
|
|
651
737
|
// compute the exact size needed for the new ggml_context
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
(n_tensors
|
|
655
|
-
|
|
738
|
+
size_t mem_size = 0;
|
|
739
|
+
if (params.no_alloc) {
|
|
740
|
+
if (n_tensors != 0 && SIZE_MAX / n_tensors < ggml_tensor_overhead()) {
|
|
741
|
+
GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
|
|
742
|
+
gguf_free(ctx);
|
|
743
|
+
return nullptr;
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
const size_t overhead = n_tensors * ggml_tensor_overhead();
|
|
747
|
+
|
|
748
|
+
mem_size = overhead;
|
|
749
|
+
} else {
|
|
750
|
+
if ((n_tensors + 1) != 0 && SIZE_MAX / (n_tensors + 1) < ggml_tensor_overhead()) {
|
|
751
|
+
GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
|
|
752
|
+
gguf_free(ctx);
|
|
753
|
+
return nullptr;
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
const size_t overhead = (n_tensors + 1) * ggml_tensor_overhead();
|
|
757
|
+
|
|
758
|
+
if (SIZE_MAX - overhead < ctx->size) {
|
|
759
|
+
GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
|
|
760
|
+
gguf_free(ctx);
|
|
761
|
+
return nullptr;
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
mem_size = overhead + ctx->size;
|
|
765
|
+
}
|
|
656
766
|
|
|
657
767
|
struct ggml_init_params pdata = {
|
|
658
768
|
/*mem_size =*/ mem_size,
|
|
@@ -734,7 +844,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
|
734
844
|
FILE * file = ggml_fopen(fname, "rb");
|
|
735
845
|
|
|
736
846
|
if (!file) {
|
|
737
|
-
GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname);
|
|
847
|
+
GGML_LOG_ERROR("%s: failed to open GGUF file '%s' (%s)\n", __func__, fname, strerror(errno));
|
|
738
848
|
return nullptr;
|
|
739
849
|
}
|
|
740
850
|
|
|
@@ -1166,50 +1276,51 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
|
|
|
1166
1276
|
ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
|
|
1167
1277
|
}
|
|
1168
1278
|
|
|
1169
|
-
struct
|
|
1170
|
-
|
|
1279
|
+
struct gguf_writer_base {
|
|
1280
|
+
size_t written_bytes {0u};
|
|
1281
|
+
|
|
1282
|
+
~gguf_writer_base(void) = default;
|
|
1171
1283
|
|
|
1172
|
-
|
|
1284
|
+
// we bet on devirtualization
|
|
1285
|
+
virtual void write(int8_t val) = 0;
|
|
1286
|
+
virtual void write(const std::vector<int8_t> & val) = 0;
|
|
1287
|
+
virtual void write_tensor_data(const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0;
|
|
1173
1288
|
|
|
1174
1289
|
template <typename T>
|
|
1175
|
-
void write(const T & val)
|
|
1290
|
+
void write(const T & val) {
|
|
1176
1291
|
for (size_t i = 0; i < sizeof(val); ++i) {
|
|
1177
|
-
|
|
1292
|
+
write(reinterpret_cast<const int8_t *>(&val)[i]);
|
|
1178
1293
|
}
|
|
1179
1294
|
}
|
|
1180
1295
|
|
|
1181
|
-
void write(const
|
|
1182
|
-
buf.insert(buf.end(), val.begin(), val.end());
|
|
1183
|
-
}
|
|
1184
|
-
|
|
1185
|
-
void write(const bool & val) const {
|
|
1296
|
+
void write(const bool & val) {
|
|
1186
1297
|
const int8_t val8 = val ? 1 : 0;
|
|
1187
1298
|
write(val8);
|
|
1188
1299
|
}
|
|
1189
1300
|
|
|
1190
|
-
void write(const std::string & val)
|
|
1301
|
+
void write(const std::string & val) {
|
|
1191
1302
|
{
|
|
1192
1303
|
const uint64_t n = val.length();
|
|
1193
1304
|
write(n);
|
|
1194
1305
|
}
|
|
1195
1306
|
for (size_t i = 0; i < val.length(); ++i) {
|
|
1196
|
-
|
|
1307
|
+
write((val.data())[i]);
|
|
1197
1308
|
}
|
|
1198
1309
|
}
|
|
1199
1310
|
|
|
1200
|
-
void write(const char * val)
|
|
1311
|
+
void write(const char * val) {
|
|
1201
1312
|
write(std::string(val));
|
|
1202
1313
|
}
|
|
1203
1314
|
|
|
1204
|
-
void write(const enum ggml_type & val)
|
|
1315
|
+
void write(const enum ggml_type & val) {
|
|
1205
1316
|
write(int32_t(val));
|
|
1206
1317
|
}
|
|
1207
1318
|
|
|
1208
|
-
void write(const enum gguf_type & val)
|
|
1319
|
+
void write(const enum gguf_type & val) {
|
|
1209
1320
|
write(int32_t(val));
|
|
1210
1321
|
}
|
|
1211
1322
|
|
|
1212
|
-
void write(const struct gguf_kv & kv)
|
|
1323
|
+
void write(const struct gguf_kv & kv) {
|
|
1213
1324
|
const uint64_t ne = kv.get_ne();
|
|
1214
1325
|
|
|
1215
1326
|
write(kv.get_key());
|
|
@@ -1250,7 +1361,7 @@ struct gguf_writer {
|
|
|
1250
1361
|
}
|
|
1251
1362
|
}
|
|
1252
1363
|
|
|
1253
|
-
void write_tensor_meta(const struct gguf_tensor_info & info)
|
|
1364
|
+
void write_tensor_meta(const struct gguf_tensor_info & info) {
|
|
1254
1365
|
write(info.t.name);
|
|
1255
1366
|
|
|
1256
1367
|
const uint32_t n_dims = ggml_n_dims(&info.t);
|
|
@@ -1263,14 +1374,33 @@ struct gguf_writer {
|
|
|
1263
1374
|
write(info.offset);
|
|
1264
1375
|
}
|
|
1265
1376
|
|
|
1266
|
-
void pad(const size_t alignment)
|
|
1267
|
-
while (
|
|
1377
|
+
void pad(const size_t alignment) {
|
|
1378
|
+
while (written_bytes % alignment != 0) {
|
|
1268
1379
|
const int8_t zero = 0;
|
|
1269
1380
|
write(zero);
|
|
1270
1381
|
}
|
|
1271
1382
|
}
|
|
1383
|
+
};
|
|
1384
|
+
|
|
1385
|
+
// vector buffer based writer
|
|
1386
|
+
struct gguf_writer_buf final : public gguf_writer_base {
|
|
1387
|
+
std::vector<int8_t> & buf;
|
|
1388
|
+
|
|
1389
|
+
gguf_writer_buf(std::vector<int8_t> & buf) : buf(buf) {}
|
|
1272
1390
|
|
|
1273
|
-
|
|
1391
|
+
using gguf_writer_base::write;
|
|
1392
|
+
|
|
1393
|
+
void write(const int8_t val) override {
|
|
1394
|
+
buf.push_back(val);
|
|
1395
|
+
written_bytes++;
|
|
1396
|
+
}
|
|
1397
|
+
|
|
1398
|
+
void write(const std::vector<int8_t> & val) override {
|
|
1399
|
+
buf.insert(buf.end(), val.begin(), val.end());
|
|
1400
|
+
written_bytes += val.size();
|
|
1401
|
+
}
|
|
1402
|
+
|
|
1403
|
+
void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
|
|
1274
1404
|
GGML_ASSERT(buf.size() - offset_data == info.offset);
|
|
1275
1405
|
|
|
1276
1406
|
GGML_ASSERT(ggml_is_contiguous(&info.t));
|
|
@@ -1284,14 +1414,58 @@ struct gguf_writer {
|
|
|
1284
1414
|
GGML_ASSERT(info.t.data);
|
|
1285
1415
|
memcpy(buf.data() + offset, info.t.data, nbytes);
|
|
1286
1416
|
}
|
|
1417
|
+
written_bytes += nbytes;
|
|
1287
1418
|
|
|
1288
1419
|
pad(alignment);
|
|
1289
1420
|
}
|
|
1290
1421
|
};
|
|
1291
1422
|
|
|
1292
|
-
|
|
1293
|
-
|
|
1423
|
+
// file based writer
|
|
1424
|
+
struct gguf_writer_file final : public gguf_writer_base {
|
|
1425
|
+
FILE * file;
|
|
1426
|
+
|
|
1427
|
+
gguf_writer_file(FILE* file) : file(file) {}
|
|
1428
|
+
|
|
1429
|
+
using gguf_writer_base::write;
|
|
1430
|
+
|
|
1431
|
+
void write(const int8_t val) override {
|
|
1432
|
+
const auto real_val = static_cast<uint8_t>(val);
|
|
1433
|
+
const auto ret = fputc(real_val, file);
|
|
1434
|
+
written_bytes++;
|
|
1435
|
+
if (ret != real_val) {
|
|
1436
|
+
throw std::runtime_error("unexpected fputc result '" + std::to_string(ret) + "' instead of '" + std::to_string((int)real_val) + "'");
|
|
1437
|
+
}
|
|
1438
|
+
}
|
|
1439
|
+
|
|
1440
|
+
void write(const std::vector<int8_t> & val) override {
|
|
1441
|
+
const auto ret = fwrite(val.data(), 1, val.size(), file);
|
|
1442
|
+
written_bytes += val.size();
|
|
1443
|
+
if (ret != val.size()) {
|
|
1444
|
+
throw std::runtime_error("unexpected fwrite number of bytes written, '" + std::to_string(ret) + "' instead of '" + std::to_string(val.size()) + "'");
|
|
1445
|
+
}
|
|
1446
|
+
}
|
|
1447
|
+
|
|
1448
|
+
void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
|
|
1449
|
+
GGML_ASSERT(written_bytes - offset_data == info.offset);
|
|
1450
|
+
|
|
1451
|
+
GGML_ASSERT(ggml_is_contiguous(&info.t));
|
|
1452
|
+
const size_t nbytes = ggml_nbytes(&info.t);
|
|
1453
|
+
|
|
1454
|
+
std::vector<int8_t> buf(nbytes);
|
|
1455
|
+
if (info.t.buffer) {
|
|
1456
|
+
ggml_backend_tensor_get(&info.t, buf.data(), 0, nbytes);
|
|
1457
|
+
} else {
|
|
1458
|
+
GGML_ASSERT(info.t.data);
|
|
1459
|
+
memcpy(buf.data(), info.t.data, nbytes);
|
|
1460
|
+
}
|
|
1461
|
+
write(buf);
|
|
1294
1462
|
|
|
1463
|
+
pad(alignment);
|
|
1464
|
+
}
|
|
1465
|
+
};
|
|
1466
|
+
|
|
1467
|
+
template <typename writer_t>
|
|
1468
|
+
static void gguf_write_out(const struct gguf_context * ctx, writer_t & gw, bool only_meta) {
|
|
1295
1469
|
const int64_t n_kv = gguf_get_n_kv(ctx);
|
|
1296
1470
|
const int64_t n_tensors = gguf_get_n_tensors(ctx);
|
|
1297
1471
|
|
|
@@ -1321,7 +1495,7 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
|
|
|
1321
1495
|
return;
|
|
1322
1496
|
}
|
|
1323
1497
|
|
|
1324
|
-
const size_t offset_data = gw.
|
|
1498
|
+
const size_t offset_data = gw.written_bytes;
|
|
1325
1499
|
|
|
1326
1500
|
// write tensor data
|
|
1327
1501
|
for (int64_t i = 0; i < n_tensors; ++i) {
|
|
@@ -1329,6 +1503,11 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
|
|
|
1329
1503
|
}
|
|
1330
1504
|
}
|
|
1331
1505
|
|
|
1506
|
+
void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
|
|
1507
|
+
gguf_writer_buf gw(buf);
|
|
1508
|
+
gguf_write_out(ctx, gw, only_meta);
|
|
1509
|
+
}
|
|
1510
|
+
|
|
1332
1511
|
bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
|
|
1333
1512
|
FILE * file = ggml_fopen(fname, "wb");
|
|
1334
1513
|
|
|
@@ -1337,11 +1516,17 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
|
|
|
1337
1516
|
return false;
|
|
1338
1517
|
}
|
|
1339
1518
|
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1519
|
+
try {
|
|
1520
|
+
gguf_writer_file gw(file);
|
|
1521
|
+
gguf_write_out(ctx, gw, only_meta);
|
|
1522
|
+
} catch (const std::runtime_error& ex) {
|
|
1523
|
+
GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what());
|
|
1524
|
+
fclose(file);
|
|
1525
|
+
return false;
|
|
1526
|
+
}
|
|
1527
|
+
|
|
1343
1528
|
fclose(file);
|
|
1344
|
-
return
|
|
1529
|
+
return true;
|
|
1345
1530
|
}
|
|
1346
1531
|
|
|
1347
1532
|
size_t gguf_get_meta_size(const struct gguf_context * ctx) {
|
data/ext/sources/src/whisper.cpp
CHANGED
|
@@ -6701,12 +6701,13 @@ static bool whisper_vad(
|
|
|
6701
6701
|
int segment_start_samples = cs_to_samples(vad_segments->data[i].start);
|
|
6702
6702
|
int segment_end_samples = cs_to_samples(vad_segments->data[i].end);
|
|
6703
6703
|
|
|
6704
|
-
if (i < (int)vad_segments->data.size() - 1) {
|
|
6705
|
-
segment_end_samples += overlap_samples;
|
|
6706
|
-
}
|
|
6707
|
-
|
|
6708
6704
|
segment_start_samples = std::min(segment_start_samples, n_samples - 1);
|
|
6709
6705
|
segment_end_samples = std::min(segment_end_samples, n_samples - 1);
|
|
6706
|
+
int original_segment_length = segment_end_samples - segment_start_samples;
|
|
6707
|
+
|
|
6708
|
+
if (i < (int)vad_segments->data.size() - 1) {
|
|
6709
|
+
segment_end_samples = std::min(segment_end_samples + overlap_samples, n_samples - 1);
|
|
6710
|
+
}
|
|
6710
6711
|
int segment_length = segment_end_samples - segment_start_samples;
|
|
6711
6712
|
if (segment_length > 0) {
|
|
6712
6713
|
whisper_state::vad_segment_info segment;
|
|
@@ -6715,7 +6716,7 @@ static bool whisper_vad(
|
|
|
6715
6716
|
segment.orig_end = vad_segments->data[i].end;
|
|
6716
6717
|
|
|
6717
6718
|
segment.vad_start = samples_to_cs(offset);
|
|
6718
|
-
segment.vad_end = samples_to_cs(offset +
|
|
6719
|
+
segment.vad_end = samples_to_cs(offset + original_segment_length);
|
|
6719
6720
|
|
|
6720
6721
|
// Add segment boundaries to mapping table
|
|
6721
6722
|
vad_time_mapping start_mapping = {segment.vad_start, segment.orig_start};
|
|
@@ -6724,29 +6725,6 @@ static bool whisper_vad(
|
|
|
6724
6725
|
state->vad_mapping_table.push_back(start_mapping);
|
|
6725
6726
|
state->vad_mapping_table.push_back(end_mapping);
|
|
6726
6727
|
|
|
6727
|
-
// Add intermediate points for longer segments to improve interpolation accuracy
|
|
6728
|
-
const int64_t min_segment_length = 100; // 1 second
|
|
6729
|
-
const int64_t point_interval = 20; // Add a point every 200ms
|
|
6730
|
-
|
|
6731
|
-
if (segment.vad_end - segment.vad_start > min_segment_length) {
|
|
6732
|
-
int64_t segment_duration = segment.vad_end - segment.vad_start;
|
|
6733
|
-
int num_points = (int)(segment_duration / point_interval) - 1;
|
|
6734
|
-
|
|
6735
|
-
for (int j = 1; j <= num_points; j++) {
|
|
6736
|
-
int64_t vad_time = segment.vad_start + j * point_interval;
|
|
6737
|
-
|
|
6738
|
-
if (vad_time >= segment.vad_end) continue;
|
|
6739
|
-
|
|
6740
|
-
int64_t vad_elapsed = vad_time - segment.vad_start;
|
|
6741
|
-
int64_t vad_total = segment.vad_end - segment.vad_start;
|
|
6742
|
-
int64_t orig_total = segment.orig_end - segment.orig_start;
|
|
6743
|
-
int64_t orig_time = segment.orig_start + (vad_elapsed * orig_total) / vad_total;
|
|
6744
|
-
|
|
6745
|
-
vad_time_mapping intermediate_mapping = {vad_time, orig_time};
|
|
6746
|
-
state->vad_mapping_table.push_back(intermediate_mapping);
|
|
6747
|
-
}
|
|
6748
|
-
}
|
|
6749
|
-
|
|
6750
6728
|
WHISPER_LOG_INFO("%s: vad_segment_info: orig_start: %.2f, orig_end: %.2f, vad_start: %.2f, vad_end: %.2f\n",
|
|
6751
6729
|
__func__, segment.orig_start/100.0, segment.orig_end/100.0, segment.vad_start/100.0, segment.vad_end/100.0);
|
|
6752
6730
|
ctx->state->vad_segments.push_back(segment);
|
data/sig/whisper.rbs
CHANGED
|
@@ -17,6 +17,21 @@ module Whisper
|
|
|
17
17
|
LOG_LEVEL_ERROR: Integer
|
|
18
18
|
LOG_LEVEL_DEBUG: Integer
|
|
19
19
|
LOG_LEVEL_CONT: Integer
|
|
20
|
+
AHEADS_NONE: Integer
|
|
21
|
+
AHEADS_N_TOP_MOST: Integer
|
|
22
|
+
AHEADS_CUSTOM: Integer
|
|
23
|
+
AHEADS_TINY_EN: Integer
|
|
24
|
+
AHEADS_TINY: Integer
|
|
25
|
+
AHEADS_BASE_EN: Integer
|
|
26
|
+
AHEADS_BASE: Integer
|
|
27
|
+
AHEADS_SMALL_EN: Integer
|
|
28
|
+
AHEADS_SMALL: Integer
|
|
29
|
+
AHEADS_MEDIUM_EN: Integer
|
|
30
|
+
AHEADS_MEDIUM: Integer
|
|
31
|
+
AHEADS_LARGE_V1: Integer
|
|
32
|
+
AHEADS_LARGE_V2: Integer
|
|
33
|
+
AHEADS_LARGE_V3: Integer
|
|
34
|
+
AHEADS_LARGE_V3_TURBO: Integer
|
|
20
35
|
|
|
21
36
|
def self.lang_max_id: () -> Integer
|
|
22
37
|
def self.lang_id: (string name) -> Integer
|
|
@@ -37,8 +52,8 @@ module Whisper
|
|
|
37
52
|
# puts text
|
|
38
53
|
# end
|
|
39
54
|
#
|
|
40
|
-
def transcribe: (
|
|
41
|
-
| (
|
|
55
|
+
def transcribe: (path, Params, ?n_processors: Integer) -> self
|
|
56
|
+
| (path, Params, ?n_processors: Integer) { (String) -> void } -> self
|
|
42
57
|
|
|
43
58
|
def model_n_vocab: () -> Integer
|
|
44
59
|
def model_n_audio_ctx: () -> Integer
|
|
@@ -120,6 +135,30 @@ module Whisper
|
|
|
120
135
|
|
|
121
136
|
def to_srt: () -> String
|
|
122
137
|
def to_webvtt: () -> String
|
|
138
|
+
|
|
139
|
+
class Params
|
|
140
|
+
def self.new: (
|
|
141
|
+
use_gpu: boolish,
|
|
142
|
+
flash_attn: boolish,
|
|
143
|
+
gpu_device: Integer,
|
|
144
|
+
dtw_token_timestamps: boolish,
|
|
145
|
+
dtw_aheads_preset: Integer,
|
|
146
|
+
dtw_n_top: Integer | nil,
|
|
147
|
+
) -> instance
|
|
148
|
+
|
|
149
|
+
def use_gpu=: (boolish) -> boolish
|
|
150
|
+
def use_gpu: () -> (true | false)
|
|
151
|
+
def flash_attn=: (boolish) -> boolish
|
|
152
|
+
def flash_attn: () -> (true | false)
|
|
153
|
+
def gpu_device=: (Integer) -> Integer
|
|
154
|
+
def gpu_device: () -> Integer
|
|
155
|
+
def dtw_token_timestamps=: (boolish) -> boolish
|
|
156
|
+
def dtw_token_timestamps: () -> (true | false)
|
|
157
|
+
def dtw_aheads_preset=: (Integer) -> Integer
|
|
158
|
+
def dtw_aheads_preset: () -> Integer
|
|
159
|
+
def dtw_n_top=: (Integer | nil) -> (Integer | nil)
|
|
160
|
+
def dtw_n_top: () -> (Integer | nil)
|
|
161
|
+
end
|
|
123
162
|
end
|
|
124
163
|
|
|
125
164
|
class Params
|
|
@@ -603,6 +642,8 @@ module Whisper
|
|
|
603
642
|
|
|
604
643
|
class Context
|
|
605
644
|
def self.new: (String | path | ::URI::HTTP model_name_or_path) -> instance
|
|
645
|
+
def segments_from_samples: (Params, Array[Float] samples, ?Integer n_samples) -> Segments
|
|
646
|
+
| (Params, _Samples, ?Integer n_samples) -> Segments
|
|
606
647
|
def detect: (path wav_file_path, Params) -> Segments
|
|
607
648
|
end
|
|
608
649
|
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
require_relative "helper"
|
|
2
|
+
|
|
3
|
+
class TestContextParams < TestBase
|
|
4
|
+
PARAM_NAMES = [
|
|
5
|
+
:use_gpu,
|
|
6
|
+
:flash_attn,
|
|
7
|
+
:gpu_device,
|
|
8
|
+
:dtw_token_timestamps,
|
|
9
|
+
:dtw_aheads_preset,
|
|
10
|
+
:dtw_n_top
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
def test_new
|
|
14
|
+
params = Whisper::Context::Params.new
|
|
15
|
+
assert_instance_of Whisper::Context::Params, params
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def test_attributes
|
|
19
|
+
params = Whisper::Context::Params.new
|
|
20
|
+
|
|
21
|
+
assert_true params.use_gpu
|
|
22
|
+
params.use_gpu = false
|
|
23
|
+
assert_false params.use_gpu
|
|
24
|
+
|
|
25
|
+
assert_true params.flash_attn
|
|
26
|
+
params.flash_attn = false
|
|
27
|
+
assert_false params.flash_attn
|
|
28
|
+
|
|
29
|
+
assert_equal 0, params.gpu_device
|
|
30
|
+
params.gpu_device = 1
|
|
31
|
+
assert_equal 1, params.gpu_device
|
|
32
|
+
|
|
33
|
+
assert_false params.dtw_token_timestamps
|
|
34
|
+
params.dtw_token_timestamps = true
|
|
35
|
+
assert_true params.dtw_token_timestamps
|
|
36
|
+
|
|
37
|
+
assert_equal Whisper::AHEADS_NONE, params.dtw_aheads_preset
|
|
38
|
+
params.dtw_aheads_preset =Whisper::AHEADS_BASE
|
|
39
|
+
assert_equal Whisper::AHEADS_BASE, params.dtw_aheads_preset
|
|
40
|
+
|
|
41
|
+
assert_nil params.dtw_n_top
|
|
42
|
+
params.dtw_n_top = 6
|
|
43
|
+
assert_equal 6, params.dtw_n_top
|
|
44
|
+
params.dtw_n_top = nil
|
|
45
|
+
assert_nil params.dtw_n_top
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def test_new_with_kw_args
|
|
49
|
+
params = Whisper::Context::Params.new(use_gpu: false)
|
|
50
|
+
assert_false params.use_gpu
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def test_new_with_kw_wargs_non_existent
|
|
54
|
+
assert_raise ArgumentError do
|
|
55
|
+
Whisper::Context::Params.new(non_existent: "value")
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
data(PARAM_NAMES.collect {|param| [param, param]}.to_h)
|
|
60
|
+
def test_new_with_kw_args_default_values(param)
|
|
61
|
+
default_params = Whisper::Context::Params.new
|
|
62
|
+
default_value = default_params.send(param)
|
|
63
|
+
value = if param == :dtw_n_top
|
|
64
|
+
6
|
|
65
|
+
else
|
|
66
|
+
case default_value
|
|
67
|
+
in true | false
|
|
68
|
+
!default_value
|
|
69
|
+
in Integer
|
|
70
|
+
default_value + 1
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
params = Whisper::Context::Params.new(param => value)
|
|
74
|
+
assert_equal value, params.send(param)
|
|
75
|
+
|
|
76
|
+
PARAM_NAMES.reject {|name| name == param}.each do |name|
|
|
77
|
+
expected = default_params.send(name)
|
|
78
|
+
actual = params.send(name)
|
|
79
|
+
assert_equal expected, actual
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
data/test/test_token.rb
CHANGED
|
@@ -56,6 +56,17 @@ class TestToken < TestBase
|
|
|
56
56
|
@segment.each_token.collect(&:text)
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
+
def test_token_timestamps
|
|
60
|
+
params = Whisper::Params.new(token_timestamps: true)
|
|
61
|
+
whisper.transcribe(TestBase::AUDIO, params)
|
|
62
|
+
prev = -1
|
|
63
|
+
whisper.each_segment.first.each_token do |token|
|
|
64
|
+
assert token.start_time >= prev
|
|
65
|
+
assert token.end_time >= token.start_time
|
|
66
|
+
prev = token.end_time
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
59
70
|
def test_deconstruct_keys_with_nil
|
|
60
71
|
keys = %i[id tid probability log_probability pt ptsum t_dtw voice_length start_time end_time text]
|
|
61
72
|
expected = keys.collect {|key| [key, @token.send(key)] }.to_h
|