whispercpp 1.3.6 → 1.3.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.document +3 -0
- data/.rdoc_options +2 -0
- data/README.md +38 -5
- data/Rakefile +18 -3
- data/ext/dependencies.rb +10 -4
- data/ext/dependencies_for_windows.rb +17 -0
- data/ext/extconf.rb +20 -8
- data/ext/options.rb +54 -14
- data/ext/options_for_windows.rb +51 -0
- data/ext/ruby_whisper.c +36 -42
- data/ext/ruby_whisper.h +135 -0
- data/ext/ruby_whisper_context.c +107 -28
- data/ext/ruby_whisper_log_queue.c +180 -0
- data/ext/ruby_whisper_log_settable.h +47 -0
- data/ext/ruby_whisper_parakeet.c +49 -0
- data/ext/ruby_whisper_parakeet_context.c +304 -0
- data/ext/ruby_whisper_parakeet_context_params.c +117 -0
- data/ext/ruby_whisper_parakeet_model.c +84 -0
- data/ext/ruby_whisper_parakeet_params.c +548 -0
- data/ext/ruby_whisper_parakeet_segment.c +157 -0
- data/ext/ruby_whisper_parakeet_token.c +188 -0
- data/ext/ruby_whisper_parakeet_transcribe.cpp +58 -0
- data/ext/ruby_whisper_params.c +256 -65
- data/ext/ruby_whisper_segment.c +6 -6
- data/ext/ruby_whisper_transcribe.cpp +42 -15
- data/ext/sources/CMakeLists.txt +41 -3
- data/ext/sources/CMakePresets.json +95 -0
- data/ext/sources/cmake/parakeet-config.cmake.in +30 -0
- data/ext/sources/cmake/parakeet.pc.in +10 -0
- data/ext/sources/cmake/whisper.pc.in +1 -1
- data/ext/sources/examples/CMakeLists.txt +4 -2
- data/ext/sources/examples/bench/bench.cpp +1 -1
- data/ext/sources/examples/cli/cli.cpp +43 -9
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/common-whisper.cpp +139 -67
- data/ext/sources/examples/common-whisper.h +11 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +211 -341
- data/ext/sources/examples/parakeet-cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/parakeet-cli/parakeet-cli.cpp +243 -0
- data/ext/sources/examples/parakeet-quantize/CMakeLists.txt +7 -0
- data/ext/sources/examples/parakeet-quantize/parakeet-quantize.cpp +230 -0
- data/ext/sources/examples/server/server.cpp +199 -163
- data/ext/sources/ggml/CMakeLists.txt +21 -13
- data/ext/sources/ggml/cmake/FindNCCL.cmake +36 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +12 -2
- data/ext/sources/ggml/include/ggml-alloc.h +1 -0
- data/ext/sources/ggml/include/ggml-backend.h +72 -10
- data/ext/sources/ggml/include/ggml-cuda.h +3 -0
- data/ext/sources/ggml/include/ggml-rpc.h +3 -3
- data/ext/sources/ggml/include/ggml.h +101 -9
- data/ext/sources/ggml/include/gguf.h +10 -2
- data/ext/sources/ggml/src/CMakeLists.txt +22 -5
- data/ext/sources/ggml/src/ggml-alloc.c +5 -1
- data/ext/sources/ggml/src/ggml-backend-impl.h +22 -2
- data/ext/sources/ggml/src/ggml-backend-meta.cpp +2263 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +12 -0
- data/ext/sources/ggml/src/ggml-backend.cpp +110 -9
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +672 -257
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +71 -0
- data/ext/sources/ggml/src/ggml-cann/common.h +20 -10
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +211 -30
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +58 -29
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +2 -0
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +16 -16
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +116 -7
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +65 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +151 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +0 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +4279 -1292
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +5 -35
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +0 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +72 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +177 -27
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +10 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +95 -5
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +146 -134
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +88 -70
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +372 -73
- data/ext/sources/ggml/src/ggml-cpu/ops.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +55 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +3 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +90 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +3 -16
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1402 -687
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +597 -2766
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +182 -19
- data/ext/sources/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +37 -53
- data/ext/sources/ggml/src/ggml-cpu/vec.h +225 -240
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +17 -7
- data/ext/sources/ggml/src/ggml-cuda/allreduce.cu +971 -0
- data/ext/sources/ggml/src/ggml-cuda/allreduce.cuh +29 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +62 -26
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +44 -18
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +242 -28
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +120 -114
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +45 -21
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +53 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +14 -6
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +22 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +278 -44
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +331 -130
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +12 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +126 -27
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +40 -15
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +18 -9
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +152 -49
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/fwht.cu +101 -0
- data/ext/sources/ggml/src/ggml-cuda/fwht.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +84 -35
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +34 -12
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1069 -609
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +32 -29
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +4 -2
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +242 -195
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +18 -12
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +502 -423
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +19 -12
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +485 -57
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +6 -1
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +36 -10
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +23 -7
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +133 -26
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +5 -1
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +11 -4
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +4 -1
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +14 -6
- data/ext/sources/ggml/src/ggml-cuda/snake.cu +72 -0
- data/ext/sources/ggml/src/ggml-cuda/snake.cuh +8 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +4 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +45 -13
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +40 -18
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +8 -4
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +26 -23
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +31 -2
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +80 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +7 -2
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +22 -4
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +2 -1
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +1428 -743
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -7
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +53 -84
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +25 -12
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +165 -184
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +5 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/concat-ops.c +277 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +170 -127
- data/ext/sources/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +125 -97
- data/ext/sources/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +1148 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +148 -42
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.c +2 -2
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.h +252 -62
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +9 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +87 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1878 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +2066 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-ops.c +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-ops.h +88 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +96 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +182 -57
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +9 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +71 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +27 -10
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +63 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +9 -8
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-flash-attn.h +47 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-log.h +65 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-pow.h +42 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +1 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sin-cos.h +90 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +5 -8
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +529 -815
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2522 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/pad-ops.c +547 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +291 -95
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +59 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +121 -133
- data/ext/sources/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +244 -151
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +6 -6
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +719 -45
- data/ext/sources/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-opnode.h +272 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +3 -1
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +22 -9
- data/ext/sources/ggml/src/ggml-impl.h +6 -1
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +138 -13
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +32 -1
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +164 -28
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +80 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +190 -19
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +2 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +39 -26
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +823 -322
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +5 -6
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +54 -5
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +12248 -5907
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +67 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +59 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +1819 -112
- data/ext/sources/ggml/src/ggml-opencl/kernels/gated_delta_net.cl +249 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +306 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +256 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +258 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_k_f32_ns.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +260 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +262 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_k_f32_ns.cl +288 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q6_k_f32_ns.cl +267 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mat_Ab_Bi_8x4.cl → gemm_noshuffle_q4_0_f32.cl} +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_0_f32.cl +131 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_1_f32.cl +134 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mm_q8_0_f32_8x4.cl → gemm_noshuffle_q8_0_f32.cl} +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +120 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +123 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_k_f32_ns.cl +155 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +123 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_k_f32_ns.cl +160 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q6_k_f32_ns.cl +141 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle_general.cl → gemv_noshuffle_q4_0_f32.cl} +5 -5
- data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle.cl → gemv_noshuffle_q4_0_f32_spec.cl} +5 -5
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_0_f32.cl +291 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_1_f32.cl +294 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +15 -9
- data/ext/sources/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_0_f32_l4_lm.cl +173 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_1_f32_l4_lm.cl +175 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_0_f32.cl +241 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_0_f32_flat.cl +243 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_1_f32.cl +243 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_1_f32_flat.cl +247 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +48 -64
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +15 -5
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +18 -11
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +35 -13
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +264 -192
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +33 -7
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +1 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +1 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +27 -3
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +67 -36
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +1 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +101 -44
- data/ext/sources/ggml/src/ggml-openvino/utils.h +23 -3
- data/ext/sources/ggml/src/ggml-opt.cpp +1 -0
- data/ext/sources/ggml/src/ggml-quants.c +289 -114
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +24 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +167 -311
- data/ext/sources/ggml/src/ggml-rpc/transport.cpp +683 -0
- data/ext/sources/ggml/src/ggml-rpc/transport.h +34 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +50 -4
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/common.cpp +74 -2
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +41 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +115 -13
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cumsum.cpp +148 -0
- data/ext/sources/ggml/src/ggml-sycl/cumsum.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +663 -0
- data/ext/sources/ggml/src/ggml-sycl/diag.cpp +67 -0
- data/ext/sources/ggml/src/ggml-sycl/diag.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +586 -6
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1 -90
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +0 -2
- data/ext/sources/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +7 -5
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +76 -168
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/fill.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fill.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +69 -31
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +79 -3
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +823 -190
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +353 -89
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +5 -3
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1344 -26
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +16 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +27 -27
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +71 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +7 -1
- data/ext/sources/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
- data/ext/sources/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +6 -1
- data/ext/sources/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +62 -10
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +18 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/type.hpp +112 -0
- data/ext/sources/ggml/src/ggml-sycl/upscale.cpp +410 -0
- data/ext/sources/ggml/src/ggml-sycl/upscale.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +215 -53
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +4 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +2 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +2 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +1 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +1 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +0 -2
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +2060 -535
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +6 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +146 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +25 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +643 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dot_product_funcs.glsl +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2_decode_vector.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +197 -48
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +60 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +115 -113
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +122 -31
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +131 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fwht.comp +115 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +125 -64
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +10 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +16 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +76 -54
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +122 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +6 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +1 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +88 -55
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +43 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +159 -125
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +8 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +24 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +5 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +3 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/snake.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +11 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +79 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +171 -147
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +5 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +2202 -283
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +2610 -1403
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +37 -7
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +8 -7
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +76 -95
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +19 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{cpy.tmpl.wgsl → cpy.wgsl} +25 -50
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +107 -184
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_quant_staging.tmpl +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +397 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +619 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +149 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +183 -78
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +655 -495
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +52 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +8 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +5 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +80 -409
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1432 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_q_acc.tmpl +303 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/quant_inner_loops.tmpl +21 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/quantize_q8.wgsl +173 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{rope.tmpl.wgsl → rope.wgsl} +71 -142
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +6 -4
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +2 -3
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows_quant.wgsl +224 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{soft_max.tmpl.wgsl → soft_max.wgsl} +106 -206
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +68 -48
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +18 -14
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +244 -10
- data/ext/sources/ggml/src/ggml.c +110 -28
- data/ext/sources/ggml/src/gguf.cpp +173 -28
- data/ext/sources/include/parakeet.h +342 -0
- data/ext/sources/include/whisper.h +10 -0
- data/ext/sources/media/matmul.png +0 -0
- data/ext/sources/src/CMakeLists.txt +23 -0
- data/ext/sources/src/parakeet-arch.h +188 -0
- data/ext/sources/src/parakeet.cpp +3838 -0
- data/ext/sources/src/whisper.cpp +56 -12
- data/extsources.rb +26 -10
- data/lib/whisper/log_settable.rb +36 -0
- data/lib/whisper/model/uri.rb +13 -1
- data/lib/whisper/output.rb +74 -0
- data/sig/whisper.rbs +411 -62
- data/test/helper.rb +2 -0
- data/test/jfk_reader/jfk_reader.c +50 -7
- data/test/test_callback.rb +1 -0
- data/test/test_package.rb +6 -5
- data/test/test_parakeet.rb +28 -0
- data/test/test_parakeet_callback.rb +107 -0
- data/test/test_parakeet_context.rb +116 -0
- data/test/test_parakeet_context_params.rb +24 -0
- data/test/test_parakeet_model.rb +21 -0
- data/test/test_parakeet_params.rb +78 -0
- data/test/test_parakeet_segment.rb +42 -0
- data/test/test_parakeet_token.rb +73 -0
- data/test/test_params.rb +2 -0
- data/test/test_vad_segment.rb +1 -1
- data/test/test_whisper.rb +24 -6
- data/whispercpp.gemspec +2 -2
- metadata +215 -281
- data/ext/sources/bindings/javascript/CMakeLists.txt +0 -41
- data/ext/sources/bindings/javascript/emscripten.cpp +0 -93
- data/ext/sources/bindings/javascript/libwhisper.worker.js +0 -1
- data/ext/sources/bindings/javascript/package.json +0 -26
- data/ext/sources/bindings/javascript/whisper.js +0 -19
- data/ext/sources/examples/addon.node/CMakeLists.txt +0 -31
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +0 -133
- data/ext/sources/examples/addon.node/addon.cpp +0 -557
- data/ext/sources/examples/addon.node/index.js +0 -59
- data/ext/sources/examples/addon.node/package.json +0 -16
- data/ext/sources/examples/addon.node/vad-example.js +0 -132
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +0 -49
- data/ext/sources/examples/bench.wasm/emscripten.cpp +0 -87
- data/ext/sources/examples/bench.wasm/index-tmpl.html +0 -285
- data/ext/sources/examples/coi-serviceworker.js +0 -146
- data/ext/sources/examples/command/CMakeLists.txt +0 -10
- data/ext/sources/examples/command/command.cpp +0 -802
- data/ext/sources/examples/command/commands.txt +0 -9
- data/ext/sources/examples/command.wasm/CMakeLists.txt +0 -50
- data/ext/sources/examples/command.wasm/emscripten.cpp +0 -327
- data/ext/sources/examples/command.wasm/index-tmpl.html +0 -415
- data/ext/sources/examples/generate-karaoke.sh +0 -57
- data/ext/sources/examples/helpers.js +0 -191
- data/ext/sources/examples/livestream.sh +0 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +0 -10
- data/ext/sources/examples/lsp/lsp.cpp +0 -471
- data/ext/sources/examples/lsp/whisper.vim +0 -362
- data/ext/sources/examples/python/test_whisper_processor.py +0 -7
- data/ext/sources/examples/python/whisper_processor.py +0 -54
- data/ext/sources/examples/server/bench.js +0 -29
- data/ext/sources/examples/server.py +0 -120
- data/ext/sources/examples/stream/CMakeLists.txt +0 -10
- data/ext/sources/examples/stream/stream.cpp +0 -437
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +0 -49
- data/ext/sources/examples/stream.wasm/emscripten.cpp +0 -216
- data/ext/sources/examples/stream.wasm/index-tmpl.html +0 -491
- data/ext/sources/examples/sycl/CMakeLists.txt +0 -9
- data/ext/sources/examples/sycl/build.sh +0 -22
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +0 -11
- data/ext/sources/examples/sycl/run-whisper.sh +0 -17
- data/ext/sources/examples/talk-llama/CMakeLists.txt +0 -48
- data/ext/sources/examples/talk-llama/eleven-labs.py +0 -80
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +0 -488
- data/ext/sources/examples/talk-llama/llama-adapter.h +0 -89
- data/ext/sources/examples/talk-llama/llama-arch.cpp +0 -2877
- data/ext/sources/examples/talk-llama/llama-arch.h +0 -628
- data/ext/sources/examples/talk-llama/llama-batch.cpp +0 -919
- data/ext/sources/examples/talk-llama/llama-batch.h +0 -173
- data/ext/sources/examples/talk-llama/llama-chat.cpp +0 -896
- data/ext/sources/examples/talk-llama/llama-chat.h +0 -71
- data/ext/sources/examples/talk-llama/llama-context.cpp +0 -3633
- data/ext/sources/examples/talk-llama/llama-context.h +0 -359
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +0 -5
- data/ext/sources/examples/talk-llama/llama-cparams.h +0 -47
- data/ext/sources/examples/talk-llama/llama-ext.h +0 -12
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +0 -1464
- data/ext/sources/examples/talk-llama/llama-grammar.h +0 -194
- data/ext/sources/examples/talk-llama/llama-graph.cpp +0 -2735
- data/ext/sources/examples/talk-llama/llama-graph.h +0 -1031
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +0 -258
- data/ext/sources/examples/talk-llama/llama-hparams.h +0 -353
- data/ext/sources/examples/talk-llama/llama-impl.cpp +0 -171
- data/ext/sources/examples/talk-llama/llama-impl.h +0 -75
- data/ext/sources/examples/talk-llama/llama-io.cpp +0 -15
- data/ext/sources/examples/talk-llama/llama-io.h +0 -35
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +0 -330
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +0 -137
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +0 -2285
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +0 -389
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +0 -533
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +0 -275
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +0 -140
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +0 -268
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +0 -139
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +0 -1165
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +0 -182
- data/ext/sources/examples/talk-llama/llama-memory.cpp +0 -59
- data/ext/sources/examples/talk-llama/llama-memory.h +0 -122
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +0 -752
- data/ext/sources/examples/talk-llama/llama-mmap.h +0 -73
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +0 -1655
- data/ext/sources/examples/talk-llama/llama-model-loader.h +0 -206
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +0 -299
- data/ext/sources/examples/talk-llama/llama-model-saver.h +0 -40
- data/ext/sources/examples/talk-llama/llama-model.cpp +0 -9056
- data/ext/sources/examples/talk-llama/llama-model.h +0 -597
- data/ext/sources/examples/talk-llama/llama-quant.cpp +0 -1304
- data/ext/sources/examples/talk-llama/llama-quant.h +0 -1
- data/ext/sources/examples/talk-llama/llama-sampler.cpp +0 -3885
- data/ext/sources/examples/talk-llama/llama-sampler.h +0 -42
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +0 -3970
- data/ext/sources/examples/talk-llama/llama-vocab.h +0 -187
- data/ext/sources/examples/talk-llama/llama.cpp +0 -1194
- data/ext/sources/examples/talk-llama/llama.h +0 -1573
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +0 -190
- data/ext/sources/examples/talk-llama/models/apertus.cpp +0 -125
- data/ext/sources/examples/talk-llama/models/arcee.cpp +0 -135
- data/ext/sources/examples/talk-llama/models/arctic.cpp +0 -137
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +0 -143
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +0 -133
- data/ext/sources/examples/talk-llama/models/bert.cpp +0 -184
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +0 -145
- data/ext/sources/examples/talk-llama/models/bloom.cpp +0 -101
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +0 -178
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +0 -111
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +0 -102
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +0 -134
- data/ext/sources/examples/talk-llama/models/command-r.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/deci.cpp +0 -135
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +0 -142
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +0 -262
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +0 -445
- data/ext/sources/examples/talk-llama/models/dots1.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/dream.cpp +0 -105
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +0 -148
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +0 -110
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +0 -97
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +0 -145
- data/ext/sources/examples/talk-llama/models/exaone.cpp +0 -114
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +0 -111
- data/ext/sources/examples/talk-llama/models/falcon.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +0 -116
- data/ext/sources/examples/talk-llama/models/gemma.cpp +0 -112
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +0 -155
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +0 -384
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +0 -170
- data/ext/sources/examples/talk-llama/models/glm4.cpp +0 -157
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +0 -105
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +0 -144
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +0 -195
- data/ext/sources/examples/talk-llama/models/granite.cpp +0 -210
- data/ext/sources/examples/talk-llama/models/grok.cpp +0 -159
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +0 -139
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +0 -153
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/jais.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/jais2.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/jamba.cpp +0 -106
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +0 -381
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +0 -196
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/llada.cpp +0 -99
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +0 -178
- data/ext/sources/examples/talk-llama/models/llama.cpp +0 -175
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +0 -117
- data/ext/sources/examples/talk-llama/models/mamba-base.cpp +0 -289
- data/ext/sources/examples/talk-llama/models/mamba.cpp +0 -54
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +0 -129
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +0 -200
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +0 -160
- data/ext/sources/examples/talk-llama/models/models.h +0 -704
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +0 -109
- data/ext/sources/examples/talk-llama/models/mpt.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +0 -162
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +0 -104
- data/ext/sources/examples/talk-llama/models/olmo.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +0 -150
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +0 -127
- data/ext/sources/examples/talk-llama/models/openelm.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/orion.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/phi2.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/phi3.cpp +0 -152
- data/ext/sources/examples/talk-llama/models/plamo.cpp +0 -110
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +0 -320
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/plm.cpp +0 -169
- data/ext/sources/examples/talk-llama/models/qwen.cpp +0 -108
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +0 -151
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +0 -117
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +0 -381
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +0 -422
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +0 -131
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +0 -525
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +0 -140
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/refact.cpp +0 -94
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +0 -164
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +0 -94
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +0 -137
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +0 -90
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +0 -146
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +0 -100
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +0 -165
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +0 -166
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +0 -96
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +0 -149
- data/ext/sources/examples/talk-llama/models/xverse.cpp +0 -108
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +0 -23
- data/ext/sources/examples/talk-llama/speak +0 -40
- data/ext/sources/examples/talk-llama/speak.bat +0 -1
- data/ext/sources/examples/talk-llama/speak.ps1 +0 -14
- data/ext/sources/examples/talk-llama/talk-llama.cpp +0 -813
- data/ext/sources/examples/talk-llama/unicode-data.cpp +0 -7034
- data/ext/sources/examples/talk-llama/unicode-data.h +0 -20
- data/ext/sources/examples/talk-llama/unicode.cpp +0 -1103
- data/ext/sources/examples/talk-llama/unicode.h +0 -111
- data/ext/sources/examples/wchess/CMakeLists.txt +0 -10
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +0 -19
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +0 -803
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +0 -33
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +0 -193
- data/ext/sources/examples/wchess/libwchess/WChess.h +0 -63
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +0 -117
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +0 -8
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +0 -253
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +0 -50
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +0 -118
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +0 -659
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +0 -99
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +0 -155
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +0 -153
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +0 -26
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +0 -123
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +0 -17
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +0 -333
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +0 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +0 -182
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +0 -323
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +0 -718
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +0 -123
- data/ext/sources/tests/CMakeLists.txt +0 -112
- data/ext/sources/tests/earnings21/eval.mk +0 -58
- data/ext/sources/tests/earnings21/eval.py +0 -68
- data/ext/sources/tests/earnings21/normalizers/__init__.py +0 -2
- data/ext/sources/tests/earnings21/normalizers/basic.py +0 -80
- data/ext/sources/tests/earnings21/normalizers/english.json +0 -1741
- data/ext/sources/tests/earnings21/normalizers/english.py +0 -550
- data/ext/sources/tests/earnings21/requirements.txt +0 -6
- data/ext/sources/tests/en-0-ref.txt +0 -1
- data/ext/sources/tests/en-1-ref.txt +0 -1
- data/ext/sources/tests/en-2-ref.txt +0 -1
- data/ext/sources/tests/es-0-ref.txt +0 -1
- data/ext/sources/tests/librispeech/eval.mk +0 -39
- data/ext/sources/tests/librispeech/eval.py +0 -47
- data/ext/sources/tests/librispeech/normalizers/__init__.py +0 -2
- data/ext/sources/tests/librispeech/normalizers/basic.py +0 -80
- data/ext/sources/tests/librispeech/normalizers/english.json +0 -1741
- data/ext/sources/tests/librispeech/normalizers/english.py +0 -550
- data/ext/sources/tests/librispeech/requirements.txt +0 -6
- data/ext/sources/tests/run-tests.sh +0 -130
- data/ext/sources/tests/test-c.c +0 -3
- data/ext/sources/tests/test-vad-full.cpp +0 -56
- data/ext/sources/tests/test-vad.cpp +0 -83
- data/ext/sources/tests/test-whisper.js +0 -58
- data/lib/whisper/context.rb +0 -15
- data/lib/whisper/segment.rb +0 -58
- /data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle_general_q8_0_f32.cl → gemv_noshuffle_q8_0_f32.cl} +0 -0
|
@@ -664,6 +664,7 @@ void ggml_compute_forward_add(
|
|
|
664
664
|
{
|
|
665
665
|
ggml_compute_forward_add_non_quantized(params, dst);
|
|
666
666
|
} break;
|
|
667
|
+
case GGML_TYPE_Q1_0:
|
|
667
668
|
case GGML_TYPE_Q4_0:
|
|
668
669
|
case GGML_TYPE_Q4_1:
|
|
669
670
|
case GGML_TYPE_Q5_0:
|
|
@@ -1113,6 +1114,7 @@ void ggml_compute_forward_add1(
|
|
|
1113
1114
|
GGML_ABORT("fatal error");
|
|
1114
1115
|
}
|
|
1115
1116
|
} break;
|
|
1117
|
+
case GGML_TYPE_Q1_0:
|
|
1116
1118
|
case GGML_TYPE_Q4_0:
|
|
1117
1119
|
case GGML_TYPE_Q4_1:
|
|
1118
1120
|
case GGML_TYPE_Q5_0:
|
|
@@ -1242,6 +1244,7 @@ void ggml_compute_forward_acc(
|
|
|
1242
1244
|
} break;
|
|
1243
1245
|
case GGML_TYPE_F16:
|
|
1244
1246
|
case GGML_TYPE_BF16:
|
|
1247
|
+
case GGML_TYPE_Q1_0:
|
|
1245
1248
|
case GGML_TYPE_Q4_0:
|
|
1246
1249
|
case GGML_TYPE_Q4_1:
|
|
1247
1250
|
case GGML_TYPE_Q5_0:
|
|
@@ -2232,8 +2235,42 @@ static void ggml_compute_forward_fill_f32(const ggml_compute_params * params, gg
|
|
|
2232
2235
|
}
|
|
2233
2236
|
}
|
|
2234
2237
|
|
|
2238
|
+
static void ggml_compute_forward_fill_f16(const ggml_compute_params * params, ggml_tensor * dst) {
|
|
2239
|
+
const ggml_fp16_t c = GGML_CPU_FP32_TO_FP16(ggml_get_op_params_f32(dst, 0));
|
|
2240
|
+
|
|
2241
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
|
2242
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
|
2243
|
+
|
|
2244
|
+
const auto [ir0, ir1] = get_thread_range(params, dst);
|
|
2245
|
+
|
|
2246
|
+
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
|
2247
|
+
const int64_t i03 = ir/(ne2*ne1);
|
|
2248
|
+
const int64_t i02 = (ir - i03*ne2*ne1)/ne1;
|
|
2249
|
+
const int64_t i01 = (ir - i03*ne2*ne1 - i02*ne1);
|
|
2250
|
+
|
|
2251
|
+
ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1);
|
|
2252
|
+
|
|
2253
|
+
ggml_vec_set_f16(ne0, dst_ptr, c);
|
|
2254
|
+
}
|
|
2255
|
+
}
|
|
2256
|
+
|
|
2235
2257
|
void ggml_compute_forward_fill(const ggml_compute_params * params, ggml_tensor * dst) {
|
|
2236
|
-
|
|
2258
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
2259
|
+
|
|
2260
|
+
switch (src0->type) {
|
|
2261
|
+
case GGML_TYPE_F32:
|
|
2262
|
+
{
|
|
2263
|
+
ggml_compute_forward_fill_f32(params, dst);
|
|
2264
|
+
} break;
|
|
2265
|
+
case GGML_TYPE_F16:
|
|
2266
|
+
{
|
|
2267
|
+
ggml_compute_forward_fill_f16(params, dst);
|
|
2268
|
+
} break;
|
|
2269
|
+
default:
|
|
2270
|
+
{
|
|
2271
|
+
GGML_ABORT("unsupported type for ggml_compute_forward_fill: %s", ggml_type_name(src0->type));
|
|
2272
|
+
}
|
|
2273
|
+
}
|
|
2237
2274
|
}
|
|
2238
2275
|
|
|
2239
2276
|
// ggml_compute_tri
|
|
@@ -3710,11 +3747,27 @@ void ggml_compute_forward_norm(
|
|
|
3710
3747
|
|
|
3711
3748
|
// ggml_compute_forward_group_rms_norm
|
|
3712
3749
|
|
|
3750
|
+
// fusion kinds that can be combined with the rms_norm computation in a single pass.
|
|
3751
|
+
// extend this enum when adding new fused variants (e.g. FUSE_ADD, FUSE_MUL_ADD, ...).
|
|
3752
|
+
enum ggml_rms_norm_fuse_op {
|
|
3753
|
+
GGML_RMS_NORM_FUSE_OP_NONE,
|
|
3754
|
+
GGML_RMS_NORM_FUSE_OP_MUL,
|
|
3755
|
+
};
|
|
3756
|
+
|
|
3757
|
+
template <ggml_rms_norm_fuse_op FUSE_OP>
|
|
3713
3758
|
static void ggml_compute_forward_rms_norm_f32(
|
|
3714
3759
|
const ggml_compute_params * params,
|
|
3715
|
-
ggml_tensor *
|
|
3760
|
+
ggml_tensor * dst_rms_norm,
|
|
3761
|
+
ggml_tensor * dst_fused = nullptr) {
|
|
3716
3762
|
|
|
3717
|
-
const ggml_tensor * src0 =
|
|
3763
|
+
const ggml_tensor * src0 = dst_rms_norm->src[0];
|
|
3764
|
+
const ggml_tensor * src1 = nullptr;
|
|
3765
|
+
ggml_tensor * dst = dst_rms_norm;
|
|
3766
|
+
|
|
3767
|
+
if constexpr (FUSE_OP == GGML_RMS_NORM_FUSE_OP_MUL) {
|
|
3768
|
+
src1 = (dst_fused->src[0] == dst_rms_norm) ? dst_fused->src[1] : dst_fused->src[0];
|
|
3769
|
+
dst = dst_fused;
|
|
3770
|
+
}
|
|
3718
3771
|
|
|
3719
3772
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
3720
3773
|
|
|
@@ -3723,11 +3776,10 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
|
3723
3776
|
const int ith = params->ith;
|
|
3724
3777
|
const int nth = params->nth;
|
|
3725
3778
|
|
|
3726
|
-
|
|
3779
|
+
GGML_TENSOR_BINARY_OP_LOCALS
|
|
3727
3780
|
|
|
3728
3781
|
float eps;
|
|
3729
|
-
memcpy(&eps,
|
|
3730
|
-
|
|
3782
|
+
memcpy(&eps, dst_rms_norm->op_params, sizeof(float));
|
|
3731
3783
|
GGML_ASSERT(eps >= 0.0f);
|
|
3732
3784
|
|
|
3733
3785
|
// TODO: optimize
|
|
@@ -3737,25 +3789,32 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
|
3737
3789
|
const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
|
|
3738
3790
|
|
|
3739
3791
|
ggml_float sum = 0.0;
|
|
3792
|
+
// worth switching to explicit SIMD?
|
|
3740
3793
|
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
|
3741
3794
|
sum += (ggml_float)(x[i00] * x[i00]);
|
|
3742
3795
|
}
|
|
3743
3796
|
|
|
3744
|
-
const float mean
|
|
3745
|
-
|
|
3746
|
-
float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
|
|
3747
|
-
|
|
3748
|
-
memcpy(y, x, ne00 * sizeof(float));
|
|
3749
|
-
// for (int i00 = 0; i00 < ne00; i00++) {
|
|
3750
|
-
// y[i00] = x[i00];
|
|
3751
|
-
// }
|
|
3752
|
-
|
|
3797
|
+
const float mean = sum/ne00;
|
|
3753
3798
|
const float scale = 1.0f/sqrtf(mean + eps);
|
|
3754
3799
|
|
|
3755
3800
|
// if you hit this, likely you got an inf somewhere earlier
|
|
3756
3801
|
assert(scale > 0.0f);
|
|
3757
3802
|
|
|
3758
|
-
|
|
3803
|
+
float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
|
|
3804
|
+
|
|
3805
|
+
if constexpr (FUSE_OP == GGML_RMS_NORM_FUSE_OP_MUL) {
|
|
3806
|
+
const int64_t i11 = i01 % ne11;
|
|
3807
|
+
const int64_t i12 = i02 % ne12;
|
|
3808
|
+
const int64_t i13 = i03 % ne13;
|
|
3809
|
+
const float * w = (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13);
|
|
3810
|
+
|
|
3811
|
+
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
|
3812
|
+
y[i00] = x[i00] * scale * w[i00];
|
|
3813
|
+
}
|
|
3814
|
+
} else {
|
|
3815
|
+
memcpy(y, x, ne00 * sizeof(float));
|
|
3816
|
+
ggml_vec_scale_f32(ne00, y, scale);
|
|
3817
|
+
}
|
|
3759
3818
|
}
|
|
3760
3819
|
}
|
|
3761
3820
|
}
|
|
@@ -3770,7 +3829,31 @@ void ggml_compute_forward_rms_norm(
|
|
|
3770
3829
|
switch (src0->type) {
|
|
3771
3830
|
case GGML_TYPE_F32:
|
|
3772
3831
|
{
|
|
3773
|
-
ggml_compute_forward_rms_norm_f32(params, dst);
|
|
3832
|
+
ggml_compute_forward_rms_norm_f32<GGML_RMS_NORM_FUSE_OP_NONE>(params, dst);
|
|
3833
|
+
} break;
|
|
3834
|
+
default:
|
|
3835
|
+
{
|
|
3836
|
+
GGML_ABORT("fatal error");
|
|
3837
|
+
}
|
|
3838
|
+
}
|
|
3839
|
+
}
|
|
3840
|
+
|
|
3841
|
+
// Fused RMS_NORM + MUL: computes dst = rms_norm(src0) * src1 in a single pass.
|
|
3842
|
+
// This avoids materializing the intermediate rms_norm result in memory.
|
|
3843
|
+
void ggml_compute_forward_rms_norm_mul_fused(
|
|
3844
|
+
const ggml_compute_params * params,
|
|
3845
|
+
ggml_tensor * dst_rms_norm,
|
|
3846
|
+
ggml_tensor * dst_mul) {
|
|
3847
|
+
|
|
3848
|
+
GGML_ASSERT(dst_mul != nullptr);
|
|
3849
|
+
GGML_ASSERT(dst_mul->src[0] == dst_rms_norm || dst_mul->src[1] == dst_rms_norm);
|
|
3850
|
+
|
|
3851
|
+
const ggml_tensor * src0 = dst_rms_norm->src[0];
|
|
3852
|
+
|
|
3853
|
+
switch (src0->type) {
|
|
3854
|
+
case GGML_TYPE_F32:
|
|
3855
|
+
{
|
|
3856
|
+
ggml_compute_forward_rms_norm_f32<GGML_RMS_NORM_FUSE_OP_MUL>(params, dst_rms_norm, dst_mul);
|
|
3774
3857
|
} break;
|
|
3775
3858
|
default:
|
|
3776
3859
|
{
|
|
@@ -3925,12 +4008,12 @@ static void ggml_compute_forward_rms_norm_back_f32(
|
|
|
3925
4008
|
// dx := scale(dx, rrms)
|
|
3926
4009
|
float * dx = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
|
|
3927
4010
|
|
|
3928
|
-
// dx[i00] = (x*(-sum_xdz/sum_eps)
|
|
3929
|
-
|
|
3930
|
-
|
|
3931
|
-
|
|
3932
|
-
|
|
3933
|
-
|
|
4011
|
+
// dx[i00] = (dz + x*(-sum_xdz/sum_eps)) * rrms
|
|
4012
|
+
// note: https://github.com/ggml-org/ggml/issues/1491
|
|
4013
|
+
const float scale_x = (float) (-sum_xdz) / sum_eps;
|
|
4014
|
+
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
|
4015
|
+
dx[i00] = (dz[i00] + x[i00] * scale_x) * rrms;
|
|
4016
|
+
}
|
|
3934
4017
|
}
|
|
3935
4018
|
}
|
|
3936
4019
|
}
|
|
@@ -4331,6 +4414,7 @@ void ggml_compute_forward_out_prod(
|
|
|
4331
4414
|
const ggml_tensor * src0 = dst->src[0];
|
|
4332
4415
|
|
|
4333
4416
|
switch (src0->type) {
|
|
4417
|
+
case GGML_TYPE_Q1_0:
|
|
4334
4418
|
case GGML_TYPE_Q4_0:
|
|
4335
4419
|
case GGML_TYPE_Q4_1:
|
|
4336
4420
|
case GGML_TYPE_Q5_0:
|
|
@@ -4606,6 +4690,7 @@ void ggml_compute_forward_set(
|
|
|
4606
4690
|
} break;
|
|
4607
4691
|
case GGML_TYPE_F16:
|
|
4608
4692
|
case GGML_TYPE_BF16:
|
|
4693
|
+
case GGML_TYPE_Q1_0:
|
|
4609
4694
|
case GGML_TYPE_Q4_0:
|
|
4610
4695
|
case GGML_TYPE_Q4_1:
|
|
4611
4696
|
case GGML_TYPE_Q5_0:
|
|
@@ -4829,6 +4914,7 @@ void ggml_compute_forward_get_rows(
|
|
|
4829
4914
|
const ggml_tensor * src0 = dst->src[0];
|
|
4830
4915
|
|
|
4831
4916
|
switch (src0->type) {
|
|
4917
|
+
case GGML_TYPE_Q1_0:
|
|
4832
4918
|
case GGML_TYPE_Q4_0:
|
|
4833
4919
|
case GGML_TYPE_Q4_1:
|
|
4834
4920
|
case GGML_TYPE_Q5_0:
|
|
@@ -5554,6 +5640,7 @@ void ggml_compute_forward_clamp(
|
|
|
5554
5640
|
ggml_compute_forward_clamp_f16(params, dst);
|
|
5555
5641
|
} break;
|
|
5556
5642
|
case GGML_TYPE_BF16:
|
|
5643
|
+
case GGML_TYPE_Q1_0:
|
|
5557
5644
|
case GGML_TYPE_Q4_0:
|
|
5558
5645
|
case GGML_TYPE_Q4_1:
|
|
5559
5646
|
case GGML_TYPE_Q5_0:
|
|
@@ -6643,6 +6730,78 @@ static inline int64_t ggml_wrap_around(int64_t coord, int64_t size) {
|
|
|
6643
6730
|
return (coord + size) % size; // adding size avoids negative number weirdness
|
|
6644
6731
|
}
|
|
6645
6732
|
|
|
6733
|
+
// ggml_compute_forward_col2im_1d
|
|
6734
|
+
//
|
|
6735
|
+
// Scatter-add columns [K*OC, T_in] -> signal [T_out, OC]
|
|
6736
|
+
// where T_out = (T_in - 1)*s + K - 2*p. Gather approach: each output reads ceil(K/s) inputs.
|
|
6737
|
+
// Parallelized over the time axis so the split stays balanced whatever OC is.
|
|
6738
|
+
// Supports F32, F16, BF16 input/output (same type), F32 accumulator.
|
|
6739
|
+
|
|
6740
|
+
template <typename elem_t>
|
|
6741
|
+
static void ggml_compute_forward_col2im_1d_impl(
|
|
6742
|
+
const ggml_compute_params * params,
|
|
6743
|
+
ggml_tensor * dst) {
|
|
6744
|
+
|
|
6745
|
+
const ggml_tensor * src = dst->src[0]; // [K*OC, T_in]
|
|
6746
|
+
|
|
6747
|
+
GGML_ASSERT(ggml_is_contiguous(src));
|
|
6748
|
+
GGML_ASSERT(ggml_is_contiguous(dst));
|
|
6749
|
+
|
|
6750
|
+
const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
|
|
6751
|
+
const int32_t OC = ((const int32_t *)(dst->op_params))[1];
|
|
6752
|
+
const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
|
|
6753
|
+
|
|
6754
|
+
const int64_t K_OC = src->ne[0];
|
|
6755
|
+
const int64_t T_in = src->ne[1];
|
|
6756
|
+
const int64_t K = K_OC / OC;
|
|
6757
|
+
const int64_t T_out = dst->ne[0];
|
|
6758
|
+
|
|
6759
|
+
const elem_t * col_data = (const elem_t *) src->data;
|
|
6760
|
+
elem_t * dst_data = (elem_t *) dst->data;
|
|
6761
|
+
|
|
6762
|
+
const int ith = params->ith;
|
|
6763
|
+
const int nth = params->nth;
|
|
6764
|
+
|
|
6765
|
+
// Parallelize over the time axis: the split stays balanced whatever OC is,
|
|
6766
|
+
// down to OC = 1 for mono audio, and threads read disjoint column bands
|
|
6767
|
+
const int64_t dr = (T_out + nth - 1) / nth;
|
|
6768
|
+
const int64_t it0 = dr * ith;
|
|
6769
|
+
const int64_t it1 = it0 + dr < T_out ? it0 + dr : T_out;
|
|
6770
|
+
|
|
6771
|
+
for (int64_t oc = 0; oc < OC; oc++) {
|
|
6772
|
+
for (int64_t t_out = it0; t_out < it1; t_out++) {
|
|
6773
|
+
const int64_t t_abs = t_out + p0; // absolute position in uncropped signal
|
|
6774
|
+
// Gather: find all (t_in, k) where t_in * s + k == t_abs, 0 <= k < K
|
|
6775
|
+
int64_t t_in_min = (t_abs - K + 1 + s0 - 1) / s0; // ceil((t_abs-K+1)/s)
|
|
6776
|
+
if (t_in_min < 0) t_in_min = 0;
|
|
6777
|
+
int64_t t_in_max = t_abs / s0;
|
|
6778
|
+
if (t_in_max >= T_in) t_in_max = T_in - 1;
|
|
6779
|
+
|
|
6780
|
+
float sum = 0.0f;
|
|
6781
|
+
for (int64_t t_in = t_in_min; t_in <= t_in_max; t_in++) {
|
|
6782
|
+
int64_t k = t_abs - t_in * s0;
|
|
6783
|
+
if (k >= 0 && k < K) {
|
|
6784
|
+
// col layout: [K*OC, T_in], element (oc*K+k, t_in)
|
|
6785
|
+
sum += type_conversion_table<elem_t>::to_f32(col_data[(oc * K + k) + t_in * K_OC]);
|
|
6786
|
+
}
|
|
6787
|
+
}
|
|
6788
|
+
// dst layout: [T_out, OC], element (t_out, oc)
|
|
6789
|
+
dst_data[t_out + oc * T_out] = type_conversion_table<elem_t>::from_f32(sum);
|
|
6790
|
+
}
|
|
6791
|
+
}
|
|
6792
|
+
}
|
|
6793
|
+
|
|
6794
|
+
void ggml_compute_forward_col2im_1d(
|
|
6795
|
+
const ggml_compute_params * params,
|
|
6796
|
+
ggml_tensor * dst) {
|
|
6797
|
+
switch (dst->src[0]->type) {
|
|
6798
|
+
case GGML_TYPE_F32: ggml_compute_forward_col2im_1d_impl<float> (params, dst); break;
|
|
6799
|
+
case GGML_TYPE_F16: ggml_compute_forward_col2im_1d_impl<ggml_fp16_t>(params, dst); break;
|
|
6800
|
+
case GGML_TYPE_BF16: ggml_compute_forward_col2im_1d_impl<ggml_bf16_t>(params, dst); break;
|
|
6801
|
+
default: GGML_ABORT("col2im_1d: unsupported type %d", dst->src[0]->type);
|
|
6802
|
+
}
|
|
6803
|
+
}
|
|
6804
|
+
|
|
6646
6805
|
// ggml_compute_forward_conv_2d
|
|
6647
6806
|
|
|
6648
6807
|
|
|
@@ -6923,16 +7082,15 @@ void ggml_compute_forward_conv_3d(
|
|
|
6923
7082
|
ggml_compute_forward_conv_3d_impl(params, src0, src1, dst, src0->type);
|
|
6924
7083
|
}
|
|
6925
7084
|
|
|
6926
|
-
|
|
6927
|
-
|
|
6928
|
-
|
|
6929
|
-
|
|
6930
|
-
ggml_tensor * dst) {
|
|
7085
|
+
template <typename kernel_t>
|
|
7086
|
+
static void ggml_compute_forward_conv_transpose_2d_impl(
|
|
7087
|
+
const ggml_compute_params * params,
|
|
7088
|
+
ggml_tensor * dst) {
|
|
6931
7089
|
|
|
6932
7090
|
const ggml_tensor * src0 = dst->src[0];
|
|
6933
7091
|
const ggml_tensor * src1 = dst->src[1];
|
|
6934
7092
|
|
|
6935
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
7093
|
+
GGML_ASSERT(src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_F32);
|
|
6936
7094
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
6937
7095
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
|
6938
7096
|
|
|
@@ -6943,7 +7101,7 @@ void ggml_compute_forward_conv_transpose_2d(
|
|
|
6943
7101
|
|
|
6944
7102
|
const int nk = ne00*ne01*ne02*ne03;
|
|
6945
7103
|
|
|
6946
|
-
GGML_ASSERT(nb00 ==
|
|
7104
|
+
GGML_ASSERT(nb00 == ggml_type_size(src0->type));
|
|
6947
7105
|
GGML_ASSERT(nb10 == sizeof(float));
|
|
6948
7106
|
|
|
6949
7107
|
if (ith == 0) {
|
|
@@ -6951,12 +7109,12 @@ void ggml_compute_forward_conv_transpose_2d(
|
|
|
6951
7109
|
|
|
6952
7110
|
// permute kernel data (src0) from (Kw x Kh x Cout x Cin) to (Cin x Kw x Kh x Cout)
|
|
6953
7111
|
{
|
|
6954
|
-
|
|
7112
|
+
kernel_t * const wdata = (kernel_t *) params->wdata + 0;
|
|
6955
7113
|
|
|
6956
7114
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
|
6957
7115
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
|
6958
|
-
const
|
|
6959
|
-
|
|
7116
|
+
const kernel_t * const src = (kernel_t *)((char *) src0->data + i03*nb03 + i02*nb02);
|
|
7117
|
+
kernel_t * dst_data = wdata + i02*ne01*ne00*ne03;
|
|
6960
7118
|
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
|
6961
7119
|
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
|
6962
7120
|
dst_data[i01*ne00*ne03 + i00*ne03 + i03] = src[i01 * ne00 + i00];
|
|
@@ -6968,13 +7126,17 @@ void ggml_compute_forward_conv_transpose_2d(
|
|
|
6968
7126
|
|
|
6969
7127
|
// permute source data (src1) from (Sw x Sh x Cin) to (Cin x Sw x Sh)
|
|
6970
7128
|
{
|
|
6971
|
-
|
|
7129
|
+
kernel_t * const wdata = (kernel_t *) params->wdata + nk;
|
|
6972
7130
|
for (int i12 = 0; i12 < ne12; i12++) {
|
|
6973
7131
|
for (int i11 = 0; i11 < ne11; i11++) {
|
|
6974
7132
|
const float * const src = (float *)((char *) src1->data + i12*nb12 + i11*nb11);
|
|
6975
|
-
|
|
7133
|
+
kernel_t * dst_data = wdata + i11*ne10*ne12;
|
|
6976
7134
|
for (int i10 = 0; i10 < ne10; i10++) {
|
|
6977
|
-
|
|
7135
|
+
if constexpr (std::is_same_v<kernel_t, ggml_fp16_t>) {
|
|
7136
|
+
dst_data[i10*ne12 + i12] = GGML_CPU_FP32_TO_FP16(src[i10]);
|
|
7137
|
+
} else {
|
|
7138
|
+
dst_data[i10*ne12 + i12] = src[i10];
|
|
7139
|
+
}
|
|
6978
7140
|
}
|
|
6979
7141
|
}
|
|
6980
7142
|
}
|
|
@@ -6996,21 +7158,27 @@ void ggml_compute_forward_conv_transpose_2d(
|
|
|
6996
7158
|
const int ip0 = dp*ith;
|
|
6997
7159
|
const int ip1 = MIN(ip0 + dp, np);
|
|
6998
7160
|
|
|
6999
|
-
|
|
7000
|
-
|
|
7161
|
+
kernel_t * const wdata = (kernel_t *) params->wdata + 0;
|
|
7162
|
+
kernel_t * const wdata_src = wdata + nk;
|
|
7001
7163
|
|
|
7002
7164
|
for (int i2 = ip0; i2 < ip1; i2++) { // Cout
|
|
7003
7165
|
float * dst_data = (float *)((char *) dst->data + i2*nb2);
|
|
7004
|
-
|
|
7166
|
+
kernel_t * wdata_kernel = wdata + i2*ne01*ne00*ne03;
|
|
7005
7167
|
for (int i11 = 0; i11 < ne11; i11++) {
|
|
7006
7168
|
for (int i10 = 0; i10 < ne10; i10++) {
|
|
7007
7169
|
const int i1n = i11*ne10*ne12 + i10*ne12;
|
|
7008
7170
|
for (int i01 = 0; i01 < ne01; i01++) {
|
|
7009
7171
|
for (int i00 = 0; i00 < ne00; i00++) {
|
|
7010
7172
|
float v = 0;
|
|
7011
|
-
|
|
7012
|
-
|
|
7013
|
-
|
|
7173
|
+
if constexpr (std::is_same_v<kernel_t, ggml_fp16_t>) {
|
|
7174
|
+
ggml_vec_dot_f16(ne03, &v, 0,
|
|
7175
|
+
wdata_src + i1n, 0,
|
|
7176
|
+
wdata_kernel + i01*ne00*ne03 + i00*ne03, 0, 1);
|
|
7177
|
+
} else {
|
|
7178
|
+
ggml_vec_dot_f32(ne03, &v, 0,
|
|
7179
|
+
wdata_src + i1n, 0,
|
|
7180
|
+
wdata_kernel + i01*ne00*ne03 + i00*ne03, 0, 1);
|
|
7181
|
+
}
|
|
7014
7182
|
dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
|
|
7015
7183
|
}
|
|
7016
7184
|
}
|
|
@@ -7019,6 +7187,28 @@ void ggml_compute_forward_conv_transpose_2d(
|
|
|
7019
7187
|
}
|
|
7020
7188
|
}
|
|
7021
7189
|
|
|
7190
|
+
void ggml_compute_forward_conv_transpose_2d(
|
|
7191
|
+
const ggml_compute_params * params,
|
|
7192
|
+
ggml_tensor * dst) {
|
|
7193
|
+
|
|
7194
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
7195
|
+
|
|
7196
|
+
switch (src0->type) {
|
|
7197
|
+
case GGML_TYPE_F16:
|
|
7198
|
+
{
|
|
7199
|
+
ggml_compute_forward_conv_transpose_2d_impl<ggml_fp16_t>(params, dst);
|
|
7200
|
+
} break;
|
|
7201
|
+
case GGML_TYPE_F32:
|
|
7202
|
+
{
|
|
7203
|
+
ggml_compute_forward_conv_transpose_2d_impl<float>(params, dst);
|
|
7204
|
+
} break;
|
|
7205
|
+
default:
|
|
7206
|
+
{
|
|
7207
|
+
GGML_ABORT("fatal error");
|
|
7208
|
+
}
|
|
7209
|
+
}
|
|
7210
|
+
}
|
|
7211
|
+
|
|
7022
7212
|
// ggml_compute_forward_conv_2d_dw
|
|
7023
7213
|
|
|
7024
7214
|
struct ggml_conv_2d_dw_params {
|
|
@@ -8837,7 +9027,12 @@ static void ggml_compute_forward_flash_attn_ext_f16(
|
|
|
8837
9027
|
k->type == v->type &&
|
|
8838
9028
|
neq1 >= Q_TILE_SZ);
|
|
8839
9029
|
#ifdef GGML_SIMD
|
|
8840
|
-
|
|
9030
|
+
#if defined(__ARM_FEATURE_SVE)
|
|
9031
|
+
const int64_t f32_epr = svcntw();
|
|
9032
|
+
#else
|
|
9033
|
+
const int64_t f32_epr = GGML_F32_EPR;
|
|
9034
|
+
#endif
|
|
9035
|
+
use_tiled &= (DV % f32_epr == 0);
|
|
8841
9036
|
#endif
|
|
8842
9037
|
int current_chunk = ith;
|
|
8843
9038
|
|
|
@@ -9922,13 +10117,9 @@ static void ggml_compute_forward_rwkv_wkv6_f32(
|
|
|
9922
10117
|
const int ith = params->ith;
|
|
9923
10118
|
const int nth = params->nth;
|
|
9924
10119
|
|
|
9925
|
-
|
|
9926
|
-
|
|
9927
|
-
|
|
9928
|
-
|
|
9929
|
-
const int h_start = (HEADS * ith) / nth;
|
|
9930
|
-
const int h_end = ((HEADS * (ith + 1)) / nth < HEADS) ?
|
|
9931
|
-
(HEADS * (ith + 1)) / nth : HEADS;
|
|
10120
|
+
const int h_start = (HEADS * (ith )) / nth;
|
|
10121
|
+
const int h_end = ((HEADS * (ith + 1)) / nth < HEADS) ?
|
|
10122
|
+
(HEADS * (ith + 1)) / nth : HEADS;
|
|
9932
10123
|
|
|
9933
10124
|
float * k = (float *) dst->src[0]->data;
|
|
9934
10125
|
float * v = (float *) dst->src[1]->data;
|
|
@@ -10139,13 +10330,9 @@ static void ggml_compute_forward_gla_f32(
|
|
|
10139
10330
|
const int ith = params->ith;
|
|
10140
10331
|
const int nth = params->nth;
|
|
10141
10332
|
|
|
10142
|
-
|
|
10143
|
-
|
|
10144
|
-
|
|
10145
|
-
|
|
10146
|
-
const int h_start = (HEADS * ith) / nth;
|
|
10147
|
-
const int h_end = ((HEADS * (ith + 1)) / nth < HEADS) ?
|
|
10148
|
-
(HEADS * (ith + 1)) / nth : HEADS;
|
|
10333
|
+
const int h_start = (HEADS * (ith )) / nth;
|
|
10334
|
+
const int h_end = ((HEADS * (ith + 1)) / nth < HEADS) ?
|
|
10335
|
+
(HEADS * (ith + 1)) / nth : HEADS;
|
|
10149
10336
|
|
|
10150
10337
|
float * k = (float *) dst->src[0]->data;
|
|
10151
10338
|
float * v = (float *) dst->src[1]->data;
|
|
@@ -10437,19 +10624,29 @@ static void ggml_compute_forward_gated_delta_net_one_chunk(
|
|
|
10437
10624
|
|
|
10438
10625
|
const bool kda = (neg0 == S_v);
|
|
10439
10626
|
|
|
10440
|
-
//
|
|
10441
|
-
const int64_t
|
|
10627
|
+
// K (snapshot slot count) is an op param; state holds s0 only [S_v, S_v, H, n_seqs].
|
|
10628
|
+
const int64_t K = ggml_get_op_params_i32(dst, 0);
|
|
10629
|
+
GGML_ASSERT(K >= 1);
|
|
10630
|
+
// per-seq stride in floats (seq s starts at state + s * seq_stride)
|
|
10631
|
+
const int64_t state_seq_stride = src_state->nb[3] / sizeof(float);
|
|
10632
|
+
|
|
10633
|
+
const int64_t per_thread = S_v + (K > 1 ? S_v * S_v : 0);
|
|
10442
10634
|
const int ith = params->ith;
|
|
10443
10635
|
|
|
10444
|
-
float * delta
|
|
10636
|
+
float * delta = (float *)params->wdata + ith * per_thread + CACHE_LINE_SIZE_F32;
|
|
10637
|
+
float * state_work = K > 1 ? (delta + S_v) : nullptr;
|
|
10445
10638
|
|
|
10446
10639
|
// output layout: [attn_scores | new_states]
|
|
10447
|
-
// attn_scores: S_v * H * n_tokens * n_seqs
|
|
10448
|
-
// new_states: S_v * S_v * H * n_seqs floats
|
|
10449
|
-
const int64_t attn_score_elems
|
|
10640
|
+
// attn_scores: S_v * H * n_tokens * n_seqs floats
|
|
10641
|
+
// new_states: S_v * S_v * H * n_seqs * K floats (K snapshot slots; last min(n_tokens, K))
|
|
10642
|
+
const int64_t attn_score_elems = S_v * H * n_tokens * n_seqs;
|
|
10643
|
+
const int64_t state_size_per_snap = S_v * S_v * H * n_seqs;
|
|
10450
10644
|
float * attn_out_base = (float *)dst->data;
|
|
10451
10645
|
float * state_out_base = (float *)dst->data + attn_score_elems;
|
|
10452
10646
|
|
|
10647
|
+
// snapshot slot mapping: slot 0 = most recent state, slot s = s tokens back.
|
|
10648
|
+
// When n_tokens < K only slots 0..n_tokens-1 are written; older slots are caller-owned.
|
|
10649
|
+
|
|
10453
10650
|
const float * state_in_base = (const float *)src_state->data;
|
|
10454
10651
|
|
|
10455
10652
|
//const int64_t rq1 = nev1 / neq1;
|
|
@@ -10469,10 +10666,15 @@ static void ggml_compute_forward_gated_delta_net_one_chunk(
|
|
|
10469
10666
|
const int64_t iq3 = iv3 / rq3;
|
|
10470
10667
|
const int64_t ik3 = iv3 / rk3;
|
|
10471
10668
|
|
|
10472
|
-
|
|
10669
|
+
// For K=1, write directly to the single output slot to avoid an extra memcpy at the end.
|
|
10670
|
+
// For K>1, work in scratch and copy out per-token when the slot is in range.
|
|
10671
|
+
float * s_out = (K > 1)
|
|
10672
|
+
? state_work
|
|
10673
|
+
: state_out_base + (iv3 * H + iv1) * S_v * S_v;
|
|
10473
10674
|
|
|
10474
|
-
// copy input state into
|
|
10475
|
-
|
|
10675
|
+
// copy input state into the working buffer and operate in-place
|
|
10676
|
+
// state layout [S_v, S_v, H, n_seqs]: seq iv3 starts at iv3 * state_seq_stride.
|
|
10677
|
+
const float * s_in = state_in_base + iv3 * state_seq_stride + iv1 * S_v * S_v;
|
|
10476
10678
|
memcpy(s_out, s_in, S_v * S_v * sizeof(float));
|
|
10477
10679
|
|
|
10478
10680
|
// attn output pointer for first token of this (head, seq)
|
|
@@ -10522,6 +10724,15 @@ static void ggml_compute_forward_gated_delta_net_one_chunk(
|
|
|
10522
10724
|
}
|
|
10523
10725
|
|
|
10524
10726
|
attn_data += S_v * H; // advance to next token
|
|
10727
|
+
|
|
10728
|
+
if (K > 1) {
|
|
10729
|
+
const int64_t target_slot = n_tokens - 1 - t;
|
|
10730
|
+
if (target_slot >= 0 && target_slot < K) {
|
|
10731
|
+
float * curr_state_o = state_out_base + target_slot * state_size_per_snap +
|
|
10732
|
+
(iv3 * H + iv1) * S_v * S_v;
|
|
10733
|
+
memcpy(curr_state_o, s_out, S_v * S_v * sizeof(float));
|
|
10734
|
+
}
|
|
10735
|
+
}
|
|
10525
10736
|
}
|
|
10526
10737
|
}
|
|
10527
10738
|
}
|
|
@@ -10602,13 +10813,9 @@ static void ggml_compute_forward_rwkv_wkv7_f32(
|
|
|
10602
10813
|
const int ith = params->ith;
|
|
10603
10814
|
const int nth = params->nth;
|
|
10604
10815
|
|
|
10605
|
-
|
|
10606
|
-
|
|
10607
|
-
|
|
10608
|
-
|
|
10609
|
-
const int h_start = (HEADS * ith) / nth;
|
|
10610
|
-
const int h_end = ((HEADS * (ith + 1)) / nth < HEADS) ?
|
|
10611
|
-
(HEADS * (ith + 1)) / nth : HEADS;
|
|
10816
|
+
const int h_start = (HEADS * (ith )) / nth;
|
|
10817
|
+
const int h_end = ((HEADS * (ith + 1)) / nth < HEADS) ?
|
|
10818
|
+
(HEADS * (ith + 1)) / nth : HEADS;
|
|
10612
10819
|
|
|
10613
10820
|
float * r = (float *) dst->src[0]->data;
|
|
10614
10821
|
float * w = (float *) dst->src[1]->data;
|
|
@@ -11186,3 +11393,95 @@ void ggml_compute_forward_opt_step_sgd(const ggml_compute_params * params, ggml_
|
|
|
11186
11393
|
}
|
|
11187
11394
|
}
|
|
11188
11395
|
}
|
|
11396
|
+
|
|
11397
|
+
static void ggml_compute_forward_fwht_f32(const ggml_compute_params * params, ggml_tensor * dst) {
|
|
11398
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
11399
|
+
const ggml_tensor * src1 = dst->src[1];
|
|
11400
|
+
|
|
11401
|
+
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
11402
|
+
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
11403
|
+
|
|
11404
|
+
GGML_TENSOR_BINARY_OP_LOCALS
|
|
11405
|
+
|
|
11406
|
+
const int ith = params->ith;
|
|
11407
|
+
const int nth = params->nth;
|
|
11408
|
+
|
|
11409
|
+
const int64_t n = ne10;
|
|
11410
|
+
GGML_ASSERT((n & (n - 1)) == 0); // must be power of 2
|
|
11411
|
+
|
|
11412
|
+
const int64_t nr = ne11 * ne12 * ne13;
|
|
11413
|
+
const int64_t rows_per_thread = (nr + nth - 1) / nth;
|
|
11414
|
+
const int64_t start_row = ith * rows_per_thread;
|
|
11415
|
+
const int64_t end_row = MIN(start_row + rows_per_thread, nr);
|
|
11416
|
+
|
|
11417
|
+
const float scale = 1.0f / sqrtf((float)n);
|
|
11418
|
+
|
|
11419
|
+
#if defined(GGML_SIMD)
|
|
11420
|
+
const GGML_F32_VEC v_minus_one = GGML_F32_VEC_SET1(-1.0f);
|
|
11421
|
+
#endif
|
|
11422
|
+
|
|
11423
|
+
for (int64_t r = start_row; r < end_row; r++) {
|
|
11424
|
+
const int64_t i13 = r / (ne11 * ne12);
|
|
11425
|
+
const int64_t i12 = (r - i13 * ne11 * ne12) / ne11;
|
|
11426
|
+
const int64_t i11 = r - i13 * ne11 * ne12 - i12 * ne11;
|
|
11427
|
+
|
|
11428
|
+
const float * src_row = (const float *) ((const char *) src1->data + i11 * nb11 + i12 * nb12 + i13 * nb13);
|
|
11429
|
+
float * dst_row = (float *) ((char *) dst->data + i11 * nb1 + i12 * nb2 + i13 * nb3);
|
|
11430
|
+
|
|
11431
|
+
for (int64_t j = 0; j < n; j++) {
|
|
11432
|
+
dst_row[j] = src_row[j] * scale;
|
|
11433
|
+
}
|
|
11434
|
+
|
|
11435
|
+
// Scalar passes
|
|
11436
|
+
#if defined(GGML_SIMD)
|
|
11437
|
+
#if defined(__ARM_FEATURE_SVE)
|
|
11438
|
+
const int step = svcntw();
|
|
11439
|
+
#else
|
|
11440
|
+
const int step = GGML_F32_EPR;
|
|
11441
|
+
#endif
|
|
11442
|
+
#else
|
|
11443
|
+
const int step = n;
|
|
11444
|
+
#endif
|
|
11445
|
+
for (int64_t len = 1; len < step && len < n; len <<= 1) {
|
|
11446
|
+
for (int64_t i = 0; i < n; i += 2 * len) {
|
|
11447
|
+
for (int64_t j = 0; j < len; j++) {
|
|
11448
|
+
float u = dst_row[i + j];
|
|
11449
|
+
float v = dst_row[i + len + j];
|
|
11450
|
+
dst_row[i + j] = u + v;
|
|
11451
|
+
dst_row[i + len + j] = u - v;
|
|
11452
|
+
}
|
|
11453
|
+
}
|
|
11454
|
+
}
|
|
11455
|
+
|
|
11456
|
+
// SIMD passes using GGML_F32_VEC_* macros for multi-architecture support
|
|
11457
|
+
#if defined(GGML_SIMD)
|
|
11458
|
+
for (int64_t len = step; len < n; len <<= 1) {
|
|
11459
|
+
for (int64_t i = 0; i < n; i += 2 * len) {
|
|
11460
|
+
for (int64_t j = 0; j < len; j += step) {
|
|
11461
|
+
GGML_F32_VEC u = GGML_F32_VEC_LOAD(dst_row + i + j);
|
|
11462
|
+
GGML_F32_VEC v = GGML_F32_VEC_LOAD(dst_row + i + len + j);
|
|
11463
|
+
|
|
11464
|
+
GGML_F32_VEC_STORE(dst_row + i + j, GGML_F32_VEC_ADD(u, v));
|
|
11465
|
+
GGML_F32_VEC_STORE(dst_row + i + len + j, GGML_F32_VEC_FMA(u, v, v_minus_one));
|
|
11466
|
+
}
|
|
11467
|
+
}
|
|
11468
|
+
}
|
|
11469
|
+
#endif
|
|
11470
|
+
}
|
|
11471
|
+
}
|
|
11472
|
+
|
|
11473
|
+
void ggml_compute_forward_fwht(const ggml_compute_params * params, ggml_tensor * dst) {
|
|
11474
|
+
const ggml_tensor * src1 = dst->src[1];
|
|
11475
|
+
|
|
11476
|
+
switch (src1->type) {
|
|
11477
|
+
case GGML_TYPE_F32:
|
|
11478
|
+
{
|
|
11479
|
+
ggml_compute_forward_fwht_f32(params, dst);
|
|
11480
|
+
}
|
|
11481
|
+
break;
|
|
11482
|
+
default:
|
|
11483
|
+
{
|
|
11484
|
+
GGML_ABORT("fatal error - fwht is F32 only");
|
|
11485
|
+
}
|
|
11486
|
+
}
|
|
11487
|
+
}
|