whispercpp 1.3.6 → 1.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.document +3 -0
- data/.rdoc_options +2 -0
- data/README.md +38 -5
- data/Rakefile +18 -3
- data/ext/dependencies.rb +10 -4
- data/ext/dependencies_for_windows.rb +17 -0
- data/ext/extconf.rb +20 -8
- data/ext/options.rb +54 -14
- data/ext/options_for_windows.rb +51 -0
- data/ext/ruby_whisper.c +36 -42
- data/ext/ruby_whisper.h +135 -0
- data/ext/ruby_whisper_context.c +107 -28
- data/ext/ruby_whisper_log_queue.c +180 -0
- data/ext/ruby_whisper_log_settable.h +47 -0
- data/ext/ruby_whisper_parakeet.c +49 -0
- data/ext/ruby_whisper_parakeet_context.c +304 -0
- data/ext/ruby_whisper_parakeet_context_params.c +117 -0
- data/ext/ruby_whisper_parakeet_model.c +84 -0
- data/ext/ruby_whisper_parakeet_params.c +548 -0
- data/ext/ruby_whisper_parakeet_segment.c +157 -0
- data/ext/ruby_whisper_parakeet_token.c +188 -0
- data/ext/ruby_whisper_parakeet_transcribe.cpp +58 -0
- data/ext/ruby_whisper_params.c +256 -65
- data/ext/ruby_whisper_segment.c +6 -6
- data/ext/ruby_whisper_transcribe.cpp +42 -15
- data/ext/sources/CMakeLists.txt +41 -3
- data/ext/sources/CMakePresets.json +95 -0
- data/ext/sources/cmake/parakeet-config.cmake.in +30 -0
- data/ext/sources/cmake/parakeet.pc.in +10 -0
- data/ext/sources/cmake/whisper.pc.in +1 -1
- data/ext/sources/examples/CMakeLists.txt +4 -2
- data/ext/sources/examples/bench/bench.cpp +1 -1
- data/ext/sources/examples/cli/cli.cpp +43 -9
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/common-whisper.cpp +139 -67
- data/ext/sources/examples/common-whisper.h +11 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +211 -341
- data/ext/sources/examples/parakeet-cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/parakeet-cli/parakeet-cli.cpp +243 -0
- data/ext/sources/examples/parakeet-quantize/CMakeLists.txt +7 -0
- data/ext/sources/examples/parakeet-quantize/parakeet-quantize.cpp +230 -0
- data/ext/sources/examples/server/server.cpp +199 -163
- data/ext/sources/ggml/CMakeLists.txt +21 -13
- data/ext/sources/ggml/cmake/FindNCCL.cmake +36 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +12 -2
- data/ext/sources/ggml/include/ggml-alloc.h +1 -0
- data/ext/sources/ggml/include/ggml-backend.h +72 -10
- data/ext/sources/ggml/include/ggml-cuda.h +3 -0
- data/ext/sources/ggml/include/ggml-rpc.h +3 -3
- data/ext/sources/ggml/include/ggml.h +101 -9
- data/ext/sources/ggml/include/gguf.h +10 -2
- data/ext/sources/ggml/src/CMakeLists.txt +22 -5
- data/ext/sources/ggml/src/ggml-alloc.c +5 -1
- data/ext/sources/ggml/src/ggml-backend-impl.h +22 -2
- data/ext/sources/ggml/src/ggml-backend-meta.cpp +2263 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +12 -0
- data/ext/sources/ggml/src/ggml-backend.cpp +110 -9
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +672 -257
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +71 -0
- data/ext/sources/ggml/src/ggml-cann/common.h +20 -10
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +211 -30
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +58 -29
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +2 -0
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +16 -16
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +116 -7
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +65 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +151 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +0 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +4279 -1292
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +5 -35
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +0 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +72 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +177 -27
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +10 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +95 -5
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +146 -134
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +88 -70
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +372 -73
- data/ext/sources/ggml/src/ggml-cpu/ops.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +55 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +3 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +90 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +3 -16
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1402 -687
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +597 -2766
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +182 -19
- data/ext/sources/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +37 -53
- data/ext/sources/ggml/src/ggml-cpu/vec.h +225 -240
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +17 -7
- data/ext/sources/ggml/src/ggml-cuda/allreduce.cu +971 -0
- data/ext/sources/ggml/src/ggml-cuda/allreduce.cuh +29 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +62 -26
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +44 -18
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +242 -28
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +120 -114
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +45 -21
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +53 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +14 -6
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +22 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +278 -44
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +331 -130
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +12 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +126 -27
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +40 -15
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +18 -9
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +152 -49
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/fwht.cu +101 -0
- data/ext/sources/ggml/src/ggml-cuda/fwht.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +84 -35
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +34 -12
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1069 -609
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +32 -29
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +4 -2
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +242 -195
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +18 -12
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +502 -423
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +19 -12
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +485 -57
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +6 -1
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +36 -10
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +23 -7
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +133 -26
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +5 -1
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +11 -4
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +4 -1
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +14 -6
- data/ext/sources/ggml/src/ggml-cuda/snake.cu +72 -0
- data/ext/sources/ggml/src/ggml-cuda/snake.cuh +8 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +4 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +45 -13
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +40 -18
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +8 -4
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +26 -23
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +31 -2
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +80 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +7 -2
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +22 -4
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +2 -1
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +1428 -743
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -7
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +53 -84
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +25 -12
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +165 -184
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +5 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/concat-ops.c +277 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +170 -127
- data/ext/sources/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +125 -97
- data/ext/sources/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +1148 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +148 -42
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.c +2 -2
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.h +252 -62
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +9 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +87 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1878 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +2066 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-ops.c +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-ops.h +88 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +96 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +182 -57
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +9 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +71 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +27 -10
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +63 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +9 -8
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-flash-attn.h +47 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-log.h +65 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-pow.h +42 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +1 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sin-cos.h +90 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +5 -8
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +529 -815
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2522 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/pad-ops.c +547 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +291 -95
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +59 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +121 -133
- data/ext/sources/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +244 -151
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +6 -6
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +719 -45
- data/ext/sources/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-opnode.h +272 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +3 -1
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +22 -9
- data/ext/sources/ggml/src/ggml-impl.h +6 -1
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +138 -13
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +32 -1
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +164 -28
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +80 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +190 -19
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +2 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +39 -26
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +823 -322
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +5 -6
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +54 -5
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +12248 -5907
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +67 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +59 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +1819 -112
- data/ext/sources/ggml/src/ggml-opencl/kernels/gated_delta_net.cl +249 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +306 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +256 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +258 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_k_f32_ns.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +260 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +262 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_k_f32_ns.cl +288 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q6_k_f32_ns.cl +267 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mat_Ab_Bi_8x4.cl → gemm_noshuffle_q4_0_f32.cl} +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_0_f32.cl +131 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_1_f32.cl +134 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mm_q8_0_f32_8x4.cl → gemm_noshuffle_q8_0_f32.cl} +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +120 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +123 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_k_f32_ns.cl +155 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +123 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_k_f32_ns.cl +160 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q6_k_f32_ns.cl +141 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle_general.cl → gemv_noshuffle_q4_0_f32.cl} +5 -5
- data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle.cl → gemv_noshuffle_q4_0_f32_spec.cl} +5 -5
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_0_f32.cl +291 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_1_f32.cl +294 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +15 -9
- data/ext/sources/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_0_f32_l4_lm.cl +173 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_1_f32_l4_lm.cl +175 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_0_f32.cl +241 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_0_f32_flat.cl +243 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_1_f32.cl +243 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_1_f32_flat.cl +247 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +48 -64
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +15 -5
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +18 -11
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +35 -13
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +264 -192
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +33 -7
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +1 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +1 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +27 -3
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +67 -36
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +1 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +101 -44
- data/ext/sources/ggml/src/ggml-openvino/utils.h +23 -3
- data/ext/sources/ggml/src/ggml-opt.cpp +1 -0
- data/ext/sources/ggml/src/ggml-quants.c +289 -114
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +24 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +167 -311
- data/ext/sources/ggml/src/ggml-rpc/transport.cpp +683 -0
- data/ext/sources/ggml/src/ggml-rpc/transport.h +34 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +50 -4
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/common.cpp +74 -2
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +41 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +115 -13
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cumsum.cpp +148 -0
- data/ext/sources/ggml/src/ggml-sycl/cumsum.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +663 -0
- data/ext/sources/ggml/src/ggml-sycl/diag.cpp +67 -0
- data/ext/sources/ggml/src/ggml-sycl/diag.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +586 -6
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1 -90
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +0 -2
- data/ext/sources/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +7 -5
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +76 -168
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/fill.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fill.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +69 -31
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +79 -3
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +823 -190
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +353 -89
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +5 -3
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1344 -26
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +16 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +27 -27
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +71 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +7 -1
- data/ext/sources/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
- data/ext/sources/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +6 -1
- data/ext/sources/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +62 -10
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +18 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/type.hpp +112 -0
- data/ext/sources/ggml/src/ggml-sycl/upscale.cpp +410 -0
- data/ext/sources/ggml/src/ggml-sycl/upscale.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +215 -53
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +4 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +2 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +2 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +1 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +1 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +0 -2
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +2060 -535
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +6 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +146 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +25 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +643 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dot_product_funcs.glsl +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2_decode_vector.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +197 -48
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +60 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +115 -113
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +122 -31
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +131 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fwht.comp +115 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +125 -64
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +10 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +16 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +76 -54
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +122 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +6 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +1 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +88 -55
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +43 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +159 -125
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +8 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +24 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +5 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +3 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/snake.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +11 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +79 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +171 -147
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +5 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +2202 -283
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +2610 -1403
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +37 -7
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +8 -7
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +76 -95
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +19 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{cpy.tmpl.wgsl → cpy.wgsl} +25 -50
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +107 -184
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_quant_staging.tmpl +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +397 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +619 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +149 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +183 -78
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +655 -495
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +52 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +8 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +5 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +80 -409
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1432 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_q_acc.tmpl +303 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/quant_inner_loops.tmpl +21 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/quantize_q8.wgsl +173 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{rope.tmpl.wgsl → rope.wgsl} +71 -142
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +6 -4
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +2 -3
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows_quant.wgsl +224 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{soft_max.tmpl.wgsl → soft_max.wgsl} +106 -206
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +68 -48
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +18 -14
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +244 -10
- data/ext/sources/ggml/src/ggml.c +110 -28
- data/ext/sources/ggml/src/gguf.cpp +173 -28
- data/ext/sources/include/parakeet.h +342 -0
- data/ext/sources/include/whisper.h +10 -0
- data/ext/sources/media/matmul.png +0 -0
- data/ext/sources/src/CMakeLists.txt +23 -0
- data/ext/sources/src/parakeet-arch.h +188 -0
- data/ext/sources/src/parakeet.cpp +3838 -0
- data/ext/sources/src/whisper.cpp +56 -12
- data/extsources.rb +26 -10
- data/lib/whisper/log_settable.rb +36 -0
- data/lib/whisper/model/uri.rb +13 -1
- data/lib/whisper/output.rb +74 -0
- data/sig/whisper.rbs +411 -62
- data/test/helper.rb +2 -0
- data/test/jfk_reader/jfk_reader.c +50 -7
- data/test/test_callback.rb +1 -0
- data/test/test_package.rb +6 -5
- data/test/test_parakeet.rb +28 -0
- data/test/test_parakeet_callback.rb +107 -0
- data/test/test_parakeet_context.rb +116 -0
- data/test/test_parakeet_context_params.rb +24 -0
- data/test/test_parakeet_model.rb +21 -0
- data/test/test_parakeet_params.rb +78 -0
- data/test/test_parakeet_segment.rb +42 -0
- data/test/test_parakeet_token.rb +73 -0
- data/test/test_params.rb +2 -0
- data/test/test_vad_segment.rb +1 -1
- data/test/test_whisper.rb +24 -6
- data/whispercpp.gemspec +2 -2
- metadata +215 -281
- data/ext/sources/bindings/javascript/CMakeLists.txt +0 -41
- data/ext/sources/bindings/javascript/emscripten.cpp +0 -93
- data/ext/sources/bindings/javascript/libwhisper.worker.js +0 -1
- data/ext/sources/bindings/javascript/package.json +0 -26
- data/ext/sources/bindings/javascript/whisper.js +0 -19
- data/ext/sources/examples/addon.node/CMakeLists.txt +0 -31
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +0 -133
- data/ext/sources/examples/addon.node/addon.cpp +0 -557
- data/ext/sources/examples/addon.node/index.js +0 -59
- data/ext/sources/examples/addon.node/package.json +0 -16
- data/ext/sources/examples/addon.node/vad-example.js +0 -132
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +0 -49
- data/ext/sources/examples/bench.wasm/emscripten.cpp +0 -87
- data/ext/sources/examples/bench.wasm/index-tmpl.html +0 -285
- data/ext/sources/examples/coi-serviceworker.js +0 -146
- data/ext/sources/examples/command/CMakeLists.txt +0 -10
- data/ext/sources/examples/command/command.cpp +0 -802
- data/ext/sources/examples/command/commands.txt +0 -9
- data/ext/sources/examples/command.wasm/CMakeLists.txt +0 -50
- data/ext/sources/examples/command.wasm/emscripten.cpp +0 -327
- data/ext/sources/examples/command.wasm/index-tmpl.html +0 -415
- data/ext/sources/examples/generate-karaoke.sh +0 -57
- data/ext/sources/examples/helpers.js +0 -191
- data/ext/sources/examples/livestream.sh +0 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +0 -10
- data/ext/sources/examples/lsp/lsp.cpp +0 -471
- data/ext/sources/examples/lsp/whisper.vim +0 -362
- data/ext/sources/examples/python/test_whisper_processor.py +0 -7
- data/ext/sources/examples/python/whisper_processor.py +0 -54
- data/ext/sources/examples/server/bench.js +0 -29
- data/ext/sources/examples/server.py +0 -120
- data/ext/sources/examples/stream/CMakeLists.txt +0 -10
- data/ext/sources/examples/stream/stream.cpp +0 -437
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +0 -49
- data/ext/sources/examples/stream.wasm/emscripten.cpp +0 -216
- data/ext/sources/examples/stream.wasm/index-tmpl.html +0 -491
- data/ext/sources/examples/sycl/CMakeLists.txt +0 -9
- data/ext/sources/examples/sycl/build.sh +0 -22
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +0 -11
- data/ext/sources/examples/sycl/run-whisper.sh +0 -17
- data/ext/sources/examples/talk-llama/CMakeLists.txt +0 -48
- data/ext/sources/examples/talk-llama/eleven-labs.py +0 -80
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +0 -488
- data/ext/sources/examples/talk-llama/llama-adapter.h +0 -89
- data/ext/sources/examples/talk-llama/llama-arch.cpp +0 -2877
- data/ext/sources/examples/talk-llama/llama-arch.h +0 -628
- data/ext/sources/examples/talk-llama/llama-batch.cpp +0 -919
- data/ext/sources/examples/talk-llama/llama-batch.h +0 -173
- data/ext/sources/examples/talk-llama/llama-chat.cpp +0 -896
- data/ext/sources/examples/talk-llama/llama-chat.h +0 -71
- data/ext/sources/examples/talk-llama/llama-context.cpp +0 -3633
- data/ext/sources/examples/talk-llama/llama-context.h +0 -359
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +0 -5
- data/ext/sources/examples/talk-llama/llama-cparams.h +0 -47
- data/ext/sources/examples/talk-llama/llama-ext.h +0 -12
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +0 -1464
- data/ext/sources/examples/talk-llama/llama-grammar.h +0 -194
- data/ext/sources/examples/talk-llama/llama-graph.cpp +0 -2735
- data/ext/sources/examples/talk-llama/llama-graph.h +0 -1031
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +0 -258
- data/ext/sources/examples/talk-llama/llama-hparams.h +0 -353
- data/ext/sources/examples/talk-llama/llama-impl.cpp +0 -171
- data/ext/sources/examples/talk-llama/llama-impl.h +0 -75
- data/ext/sources/examples/talk-llama/llama-io.cpp +0 -15
- data/ext/sources/examples/talk-llama/llama-io.h +0 -35
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +0 -330
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +0 -137
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +0 -2285
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +0 -389
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +0 -533
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +0 -275
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +0 -140
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +0 -268
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +0 -139
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +0 -1165
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +0 -182
- data/ext/sources/examples/talk-llama/llama-memory.cpp +0 -59
- data/ext/sources/examples/talk-llama/llama-memory.h +0 -122
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +0 -752
- data/ext/sources/examples/talk-llama/llama-mmap.h +0 -73
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +0 -1655
- data/ext/sources/examples/talk-llama/llama-model-loader.h +0 -206
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +0 -299
- data/ext/sources/examples/talk-llama/llama-model-saver.h +0 -40
- data/ext/sources/examples/talk-llama/llama-model.cpp +0 -9056
- data/ext/sources/examples/talk-llama/llama-model.h +0 -597
- data/ext/sources/examples/talk-llama/llama-quant.cpp +0 -1304
- data/ext/sources/examples/talk-llama/llama-quant.h +0 -1
- data/ext/sources/examples/talk-llama/llama-sampler.cpp +0 -3885
- data/ext/sources/examples/talk-llama/llama-sampler.h +0 -42
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +0 -3970
- data/ext/sources/examples/talk-llama/llama-vocab.h +0 -187
- data/ext/sources/examples/talk-llama/llama.cpp +0 -1194
- data/ext/sources/examples/talk-llama/llama.h +0 -1573
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +0 -190
- data/ext/sources/examples/talk-llama/models/apertus.cpp +0 -125
- data/ext/sources/examples/talk-llama/models/arcee.cpp +0 -135
- data/ext/sources/examples/talk-llama/models/arctic.cpp +0 -137
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +0 -143
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +0 -133
- data/ext/sources/examples/talk-llama/models/bert.cpp +0 -184
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +0 -145
- data/ext/sources/examples/talk-llama/models/bloom.cpp +0 -101
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +0 -178
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +0 -111
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +0 -102
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +0 -134
- data/ext/sources/examples/talk-llama/models/command-r.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/deci.cpp +0 -135
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +0 -142
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +0 -262
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +0 -445
- data/ext/sources/examples/talk-llama/models/dots1.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/dream.cpp +0 -105
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +0 -148
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +0 -110
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +0 -97
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +0 -145
- data/ext/sources/examples/talk-llama/models/exaone.cpp +0 -114
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +0 -111
- data/ext/sources/examples/talk-llama/models/falcon.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +0 -116
- data/ext/sources/examples/talk-llama/models/gemma.cpp +0 -112
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +0 -155
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +0 -384
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +0 -170
- data/ext/sources/examples/talk-llama/models/glm4.cpp +0 -157
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +0 -105
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +0 -144
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +0 -195
- data/ext/sources/examples/talk-llama/models/granite.cpp +0 -210
- data/ext/sources/examples/talk-llama/models/grok.cpp +0 -159
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +0 -139
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +0 -153
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/jais.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/jais2.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/jamba.cpp +0 -106
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +0 -381
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +0 -196
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/llada.cpp +0 -99
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +0 -178
- data/ext/sources/examples/talk-llama/models/llama.cpp +0 -175
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +0 -117
- data/ext/sources/examples/talk-llama/models/mamba-base.cpp +0 -289
- data/ext/sources/examples/talk-llama/models/mamba.cpp +0 -54
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +0 -129
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +0 -200
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +0 -160
- data/ext/sources/examples/talk-llama/models/models.h +0 -704
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +0 -109
- data/ext/sources/examples/talk-llama/models/mpt.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +0 -162
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +0 -104
- data/ext/sources/examples/talk-llama/models/olmo.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +0 -150
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +0 -127
- data/ext/sources/examples/talk-llama/models/openelm.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/orion.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/phi2.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/phi3.cpp +0 -152
- data/ext/sources/examples/talk-llama/models/plamo.cpp +0 -110
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +0 -320
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/plm.cpp +0 -169
- data/ext/sources/examples/talk-llama/models/qwen.cpp +0 -108
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +0 -151
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +0 -117
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +0 -381
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +0 -422
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +0 -131
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +0 -525
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +0 -140
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/refact.cpp +0 -94
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +0 -164
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +0 -94
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +0 -137
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +0 -90
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +0 -146
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +0 -100
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +0 -165
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +0 -166
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +0 -96
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +0 -149
- data/ext/sources/examples/talk-llama/models/xverse.cpp +0 -108
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +0 -23
- data/ext/sources/examples/talk-llama/speak +0 -40
- data/ext/sources/examples/talk-llama/speak.bat +0 -1
- data/ext/sources/examples/talk-llama/speak.ps1 +0 -14
- data/ext/sources/examples/talk-llama/talk-llama.cpp +0 -813
- data/ext/sources/examples/talk-llama/unicode-data.cpp +0 -7034
- data/ext/sources/examples/talk-llama/unicode-data.h +0 -20
- data/ext/sources/examples/talk-llama/unicode.cpp +0 -1103
- data/ext/sources/examples/talk-llama/unicode.h +0 -111
- data/ext/sources/examples/wchess/CMakeLists.txt +0 -10
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +0 -19
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +0 -803
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +0 -33
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +0 -193
- data/ext/sources/examples/wchess/libwchess/WChess.h +0 -63
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +0 -117
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +0 -8
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +0 -253
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +0 -50
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +0 -118
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +0 -659
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +0 -99
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +0 -155
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +0 -153
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +0 -26
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +0 -123
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +0 -17
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +0 -333
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +0 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +0 -182
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +0 -323
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +0 -718
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +0 -123
- data/ext/sources/tests/CMakeLists.txt +0 -112
- data/ext/sources/tests/earnings21/eval.mk +0 -58
- data/ext/sources/tests/earnings21/eval.py +0 -68
- data/ext/sources/tests/earnings21/normalizers/__init__.py +0 -2
- data/ext/sources/tests/earnings21/normalizers/basic.py +0 -80
- data/ext/sources/tests/earnings21/normalizers/english.json +0 -1741
- data/ext/sources/tests/earnings21/normalizers/english.py +0 -550
- data/ext/sources/tests/earnings21/requirements.txt +0 -6
- data/ext/sources/tests/en-0-ref.txt +0 -1
- data/ext/sources/tests/en-1-ref.txt +0 -1
- data/ext/sources/tests/en-2-ref.txt +0 -1
- data/ext/sources/tests/es-0-ref.txt +0 -1
- data/ext/sources/tests/librispeech/eval.mk +0 -39
- data/ext/sources/tests/librispeech/eval.py +0 -47
- data/ext/sources/tests/librispeech/normalizers/__init__.py +0 -2
- data/ext/sources/tests/librispeech/normalizers/basic.py +0 -80
- data/ext/sources/tests/librispeech/normalizers/english.json +0 -1741
- data/ext/sources/tests/librispeech/normalizers/english.py +0 -550
- data/ext/sources/tests/librispeech/requirements.txt +0 -6
- data/ext/sources/tests/run-tests.sh +0 -130
- data/ext/sources/tests/test-c.c +0 -3
- data/ext/sources/tests/test-vad-full.cpp +0 -56
- data/ext/sources/tests/test-vad.cpp +0 -83
- data/ext/sources/tests/test-whisper.js +0 -58
- data/lib/whisper/context.rb +0 -15
- data/lib/whisper/segment.rb +0 -58
- /data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle_general_q8_0_f32.cl → gemv_noshuffle_q8_0_f32.cl} +0 -0
|
@@ -32,6 +32,9 @@
|
|
|
32
32
|
#include <aclnnop/aclnn_cat.h>
|
|
33
33
|
#include <aclnnop/aclnn_clamp.h>
|
|
34
34
|
#include <aclnnop/aclnn_cos.h>
|
|
35
|
+
#include <aclnnop/aclnn_cumsum.h>
|
|
36
|
+
#include <aclnnop/aclnn_tril.h>
|
|
37
|
+
#include <aclnnop/aclnn_triu.h>
|
|
35
38
|
#include <aclnnop/aclnn_exp.h>
|
|
36
39
|
#include <aclnnop/aclnn_gelu.h>
|
|
37
40
|
#include <aclnnop/aclnn_gelu_v2.h>
|
|
@@ -47,6 +50,9 @@
|
|
|
47
50
|
#include <aclnnop/aclnn_sign.h>
|
|
48
51
|
#include <aclnnop/aclnn_silu.h>
|
|
49
52
|
#include <aclnnop/aclnn_sin.h>
|
|
53
|
+
#include <aclnnop/aclnn_softplus.h>
|
|
54
|
+
#include <aclnnop/aclnn_swi_glu.h>
|
|
55
|
+
#include <aclnnop/aclnn_geglu.h>
|
|
50
56
|
#include <aclnnop/aclnn_slice.h>
|
|
51
57
|
#include <aclnnop/aclnn_sqrt.h>
|
|
52
58
|
#include <aclnnop/aclnn_tanh.h>
|
|
@@ -69,6 +75,9 @@
|
|
|
69
75
|
*/
|
|
70
76
|
void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
71
77
|
|
|
78
|
+
void ggml_cann_swiglu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
79
|
+
void ggml_cann_geglu(ggml_backend_cann_context & ctx, ggml_tensor * dst, int64_t approximate);
|
|
80
|
+
|
|
72
81
|
/**
|
|
73
82
|
* @brief Applies the Leaky ReLU activation function to a tensor using the CANN
|
|
74
83
|
* backend.
|
|
@@ -325,6 +334,48 @@ void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
|
325
334
|
|
|
326
335
|
void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
327
336
|
|
|
337
|
+
/**
|
|
338
|
+
* @brief Computes the cumulative sum of a ggml tensor along dim 0 using the
|
|
339
|
+
* CANN backend.
|
|
340
|
+
*
|
|
341
|
+
* @param ctx The CANN context used for operations.
|
|
342
|
+
* @param dst The destination tensor. dst->op is `GGML_OP_CUMSUM`.
|
|
343
|
+
*/
|
|
344
|
+
void ggml_cann_cumsum(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
345
|
+
|
|
346
|
+
/**
|
|
347
|
+
* @brief Computes a triangular mask (tril/triu) of a square ggml tensor
|
|
348
|
+
* using the CANN backend.
|
|
349
|
+
*
|
|
350
|
+
* @param ctx The CANN context used for operations.
|
|
351
|
+
* @param dst The destination tensor. dst->op is `GGML_OP_TRI`.
|
|
352
|
+
*/
|
|
353
|
+
void ggml_cann_tri(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
354
|
+
|
|
355
|
+
/**
|
|
356
|
+
* @brief Solves a triangular linear system AX=B using the CANN backend.
|
|
357
|
+
*
|
|
358
|
+
* @param ctx The CANN context used for operations.
|
|
359
|
+
* @param dst The destination tensor. dst->op is `GGML_OP_SOLVE_TRI`.
|
|
360
|
+
*/
|
|
361
|
+
void ggml_cann_solve_tri(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
362
|
+
|
|
363
|
+
/**
|
|
364
|
+
* @brief Creates a diagonal matrix from a vector using the CANN backend.
|
|
365
|
+
*
|
|
366
|
+
* @param ctx The CANN context used for operations.
|
|
367
|
+
* @param dst The destination tensor. dst->op is `GGML_OP_DIAG`.
|
|
368
|
+
*/
|
|
369
|
+
void ggml_cann_diag(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
370
|
+
|
|
371
|
+
/**
|
|
372
|
+
* @brief Fills a tensor with a constant scalar value using the CANN backend.
|
|
373
|
+
*
|
|
374
|
+
* @param ctx The CANN context used for operations.
|
|
375
|
+
* @param dst The destination tensor. dst->op is `GGML_OP_FILL`.
|
|
376
|
+
*/
|
|
377
|
+
void ggml_cann_fill(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
378
|
+
|
|
328
379
|
/**
|
|
329
380
|
* @brief Upsamples a ggml tensor using nearest neighbor interpolation using
|
|
330
381
|
* the CANN backend.
|
|
@@ -461,6 +512,9 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor *
|
|
|
461
512
|
// @see ggml_cann_dup.
|
|
462
513
|
void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
463
514
|
|
|
515
|
+
// @see ggml_cann_acc, but copies src1 into dst instead of adding.
|
|
516
|
+
void ggml_cann_set(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
517
|
+
|
|
464
518
|
/**
|
|
465
519
|
* @brief Computes the softmax activation with optional masking.
|
|
466
520
|
*
|
|
@@ -543,6 +597,21 @@ void ggml_cann_mul_mat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
|
543
597
|
*/
|
|
544
598
|
void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
545
599
|
|
|
600
|
+
/**
|
|
601
|
+
* @brief Pre-load the RoPE cache before ACL graph capture.
|
|
602
|
+
*
|
|
603
|
+
* This function must be called outside of graph capture to perform
|
|
604
|
+
* host-to-device memory copies and device memory allocations that are
|
|
605
|
+
* not allowed on a captured stream. After pre-loading, the rope cache
|
|
606
|
+
* metadata is updated so that the subsequent call to
|
|
607
|
+
* aclnn_rope_cache_init (inside graph capture) skips these operations
|
|
608
|
+
* and only records the on-device computations into the captured graph.
|
|
609
|
+
*
|
|
610
|
+
* @param ctx CANN backend context.
|
|
611
|
+
* @param dst A ROPE destination tensor from the computation graph.
|
|
612
|
+
*/
|
|
613
|
+
void ggml_cann_rope_cache_preload(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
614
|
+
|
|
546
615
|
/**
|
|
547
616
|
* @brief Computes the index of the maximum value along the specified dimension
|
|
548
617
|
* of a ggml tensor using the CANN backend.
|
|
@@ -798,6 +867,8 @@ void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
|
798
867
|
* dst->op is expected to be `GGML_OP_STEP`.
|
|
799
868
|
*/
|
|
800
869
|
void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
870
|
+
void ggml_cann_softplus(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
871
|
+
void ggml_cann_geglu_quick(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|
801
872
|
|
|
802
873
|
/**
|
|
803
874
|
* @brief Performs the Flash Attention extended operator using the CANN backend.
|
|
@@ -216,14 +216,16 @@ struct ggml_cann_pool_alloc {
|
|
|
216
216
|
#ifdef USE_ACL_GRAPH
|
|
217
217
|
struct ggml_graph_node_properties {
|
|
218
218
|
// dst tensor
|
|
219
|
-
void * node_address;
|
|
220
|
-
int64_t ne[GGML_MAX_DIMS];
|
|
221
|
-
size_t nb[GGML_MAX_DIMS];
|
|
219
|
+
void * node_address;
|
|
220
|
+
ggml_type node_type;
|
|
221
|
+
int64_t ne[GGML_MAX_DIMS];
|
|
222
|
+
size_t nb[GGML_MAX_DIMS];
|
|
222
223
|
|
|
223
224
|
// src tensor
|
|
224
|
-
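void * src_address[GGML_MAX_SRC];
|
|
225
|
-
int64_t src_ne[GGML_MAX_SRC][GGML_MAX_DIMS];
|
|
226
|
-
size_t src_nb[GGML_MAX_SRC][GGML_MAX_DIMS];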
|
|
225
|
+
void * src_address[GGML_MAX_SRC];
|
|
226
|
+
ggml_type src_type[GGML_MAX_SRC];
|
|
227
|
+
int64_t src_ne[GGML_MAX_SRC][GGML_MAX_DIMS];
|
|
228
|
+
size_t src_nb[GGML_MAX_SRC][GGML_MAX_DIMS];
|
|
227
229
|
|
|
228
230
|
// op
|
|
229
231
|
ggml_op node_op;
|
|
@@ -247,6 +249,10 @@ struct ggml_graph_node_properties {
|
|
|
247
249
|
return false;
|
|
248
250
|
}
|
|
249
251
|
|
|
252
|
+
if (node->type != this->node_type) {
|
|
253
|
+
return false;
|
|
254
|
+
}
|
|
255
|
+
|
|
250
256
|
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
|
251
257
|
if (node->ne[i] != this->ne[i]) {
|
|
252
258
|
return false;
|
|
@@ -262,6 +268,10 @@ struct ggml_graph_node_properties {
|
|
|
262
268
|
return false;
|
|
263
269
|
}
|
|
264
270
|
|
|
271
|
+
if (node->src[i]->type != this->src_type[i]) {
|
|
272
|
+
return false;
|
|
273
|
+
}
|
|
274
|
+
|
|
265
275
|
for (int d = 0; d < GGML_MAX_DIMS; d++) {
|
|
266
276
|
if (node->src[i]->ne[d] != this->src_ne[i][d]) {
|
|
267
277
|
return false;
|
|
@@ -277,10 +287,7 @@ struct ggml_graph_node_properties {
|
|
|
277
287
|
}
|
|
278
288
|
}
|
|
279
289
|
|
|
280
|
-
|
|
281
|
-
return memcmp(this->op_params, node->op_params, GGML_MAX_OP_PARAMS) == 0;
|
|
282
|
-
}
|
|
283
|
-
return true;
|
|
290
|
+
return memcmp(this->op_params, node->op_params, GGML_MAX_OP_PARAMS) == 0;
|
|
284
291
|
}
|
|
285
292
|
};
|
|
286
293
|
|
|
@@ -322,6 +329,7 @@ struct ggml_cann_graph {
|
|
|
322
329
|
|
|
323
330
|
prop.node_address = node->data;
|
|
324
331
|
prop.node_op = node->op;
|
|
332
|
+
prop.node_type = node->type;
|
|
325
333
|
|
|
326
334
|
std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne);
|
|
327
335
|
std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb);
|
|
@@ -329,10 +337,12 @@ struct ggml_cann_graph {
|
|
|
329
337
|
for (int src = 0; src < GGML_MAX_SRC; ++src) {
|
|
330
338
|
if (node->src[src]) {
|
|
331
339
|
prop.src_address[src] = node->src[src]->data;
|
|
340
|
+
prop.src_type[src] = node->src[src]->type;
|
|
332
341
|
std::copy_n(node->src[src]->ne, GGML_MAX_DIMS, prop.src_ne[src]);
|
|
333
342
|
std::copy_n(node->src[src]->nb, GGML_MAX_DIMS, prop.src_nb[src]);
|
|
334
343
|
} else {
|
|
335
344
|
prop.src_address[src] = nullptr;
|
|
345
|
+
prop.src_type[src] = GGML_TYPE_COUNT;
|
|
336
346
|
std::fill_n(prop.src_ne[src], GGML_MAX_DIMS, 0);
|
|
337
347
|
std::fill_n(prop.src_nb[src], GGML_MAX_DIMS, 0);
|
|
338
348
|
}
|
|
@@ -36,10 +36,13 @@
|
|
|
36
36
|
#include <cmath>
|
|
37
37
|
#include <cstdio>
|
|
38
38
|
#include <cstring>
|
|
39
|
+
#include <memory>
|
|
39
40
|
#include <mutex>
|
|
40
41
|
#include <optional>
|
|
41
42
|
#include <queue>
|
|
43
|
+
#include <unordered_map>
|
|
42
44
|
#include <unordered_set>
|
|
45
|
+
#include <vector>
|
|
43
46
|
|
|
44
47
|
#define GGML_COMMON_DECL_C
|
|
45
48
|
|
|
@@ -770,6 +773,21 @@ std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(i
|
|
|
770
773
|
}
|
|
771
774
|
|
|
772
775
|
// cann buffer
|
|
776
|
+
|
|
777
|
+
/**
|
|
778
|
+
* @brief Tracks multi-threaded write progress for a single tensor.
|
|
779
|
+
*
|
|
780
|
+
* When multiple threads call set_tensor on different chunks of the same tensor,
|
|
781
|
+
* this tracker accumulates progress and defers post-processing (quantized format
|
|
782
|
+
* transform or ND-to-NZ conversion) until all data has been written.
|
|
783
|
+
*/
|
|
784
|
+
struct TensorSetTracker {
|
|
785
|
+
std::mutex mtx; ///< Protects concurrent access to this tracker
|
|
786
|
+
size_t bytes_written = 0; ///< Accumulated bytes written so far
|
|
787
|
+
size_t total_bytes = 0; ///< Target size (full tensor)
|
|
788
|
+
std::vector<uint8_t> host_buffer; ///< Host staging buffer for quantized tensors
|
|
789
|
+
};
|
|
790
|
+
|
|
773
791
|
/**
|
|
774
792
|
* @brief Context for managing a CANN buffer associated with a specific device.
|
|
775
793
|
*
|
|
@@ -780,6 +798,9 @@ struct ggml_backend_cann_buffer_context {
|
|
|
780
798
|
int32_t device; ///< The device ID associated with this buffer context.
|
|
781
799
|
void * dev_ptr = nullptr; ///< Pointer to the device memory allocated for the buffer.
|
|
782
800
|
|
|
801
|
+
std::mutex tracker_mutex; ///< Protects the trackers map
|
|
802
|
+
std::unordered_map<void *, std::unique_ptr<TensorSetTracker>> trackers;
|
|
803
|
+
|
|
783
804
|
/**
|
|
784
805
|
* @brief Constructor to initialize the CANN buffer context.
|
|
785
806
|
*
|
|
@@ -792,6 +813,31 @@ struct ggml_backend_cann_buffer_context {
|
|
|
792
813
|
* @brief Destructor to free the device memory allocated for the buffer.
|
|
793
814
|
*/
|
|
794
815
|
~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr)); }
|
|
816
|
+
|
|
817
|
+
/**
|
|
818
|
+
* @brief Get or create a tracker for the given tensor.
|
|
819
|
+
*/
|
|
820
|
+
TensorSetTracker * get_or_create_tracker(ggml_tensor * tensor) {
|
|
821
|
+
std::lock_guard<std::mutex> lock(tracker_mutex);
|
|
822
|
+
auto key = tensor->data;
|
|
823
|
+
auto it = trackers.find(key);
|
|
824
|
+
if (it == trackers.end()) {
|
|
825
|
+
auto tracker = std::make_unique<TensorSetTracker>();
|
|
826
|
+
tracker->total_bytes = ggml_nbytes(tensor);
|
|
827
|
+
auto * ptr = tracker.get();
|
|
828
|
+
trackers[key] = std::move(tracker);
|
|
829
|
+
return ptr;
|
|
830
|
+
}
|
|
831
|
+
return it->second.get();
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
/**
|
|
835
|
+
* @brief Remove the tracker for the given tensor.
|
|
836
|
+
*/
|
|
837
|
+
void remove_tracker(ggml_tensor * tensor) {
|
|
838
|
+
std::lock_guard<std::mutex> lock(tracker_mutex);
|
|
839
|
+
trackers.erase(tensor->data);
|
|
840
|
+
}
|
|
795
841
|
};
|
|
796
842
|
|
|
797
843
|
// cann buffer type
|
|
@@ -1124,6 +1170,7 @@ static enum ggml_status ggml_backend_cann_buffer_init_tensor(ggml_backend_buffer
|
|
|
1124
1170
|
* designed to be used with a global array, one per device.
|
|
1125
1171
|
*/
|
|
1126
1172
|
struct ggml_cann_nz_workspace {
|
|
1173
|
+
std::mutex mtx; // Protects ptr/allocated from concurrent access
|
|
1127
1174
|
void * ptr; // Pointer to allocated device buffer
|
|
1128
1175
|
size_t allocated; // Size of currently allocated buffer in bytes
|
|
1129
1176
|
|
|
@@ -1190,13 +1237,15 @@ static ggml_cann_nz_workspace g_nz_workspaces[GGML_CANN_MAX_DEVICES];
|
|
|
1190
1237
|
* @note The workspace buffer used in this function is managed globally and reused
|
|
1191
1238
|
* across calls. This reduces overhead from repeated memory allocation and deallocation.
|
|
1192
1239
|
*/
|
|
1193
|
-
static void weight_format_to_nz(ggml_tensor * tensor,
|
|
1194
|
-
acl_tensor_ptr weightTransposed = ggml_cann_create_tensor(tensor, tensor->ne, tensor->nb, 2, ACL_FORMAT_ND,
|
|
1240
|
+
static void weight_format_to_nz(ggml_tensor * tensor, int device) {
|
|
1241
|
+
acl_tensor_ptr weightTransposed = ggml_cann_create_tensor(tensor, tensor->ne, tensor->nb, 2, ACL_FORMAT_ND, 0);
|
|
1195
1242
|
uint64_t workspaceSize = 0;
|
|
1196
1243
|
aclOpExecutor * executor;
|
|
1197
1244
|
|
|
1198
1245
|
// TransMatmulWeight
|
|
1199
1246
|
ACL_CHECK(aclnnTransMatmulWeightGetWorkspaceSize(weightTransposed.get(), &workspaceSize, &executor));
|
|
1247
|
+
|
|
1248
|
+
std::lock_guard<std::mutex> lock(g_nz_workspaces[device].mtx);
|
|
1200
1249
|
// Avoid frequent malloc/free of the workspace.
|
|
1201
1250
|
g_nz_workspaces[device].realloc(workspaceSize);
|
|
1202
1251
|
|
|
@@ -1210,7 +1259,13 @@ static void weight_format_to_nz(ggml_tensor * tensor, size_t offset, int device)
|
|
|
1210
1259
|
* @brief Set tensor data in a CANN buffer.
|
|
1211
1260
|
*
|
|
1212
1261
|
* This function sets tensor data in a CANN buffer, handling transformations
|
|
1213
|
-
* if needed based on the tensor's type.
|
|
1262
|
+
* if needed based on the tensor's type. It supports multi-threaded calls
|
|
1263
|
+
* where different threads write different chunks of the same tensor.
|
|
1264
|
+
*
|
|
1265
|
+
* For quantized tensors (Q4_0/Q8_0), data is staged in a host buffer and
|
|
1266
|
+
* the format transform is deferred until all chunks are written.
|
|
1267
|
+
* For NZ weight tensors, chunks are uploaded directly but the ND-to-NZ
|
|
1268
|
+
* conversion is deferred until all chunks are written.
|
|
1214
1269
|
*
|
|
1215
1270
|
* @param buffer The CANN buffer where the tensor data will be set.
|
|
1216
1271
|
* @param tensor Pointer to the tensor whose data will be set.
|
|
@@ -1226,25 +1281,72 @@ static void ggml_backend_cann_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
|
|
1226
1281
|
ggml_backend_cann_buffer_context * ctx = (ggml_backend_cann_buffer_context *) buffer->context;
|
|
1227
1282
|
|
|
1228
1283
|
ggml_cann_set_device(ctx->device);
|
|
1229
|
-
// TODO: refer to cann(#6017), it use thread's default stream.
|
|
1230
|
-
// For acl, synchronous functions use this default stream.
|
|
1231
|
-
// Why aclrtSynchronizeDevice?
|
|
1232
1284
|
|
|
1233
1285
|
// Only check env once.
|
|
1234
1286
|
static bool weight_to_nz = parse_bool(get_env_as_lowercase("GGML_CANN_WEIGHT_NZ").value_or("on"));
|
|
1235
|
-
|
|
1287
|
+
|
|
1288
|
+
bool is_quantized = need_transform(tensor->type);
|
|
1289
|
+
bool is_nz = !is_quantized && tensor->type != GGML_TYPE_BF16 && weight_to_nz &&
|
|
1290
|
+
is_matmul_weight((const ggml_tensor *) tensor);
|
|
1291
|
+
|
|
1292
|
+
// Plain tensor (not quantized, not NZ): direct copy, no tracking needed
|
|
1293
|
+
if (!is_quantized && !is_nz) {
|
|
1236
1294
|
ACL_CHECK(aclrtMemcpy((char *) tensor->data + offset, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE));
|
|
1237
|
-
|
|
1295
|
+
return;
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
// Single-shot write (full tensor at once): handle directly without tracking overhead
|
|
1299
|
+
if (offset == 0 && size == ggml_nbytes(tensor)) {
|
|
1300
|
+
if (is_quantized) {
|
|
1301
|
+
void * transform_buffer = malloc(size);
|
|
1302
|
+
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
|
1303
|
+
ACL_CHECK(aclrtMemcpy(tensor->data, size, transform_buffer, size, ACL_MEMCPY_HOST_TO_DEVICE));
|
|
1304
|
+
free(transform_buffer);
|
|
1305
|
+
} else {
|
|
1306
|
+
// NZ weight
|
|
1238
1307
|
GGML_ASSERT(tensor->ne[2] == 1);
|
|
1239
1308
|
GGML_ASSERT(tensor->ne[3] == 1);
|
|
1240
|
-
|
|
1309
|
+
ACL_CHECK(aclrtMemcpy(tensor->data, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE));
|
|
1310
|
+
weight_format_to_nz(tensor, ctx->device);
|
|
1311
|
+
}
|
|
1312
|
+
return;
|
|
1313
|
+
}
|
|
1314
|
+
|
|
1315
|
+
// Chunked write: use tracker to accumulate progress and defer transform/conversion
|
|
1316
|
+
TensorSetTracker * tracker = ctx->get_or_create_tracker(tensor);
|
|
1317
|
+
std::unique_lock<std::mutex> lock(tracker->mtx);
|
|
1318
|
+
|
|
1319
|
+
if (is_quantized) {
|
|
1320
|
+
// Stage data in host buffer; transform requires full tensor data
|
|
1321
|
+
if (tracker->host_buffer.empty()) {
|
|
1322
|
+
tracker->host_buffer.resize(tracker->total_bytes);
|
|
1241
1323
|
}
|
|
1324
|
+
memcpy(tracker->host_buffer.data() + offset, data, size);
|
|
1242
1325
|
} else {
|
|
1243
|
-
|
|
1244
|
-
|
|
1326
|
+
// NZ weight: upload chunk to device immediately, defer conversion
|
|
1327
|
+
ACL_CHECK(aclrtMemcpy((char *) tensor->data + offset, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE));
|
|
1328
|
+
}
|
|
1245
1329
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1330
|
+
tracker->bytes_written += size;
|
|
1331
|
+
|
|
1332
|
+
// All chunks received: perform deferred transform/conversion
|
|
1333
|
+
if (tracker->bytes_written >= tracker->total_bytes) {
|
|
1334
|
+
if (is_quantized) {
|
|
1335
|
+
void * transform_buffer = malloc(tracker->total_bytes);
|
|
1336
|
+
ggml_backend_cann_transform(tensor, tracker->host_buffer.data(), transform_buffer);
|
|
1337
|
+
ACL_CHECK(aclrtMemcpy(tensor->data, tracker->total_bytes, transform_buffer, tracker->total_bytes, ACL_MEMCPY_HOST_TO_DEVICE));
|
|
1338
|
+
free(transform_buffer);
|
|
1339
|
+
}
|
|
1340
|
+
|
|
1341
|
+
if (is_nz) {
|
|
1342
|
+
GGML_ASSERT(tensor->ne[2] == 1);
|
|
1343
|
+
GGML_ASSERT(tensor->ne[3] == 1);
|
|
1344
|
+
weight_format_to_nz(tensor, ctx->device);
|
|
1345
|
+
}
|
|
1346
|
+
|
|
1347
|
+
// Unlock before removing tracker, as remove_tracker destroys the mutex
|
|
1348
|
+
lock.unlock();
|
|
1349
|
+
ctx->remove_tracker(tensor);
|
|
1248
1350
|
}
|
|
1249
1351
|
}
|
|
1250
1352
|
|
|
@@ -1326,6 +1428,22 @@ static bool ggml_backend_cann_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
|
|
|
1326
1428
|
return false;
|
|
1327
1429
|
}
|
|
1328
1430
|
|
|
1431
|
+
/**
|
|
1432
|
+
* @brief Set a region of a tensor's device memory to a specified value.
|
|
1433
|
+
*
|
|
1434
|
+
* @param buffer The CANN buffer containing the tensor.
|
|
1435
|
+
* @param tensor Pointer to the tensor whose memory will be set.
|
|
1436
|
+
* @param value The value to which each byte in the region will be set.
|
|
1437
|
+
* @param offset Byte offset within the tensor's data to start setting.
|
|
1438
|
+
* @param size Number of bytes to set.
|
|
1439
|
+
*/
|
|
1440
|
+
static void ggml_backend_cann_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
|
1441
|
+
ggml_backend_cann_buffer_context * ctx = (ggml_backend_cann_buffer_context *) buffer->context;
|
|
1442
|
+
|
|
1443
|
+
ggml_cann_set_device(ctx->device);
|
|
1444
|
+
ACL_CHECK(aclrtMemset((char *) tensor->data + offset, size, value, size));
|
|
1445
|
+
}
|
|
1446
|
+
|
|
1329
1447
|
/**
|
|
1330
1448
|
* @brief Clear a CANN buffer by setting all its memory to a specified value.
|
|
1331
1449
|
*
|
|
@@ -1352,9 +1470,11 @@ static const ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
|
|
|
1352
1470
|
/* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
|
|
1353
1471
|
/* .get_base = */ ggml_backend_cann_buffer_get_base,
|
|
1354
1472
|
/* .init_tensor = */ ggml_backend_cann_buffer_init_tensor,
|
|
1355
|
-
/* .memset_tensor = */
|
|
1473
|
+
/* .memset_tensor = */ ggml_backend_cann_buffer_memset_tensor,
|
|
1356
1474
|
/* .set_tensor = */ ggml_backend_cann_buffer_set_tensor,
|
|
1357
1475
|
/* .get_tensor = */ ggml_backend_cann_buffer_get_tensor,
|
|
1476
|
+
/* .set_tensor_2d = */ NULL,
|
|
1477
|
+
/* .get_tensor_2d = */ NULL,
|
|
1358
1478
|
/* .cpy_tensor = */ ggml_backend_cann_buffer_cpy_tensor,
|
|
1359
1479
|
/* .clear = */ ggml_backend_cann_buffer_clear,
|
|
1360
1480
|
/* .reset = */ NULL,
|
|
@@ -1443,7 +1563,8 @@ static size_t ggml_backend_cann_buffer_type_get_alloc_size(ggml_backend_buffer_t
|
|
|
1443
1563
|
if (ne0 % MATRIX_ROW_PADDING != 0) {
|
|
1444
1564
|
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
|
|
1445
1565
|
}
|
|
1446
|
-
} else if (weight_to_nz &&
|
|
1566
|
+
} else if (weight_to_nz && tensor->type != GGML_TYPE_BF16
|
|
1567
|
+
&& is_matmul_weight((const ggml_tensor *) tensor)) {
|
|
1447
1568
|
// NZ format weight are not support quantized yet.
|
|
1448
1569
|
// If ND tensor transform to NZ, size may changed.
|
|
1449
1570
|
int64_t shape[] = { tensor->ne[1], tensor->ne[0] };
|
|
@@ -1730,6 +1851,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
|
|
|
1730
1851
|
case GGML_UNARY_OP_STEP:
|
|
1731
1852
|
ggml_cann_step(ctx, dst);
|
|
1732
1853
|
break;
|
|
1854
|
+
case GGML_UNARY_OP_SOFTPLUS:
|
|
1855
|
+
ggml_cann_softplus(ctx, dst);
|
|
1856
|
+
break;
|
|
1733
1857
|
default:
|
|
1734
1858
|
return false;
|
|
1735
1859
|
}
|
|
@@ -1740,20 +1864,16 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
|
|
|
1740
1864
|
GGML_CANN_CALL_OP_UNARY_GATED(Relu);
|
|
1741
1865
|
break;
|
|
1742
1866
|
case GGML_GLU_OP_GEGLU:
|
|
1867
|
+
ggml_cann_geglu(ctx, dst, 0); // approximate=0 → tanh
|
|
1868
|
+
break;
|
|
1743
1869
|
case GGML_GLU_OP_GEGLU_ERF:
|
|
1744
|
-
|
|
1745
|
-
GGML_CANN_CALL_OP_UNARY_GATED(Gelu);
|
|
1870
|
+
ggml_cann_geglu(ctx, dst, 1); // approximate=1 → erf
|
|
1746
1871
|
break;
|
|
1747
1872
|
case GGML_GLU_OP_SWIGLU:
|
|
1748
|
-
|
|
1873
|
+
ggml_cann_swiglu(ctx, dst);
|
|
1749
1874
|
break;
|
|
1750
1875
|
case GGML_GLU_OP_GEGLU_QUICK:
|
|
1751
|
-
|
|
1752
|
-
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) {
|
|
1753
|
-
GGML_CANN_CALL_ACLNN_OP(ctx, GeluV2, acl_src, 0, acl_dst);
|
|
1754
|
-
};
|
|
1755
|
-
ggml_cann_op_unary_gated(lambda, ctx, dst);
|
|
1756
|
-
}
|
|
1876
|
+
ggml_cann_geglu_quick(ctx, dst);
|
|
1757
1877
|
break;
|
|
1758
1878
|
default:
|
|
1759
1879
|
return false;
|
|
@@ -1815,6 +1935,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
|
|
|
1815
1935
|
case GGML_OP_CPY:
|
|
1816
1936
|
ggml_cann_cpy(ctx, dst);
|
|
1817
1937
|
break;
|
|
1938
|
+
case GGML_OP_SET:
|
|
1939
|
+
ggml_cann_set(ctx, dst);
|
|
1940
|
+
break;
|
|
1818
1941
|
case GGML_OP_CONT:
|
|
1819
1942
|
ggml_cann_dup(ctx, dst);
|
|
1820
1943
|
break;
|
|
@@ -1884,6 +2007,21 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
|
|
|
1884
2007
|
case GGML_OP_SSM_CONV:
|
|
1885
2008
|
ggml_cann_ssm_conv(ctx, dst);
|
|
1886
2009
|
break;
|
|
2010
|
+
case GGML_OP_CUMSUM:
|
|
2011
|
+
ggml_cann_cumsum(ctx, dst);
|
|
2012
|
+
break;
|
|
2013
|
+
case GGML_OP_TRI:
|
|
2014
|
+
ggml_cann_tri(ctx, dst);
|
|
2015
|
+
break;
|
|
2016
|
+
case GGML_OP_FILL:
|
|
2017
|
+
ggml_cann_fill(ctx, dst);
|
|
2018
|
+
break;
|
|
2019
|
+
case GGML_OP_DIAG:
|
|
2020
|
+
ggml_cann_diag(ctx, dst);
|
|
2021
|
+
break;
|
|
2022
|
+
case GGML_OP_SOLVE_TRI:
|
|
2023
|
+
ggml_cann_solve_tri(ctx, dst);
|
|
2024
|
+
break;
|
|
1887
2025
|
default:
|
|
1888
2026
|
return false;
|
|
1889
2027
|
}
|
|
@@ -2219,10 +2357,24 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
|
|
|
2219
2357
|
if (use_cann_graph) {
|
|
2220
2358
|
// If no matching graph is found, the graph needs to be recaptured.
|
|
2221
2359
|
graph_capture_required = !cann_ctx->graph_lru_cache.find_and_move_to_front(cgraph);
|
|
2360
|
+
|
|
2222
2361
|
if (graph_capture_required) {
|
|
2223
2362
|
// If no matching graph is found, add a new ACL graph.
|
|
2224
2363
|
ggml_cann_graph * new_graph = ggml_cann_graph::create_from_cgraph(cgraph);
|
|
2225
2364
|
cann_ctx->graph_lru_cache.push(new_graph);
|
|
2365
|
+
|
|
2366
|
+
// Pre-load rope cache before graph capture. During capture the
|
|
2367
|
+
// stream cannot perform host-to-device memcpy or device memory
|
|
2368
|
+
// malloc/free. Running the full cache init now populates the
|
|
2369
|
+
// cache metadata so these branches are skipped during capture,
|
|
2370
|
+
// while also warming up the memory pool.
|
|
2371
|
+
for (int i = 0; i < cgraph->n_nodes; i++) {
|
|
2372
|
+
ggml_tensor * node = cgraph->nodes[i];
|
|
2373
|
+
if (node->op == GGML_OP_ROPE) {
|
|
2374
|
+
ggml_cann_rope_cache_preload(*cann_ctx, node);
|
|
2375
|
+
break;
|
|
2376
|
+
}
|
|
2377
|
+
}
|
|
2226
2378
|
}
|
|
2227
2379
|
}
|
|
2228
2380
|
#else
|
|
@@ -2264,6 +2416,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|
|
2264
2416
|
case GGML_UNARY_OP_SGN:
|
|
2265
2417
|
case GGML_UNARY_OP_STEP:
|
|
2266
2418
|
case GGML_UNARY_OP_GELU_ERF:
|
|
2419
|
+
case GGML_UNARY_OP_SOFTPLUS:
|
|
2267
2420
|
return true;
|
|
2268
2421
|
default:
|
|
2269
2422
|
return false;
|
|
@@ -2283,6 +2436,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|
|
2283
2436
|
case GGML_OP_MUL_MAT:
|
|
2284
2437
|
{
|
|
2285
2438
|
switch (op->src[0]->type) {
|
|
2439
|
+
#ifndef ASCEND_310P
|
|
2440
|
+
case GGML_TYPE_BF16:
|
|
2441
|
+
#endif
|
|
2286
2442
|
case GGML_TYPE_F16:
|
|
2287
2443
|
case GGML_TYPE_F32:
|
|
2288
2444
|
return true;
|
|
@@ -2320,6 +2476,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|
|
2320
2476
|
switch (op->src[0]->type) {
|
|
2321
2477
|
case GGML_TYPE_F32:
|
|
2322
2478
|
case GGML_TYPE_F16:
|
|
2479
|
+
#ifndef ASCEND_310P
|
|
2480
|
+
case GGML_TYPE_BF16:
|
|
2481
|
+
#endif
|
|
2323
2482
|
case GGML_TYPE_Q8_0:
|
|
2324
2483
|
return true;
|
|
2325
2484
|
default:
|
|
@@ -2332,6 +2491,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|
|
2332
2491
|
switch (op->type) {
|
|
2333
2492
|
case GGML_TYPE_F32:
|
|
2334
2493
|
case GGML_TYPE_F16:
|
|
2494
|
+
#ifndef ASCEND_310P
|
|
2495
|
+
case GGML_TYPE_BF16:
|
|
2496
|
+
#endif
|
|
2335
2497
|
return true;
|
|
2336
2498
|
default:
|
|
2337
2499
|
return false;
|
|
@@ -2341,20 +2503,30 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|
|
2341
2503
|
case GGML_OP_CPY:
|
|
2342
2504
|
{
|
|
2343
2505
|
ggml_tensor * src = op->src[0];
|
|
2506
|
+
#ifdef ASCEND_310P
|
|
2344
2507
|
if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) ||
|
|
2345
2508
|
(src->type != GGML_TYPE_F32 && src->type != GGML_TYPE_F16)) {
|
|
2346
|
-
// only support F32 and F16.
|
|
2509
|
+
// only support F32 and F16 on 310P.
|
|
2510
|
+
return false;
|
|
2511
|
+
}
|
|
2512
|
+
#else
|
|
2513
|
+
if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16 && op->type != GGML_TYPE_BF16) ||
|
|
2514
|
+
(src->type != GGML_TYPE_F32 && src->type != GGML_TYPE_F16 && src->type != GGML_TYPE_BF16)) {
|
|
2515
|
+
// only support F32, F16 and BF16.
|
|
2347
2516
|
return false;
|
|
2348
2517
|
}
|
|
2518
|
+
#endif
|
|
2349
2519
|
return true;
|
|
2350
2520
|
}
|
|
2351
2521
|
break;
|
|
2352
2522
|
case GGML_OP_CONT:
|
|
2353
2523
|
{
|
|
2354
|
-
// TODO: support GGML_TYPE_BF16
|
|
2355
2524
|
switch (op->src[0]->type) {
|
|
2356
2525
|
case GGML_TYPE_F32:
|
|
2357
2526
|
case GGML_TYPE_F16:
|
|
2527
|
+
#ifndef ASCEND_310P
|
|
2528
|
+
case GGML_TYPE_BF16:
|
|
2529
|
+
#endif
|
|
2358
2530
|
return true;
|
|
2359
2531
|
default:
|
|
2360
2532
|
return false;
|
|
@@ -2435,6 +2607,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|
|
2435
2607
|
case GGML_OP_SUM_ROWS:
|
|
2436
2608
|
case GGML_OP_ARGSORT:
|
|
2437
2609
|
case GGML_OP_ACC:
|
|
2610
|
+
case GGML_OP_SET:
|
|
2438
2611
|
case GGML_OP_GROUP_NORM:
|
|
2439
2612
|
return true;
|
|
2440
2613
|
case GGML_OP_PAD:
|
|
@@ -2503,10 +2676,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|
|
2503
2676
|
// different head sizes of K and V are not supported yet
|
|
2504
2677
|
return false;
|
|
2505
2678
|
}
|
|
2506
|
-
if (op->src[0]->ne[0] % 16 != 0) {
|
|
2507
|
-
// TODO: padding to support
|
|
2508
|
-
return false;
|
|
2509
|
-
}
|
|
2510
2679
|
float logitSoftcap = 0.0f;
|
|
2511
2680
|
memcpy(&logitSoftcap, (const float *) (op->op_params) + 2, sizeof(float));
|
|
2512
2681
|
if (logitSoftcap != 0.0f) {
|
|
@@ -2516,6 +2685,16 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|
|
2516
2685
|
}
|
|
2517
2686
|
case GGML_OP_SSM_CONV:
|
|
2518
2687
|
return true;
|
|
2688
|
+
case GGML_OP_CUMSUM:
|
|
2689
|
+
return op->src[0]->type == GGML_TYPE_F32;
|
|
2690
|
+
case GGML_OP_TRI:
|
|
2691
|
+
return op->src[0]->type == GGML_TYPE_F32;
|
|
2692
|
+
case GGML_OP_FILL:
|
|
2693
|
+
return op->src[0]->type == GGML_TYPE_F32;
|
|
2694
|
+
case GGML_OP_DIAG:
|
|
2695
|
+
return op->src[0]->type == GGML_TYPE_F32;
|
|
2696
|
+
case GGML_OP_SOLVE_TRI:
|
|
2697
|
+
return op->src[0]->type == GGML_TYPE_F32;
|
|
2519
2698
|
default:
|
|
2520
2699
|
return false;
|
|
2521
2700
|
}
|
|
@@ -2567,6 +2746,8 @@ static const ggml_backend_i ggml_backend_cann_interface = {
|
|
|
2567
2746
|
/* .free = */ ggml_backend_cann_free,
|
|
2568
2747
|
/* .set_tensor_async = */ ggml_backend_cann_set_tensor_async,
|
|
2569
2748
|
/* .get_tensor_async = */ ggml_backend_cann_get_tensor_async,
|
|
2749
|
+
/* .set_tensor_2d_async = */ NULL,
|
|
2750
|
+
/* .get_tensor_2d_async = */ NULL,
|
|
2570
2751
|
/* .cpy_tensor_async = */ ggml_backend_cann_cpy_tensor_async,
|
|
2571
2752
|
/* .synchronize = */ ggml_backend_cann_synchronize,
|
|
2572
2753
|
/* .graph_plan_create = */ NULL,
|
|
@@ -93,6 +93,10 @@ typedef sycl::half2 ggml_half2;
|
|
|
93
93
|
// QR = QK / number of values before dequantization
|
|
94
94
|
// QI = number of 32 bit integers before dequantization
|
|
95
95
|
|
|
96
|
+
#define QI1_0 (QK1_0 / 32)
|
|
97
|
+
#define QR1_0 1
|
|
98
|
+
|
|
99
|
+
|
|
96
100
|
#define QI4_0 (QK4_0 / (4 * QR4_0))
|
|
97
101
|
#define QR4_0 2
|
|
98
102
|
|
|
@@ -170,6 +174,13 @@ typedef sycl::half2 ggml_half2;
|
|
|
170
174
|
#define GGML_EXTENSION __extension__
|
|
171
175
|
#endif // _MSC_VER
|
|
172
176
|
|
|
177
|
+
#define QK1_0 128
|
|
178
|
+
typedef struct {
|
|
179
|
+
ggml_half d; // delta
|
|
180
|
+
uint8_t qs[QK1_0 / 8]; // bits / quants
|
|
181
|
+
} block_q1_0;
|
|
182
|
+
static_assert(sizeof(block_q1_0) == sizeof(ggml_half) + QK1_0 / 8, "wrong q1_0 block size/padding");
|
|
183
|
+
|
|
173
184
|
#define QK4_0 32
|
|
174
185
|
typedef struct {
|
|
175
186
|
ggml_half d; // delta
|