whispercpp 1.3.6 → 1.3.7
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.document +3 -0
- data/.rdoc_options +2 -0
- data/README.md +38 -5
- data/Rakefile +18 -3
- data/ext/dependencies.rb +10 -4
- data/ext/dependencies_for_windows.rb +17 -0
- data/ext/extconf.rb +20 -8
- data/ext/options.rb +54 -14
- data/ext/options_for_windows.rb +51 -0
- data/ext/ruby_whisper.c +36 -42
- data/ext/ruby_whisper.h +135 -0
- data/ext/ruby_whisper_context.c +107 -28
- data/ext/ruby_whisper_log_queue.c +180 -0
- data/ext/ruby_whisper_log_settable.h +47 -0
- data/ext/ruby_whisper_parakeet.c +49 -0
- data/ext/ruby_whisper_parakeet_context.c +304 -0
- data/ext/ruby_whisper_parakeet_context_params.c +117 -0
- data/ext/ruby_whisper_parakeet_model.c +84 -0
- data/ext/ruby_whisper_parakeet_params.c +548 -0
- data/ext/ruby_whisper_parakeet_segment.c +157 -0
- data/ext/ruby_whisper_parakeet_token.c +188 -0
- data/ext/ruby_whisper_parakeet_transcribe.cpp +58 -0
- data/ext/ruby_whisper_params.c +256 -65
- data/ext/ruby_whisper_segment.c +6 -6
- data/ext/ruby_whisper_transcribe.cpp +42 -15
- data/ext/sources/CMakeLists.txt +41 -3
- data/ext/sources/CMakePresets.json +95 -0
- data/ext/sources/cmake/parakeet-config.cmake.in +30 -0
- data/ext/sources/cmake/parakeet.pc.in +10 -0
- data/ext/sources/cmake/whisper.pc.in +1 -1
- data/ext/sources/examples/CMakeLists.txt +4 -2
- data/ext/sources/examples/bench/bench.cpp +1 -1
- data/ext/sources/examples/cli/cli.cpp +43 -9
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/common-whisper.cpp +139 -67
- data/ext/sources/examples/common-whisper.h +11 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +211 -341
- data/ext/sources/examples/parakeet-cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/parakeet-cli/parakeet-cli.cpp +243 -0
- data/ext/sources/examples/parakeet-quantize/CMakeLists.txt +7 -0
- data/ext/sources/examples/parakeet-quantize/parakeet-quantize.cpp +230 -0
- data/ext/sources/examples/server/server.cpp +199 -163
- data/ext/sources/ggml/CMakeLists.txt +21 -13
- data/ext/sources/ggml/cmake/FindNCCL.cmake +36 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +12 -2
- data/ext/sources/ggml/include/ggml-alloc.h +1 -0
- data/ext/sources/ggml/include/ggml-backend.h +72 -10
- data/ext/sources/ggml/include/ggml-cuda.h +3 -0
- data/ext/sources/ggml/include/ggml-rpc.h +3 -3
- data/ext/sources/ggml/include/ggml.h +101 -9
- data/ext/sources/ggml/include/gguf.h +10 -2
- data/ext/sources/ggml/src/CMakeLists.txt +22 -5
- data/ext/sources/ggml/src/ggml-alloc.c +5 -1
- data/ext/sources/ggml/src/ggml-backend-impl.h +22 -2
- data/ext/sources/ggml/src/ggml-backend-meta.cpp +2263 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +12 -0
- data/ext/sources/ggml/src/ggml-backend.cpp +110 -9
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +672 -257
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +71 -0
- data/ext/sources/ggml/src/ggml-cann/common.h +20 -10
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +211 -30
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +58 -29
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +2 -0
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +16 -16
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +116 -7
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +65 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +151 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +0 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +4279 -1292
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +5 -35
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +0 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +72 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +177 -27
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSMTIME.cmake +32 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +10 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +95 -5
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +146 -134
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +88 -70
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +372 -73
- data/ext/sources/ggml/src/ggml-cpu/ops.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +55 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +3 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +90 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +3 -16
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1402 -687
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +597 -2766
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime2_kernels.cpp +5768 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_env.cpp +320 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_env.h +55 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +182 -19
- data/ext/sources/ggml/src/ggml-cpu/spacemit/repack.cpp +1795 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/rvv_kernels.cpp +3178 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/rvv_kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_barrier.h +34 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_mem_pool.cpp +760 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_mem_pool.h +32 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/spine_tcm.h +409 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +37 -53
- data/ext/sources/ggml/src/ggml-cpu/vec.h +225 -240
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +17 -7
- data/ext/sources/ggml/src/ggml-cuda/allreduce.cu +971 -0
- data/ext/sources/ggml/src/ggml-cuda/allreduce.cuh +29 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +62 -26
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +44 -18
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +242 -28
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +120 -114
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +45 -21
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +53 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +14 -6
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +22 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +278 -44
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +331 -130
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +12 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +126 -27
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +40 -15
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +18 -9
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +152 -49
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/fwht.cu +101 -0
- data/ext/sources/ggml/src/ggml-cuda/fwht.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +84 -35
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +34 -12
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1069 -609
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +32 -29
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +4 -2
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +242 -195
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +18 -12
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +502 -423
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +19 -12
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +485 -57
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +6 -1
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +36 -10
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +23 -7
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +133 -26
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +5 -1
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +11 -4
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +4 -1
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +14 -6
- data/ext/sources/ggml/src/ggml-cuda/snake.cu +72 -0
- data/ext/sources/ggml/src/ggml-cuda/snake.cuh +8 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +4 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +45 -13
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +40 -18
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +8 -4
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +2 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq192-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq320-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq512-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-bf16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-bf16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-nvfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q1_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +26 -23
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +31 -2
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +80 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +7 -2
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +22 -4
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +2 -1
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +1428 -743
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -7
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +53 -84
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +25 -12
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +165 -184
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +5 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/concat-ops.c +277 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +170 -127
- data/ext/sources/ggml/src/ggml-hexagon/htp/cumsum-ops.c +270 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/diag-ops.c +216 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/fill-ops.c +123 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +125 -97
- data/ext/sources/ggml/src/ggml-hexagon/htp/gated-delta-net-ops.c +1148 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +148 -42
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.c +2 -2
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.h +252 -62
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +9 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +87 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c +1878 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c +2066 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-ops.c +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-ops.h +88 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-profile.h +34 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-queue.c +158 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-queue.h +134 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hmx-utils.h +200 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +96 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +182 -57
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +9 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +71 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +27 -10
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +63 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +9 -8
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-flash-attn.h +47 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-log.h +65 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-pow.h +42 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-repl.h +74 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +1 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sin-cos.h +90 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +5 -8
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +529 -815
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2522 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/pad-ops.c +547 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/repeat-ops.c +148 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +291 -95
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +59 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +121 -133
- data/ext/sources/ggml/src/ggml-hexagon/htp/solve-tri-ops.c +267 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +244 -151
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +6 -6
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +719 -45
- data/ext/sources/ggml/src/ggml-hexagon/htp/vtcm-utils.h +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-opnode.h +272 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +3 -1
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +22 -9
- data/ext/sources/ggml/src/ggml-impl.h +6 -1
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +138 -13
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +32 -1
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +164 -28
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +80 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +190 -19
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +2 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +39 -26
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +823 -322
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +5 -6
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +54 -5
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +12248 -5907
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +67 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +59 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +1819 -112
- data/ext/sources/ggml/src/ggml-opencl/kernels/gated_delta_net.cl +249 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl +306 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl +256 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl +258 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q4_k_f32_ns.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl +260 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl +262 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q5_k_f32_ns.cl +288 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_q6_k_f32_ns.cl +267 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_iq4_nl_f32.cl +150 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mat_Ab_Bi_8x4.cl → gemm_noshuffle_q4_0_f32.cl} +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_k_f32.cl +172 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_0_f32.cl +131 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_1_f32.cl +134 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q5_k_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q6_k_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mm_q8_0_f32_8x4.cl → gemm_noshuffle_q8_0_f32.cl} +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_xmem_f16_f32_os8.cl +233 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl +120 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl +123 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q4_k_f32_ns.cl +155 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl +123 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q5_k_f32_ns.cl +160 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_q6_k_f32_ns.cl +141 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_iq4_nl_f32.cl +302 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle_general.cl → gemv_noshuffle_q4_0_f32.cl} +5 -5
- data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle.cl → gemv_noshuffle_q4_0_f32_spec.cl} +5 -5
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_k_f32.cl +318 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_0_f32.cl +291 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_1_f32.cl +294 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q5_k_f32.cl +326 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q6_k_f32.cl +293 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +15 -9
- data/ext/sources/ggml/src/ggml-opencl/kernels/moe_reorder_b.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/moe_sort_by_expert.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_iq4_nl_f32_l4_lm.cl +171 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_k_f32_l4_lm.cl +179 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_0_f32_l4_lm.cl +173 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_1_f32_l4_lm.cl +175 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q5_k_f32_l4_lm.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32.cl +164 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_iq4_nl_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32_flat.cl +196 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_0_f32.cl +241 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_0_f32_flat.cl +243 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_1_f32.cl +243 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_1_f32_flat.cl +247 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32.cl +187 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q5_k_f32_flat.cl +203 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +48 -64
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +15 -5
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +18 -11
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +35 -13
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +264 -192
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +33 -7
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_gelu.cpp +25 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +1 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +1 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/rt_info/weightless_caching_attributes.hpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +27 -3
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +67 -36
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +1 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +101 -44
- data/ext/sources/ggml/src/ggml-openvino/utils.h +23 -3
- data/ext/sources/ggml/src/ggml-opt.cpp +1 -0
- data/ext/sources/ggml/src/ggml-quants.c +289 -114
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +24 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +167 -311
- data/ext/sources/ggml/src/ggml-rpc/transport.cpp +683 -0
- data/ext/sources/ggml/src/ggml-rpc/transport.h +34 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +50 -4
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/common.cpp +74 -2
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +41 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +115 -13
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cumsum.cpp +148 -0
- data/ext/sources/ggml/src/ggml-sycl/cumsum.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +663 -0
- data/ext/sources/ggml/src/ggml-sycl/diag.cpp +67 -0
- data/ext/sources/ggml/src/ggml-sycl/diag.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +586 -6
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1 -90
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +0 -2
- data/ext/sources/ggml/src/ggml-sycl/fattn-buffers.cpp +56 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-buffers.hpp +63 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +7 -5
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +76 -168
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/fill.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fill.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +69 -31
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +79 -3
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +823 -190
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +353 -89
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +5 -3
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1344 -26
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +16 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +27 -27
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +71 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +7 -1
- data/ext/sources/ggml/src/ggml-sycl/solve_tri.cpp +172 -0
- data/ext/sources/ggml/src/ggml-sycl/solve_tri.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +6 -1
- data/ext/sources/ggml/src/ggml-sycl/ssm_scan.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_scan.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +62 -10
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +18 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq512-dv512.cpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/type.hpp +112 -0
- data/ext/sources/ggml/src/ggml-sycl/upscale.cpp +410 -0
- data/ext/sources/ggml/src/ggml-sycl/upscale.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +215 -53
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +4 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +2 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +2 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +1 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +1 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +0 -2
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +2060 -535
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +6 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +146 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +25 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +643 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_nvfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dot_product_funcs.glsl +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2_decode_vector.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +197 -48
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +60 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +115 -113
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +122 -31
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_dequant.glsl +131 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mmq_funcs.glsl +203 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fwht.comp +115 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +125 -64
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +10 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +16 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +76 -54
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +122 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +6 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +1 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +88 -55
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +43 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +159 -125
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +8 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +24 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +5 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +3 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/snake.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +11 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +0 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +79 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +171 -147
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +5 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +2202 -283
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +2610 -1403
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +37 -7
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_id.wgsl +64 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +8 -7
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +76 -95
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +19 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/conv2d.wgsl +165 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{cpy.tmpl.wgsl → cpy.wgsl} +25 -50
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +107 -184
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_quant_staging.tmpl +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl +397 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl +84 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl +619 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/gated_delta_net.wgsl +149 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +183 -78
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.wgsl +155 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/im2col.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +655 -495
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id.wgsl +195 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_gather.wgsl +52 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_id_vec.wgsl +154 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +8 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +5 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +80 -409
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl +1432 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_q_acc.tmpl +303 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/quant_inner_loops.tmpl +21 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/quantize_q8.wgsl +173 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_mul.wgsl +152 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{rope.tmpl.wgsl → rope.wgsl} +71 -142
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/row_norm.wgsl +153 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +6 -4
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set.wgsl +109 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +2 -3
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows_quant.wgsl +224 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{soft_max.tmpl.wgsl → soft_max.wgsl} +106 -206
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/solve_tri.wgsl +121 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/ssm_conv.wgsl +65 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/ssm_scan.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +68 -48
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/upscale.wgsl +240 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +18 -14
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +244 -10
- data/ext/sources/ggml/src/ggml.c +110 -28
- data/ext/sources/ggml/src/gguf.cpp +173 -28
- data/ext/sources/include/parakeet.h +342 -0
- data/ext/sources/include/whisper.h +10 -0
- data/ext/sources/media/matmul.png +0 -0
- data/ext/sources/src/CMakeLists.txt +23 -0
- data/ext/sources/src/parakeet-arch.h +188 -0
- data/ext/sources/src/parakeet.cpp +3838 -0
- data/ext/sources/src/whisper.cpp +56 -12
- data/extsources.rb +26 -10
- data/lib/whisper/log_settable.rb +36 -0
- data/lib/whisper/model/uri.rb +13 -1
- data/lib/whisper/output.rb +74 -0
- data/sig/whisper.rbs +411 -62
- data/test/helper.rb +2 -0
- data/test/jfk_reader/jfk_reader.c +50 -7
- data/test/test_callback.rb +1 -0
- data/test/test_package.rb +6 -5
- data/test/test_parakeet.rb +28 -0
- data/test/test_parakeet_callback.rb +107 -0
- data/test/test_parakeet_context.rb +116 -0
- data/test/test_parakeet_context_params.rb +24 -0
- data/test/test_parakeet_model.rb +21 -0
- data/test/test_parakeet_params.rb +78 -0
- data/test/test_parakeet_segment.rb +42 -0
- data/test/test_parakeet_token.rb +73 -0
- data/test/test_params.rb +2 -0
- data/test/test_vad_segment.rb +1 -1
- data/test/test_whisper.rb +24 -6
- data/whispercpp.gemspec +2 -2
- metadata +215 -281
- data/ext/sources/bindings/javascript/CMakeLists.txt +0 -41
- data/ext/sources/bindings/javascript/emscripten.cpp +0 -93
- data/ext/sources/bindings/javascript/libwhisper.worker.js +0 -1
- data/ext/sources/bindings/javascript/package.json +0 -26
- data/ext/sources/bindings/javascript/whisper.js +0 -19
- data/ext/sources/examples/addon.node/CMakeLists.txt +0 -31
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +0 -133
- data/ext/sources/examples/addon.node/addon.cpp +0 -557
- data/ext/sources/examples/addon.node/index.js +0 -59
- data/ext/sources/examples/addon.node/package.json +0 -16
- data/ext/sources/examples/addon.node/vad-example.js +0 -132
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +0 -49
- data/ext/sources/examples/bench.wasm/emscripten.cpp +0 -87
- data/ext/sources/examples/bench.wasm/index-tmpl.html +0 -285
- data/ext/sources/examples/coi-serviceworker.js +0 -146
- data/ext/sources/examples/command/CMakeLists.txt +0 -10
- data/ext/sources/examples/command/command.cpp +0 -802
- data/ext/sources/examples/command/commands.txt +0 -9
- data/ext/sources/examples/command.wasm/CMakeLists.txt +0 -50
- data/ext/sources/examples/command.wasm/emscripten.cpp +0 -327
- data/ext/sources/examples/command.wasm/index-tmpl.html +0 -415
- data/ext/sources/examples/generate-karaoke.sh +0 -57
- data/ext/sources/examples/helpers.js +0 -191
- data/ext/sources/examples/livestream.sh +0 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +0 -10
- data/ext/sources/examples/lsp/lsp.cpp +0 -471
- data/ext/sources/examples/lsp/whisper.vim +0 -362
- data/ext/sources/examples/python/test_whisper_processor.py +0 -7
- data/ext/sources/examples/python/whisper_processor.py +0 -54
- data/ext/sources/examples/server/bench.js +0 -29
- data/ext/sources/examples/server.py +0 -120
- data/ext/sources/examples/stream/CMakeLists.txt +0 -10
- data/ext/sources/examples/stream/stream.cpp +0 -437
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +0 -49
- data/ext/sources/examples/stream.wasm/emscripten.cpp +0 -216
- data/ext/sources/examples/stream.wasm/index-tmpl.html +0 -491
- data/ext/sources/examples/sycl/CMakeLists.txt +0 -9
- data/ext/sources/examples/sycl/build.sh +0 -22
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +0 -11
- data/ext/sources/examples/sycl/run-whisper.sh +0 -17
- data/ext/sources/examples/talk-llama/CMakeLists.txt +0 -48
- data/ext/sources/examples/talk-llama/eleven-labs.py +0 -80
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +0 -488
- data/ext/sources/examples/talk-llama/llama-adapter.h +0 -89
- data/ext/sources/examples/talk-llama/llama-arch.cpp +0 -2877
- data/ext/sources/examples/talk-llama/llama-arch.h +0 -628
- data/ext/sources/examples/talk-llama/llama-batch.cpp +0 -919
- data/ext/sources/examples/talk-llama/llama-batch.h +0 -173
- data/ext/sources/examples/talk-llama/llama-chat.cpp +0 -896
- data/ext/sources/examples/talk-llama/llama-chat.h +0 -71
- data/ext/sources/examples/talk-llama/llama-context.cpp +0 -3633
- data/ext/sources/examples/talk-llama/llama-context.h +0 -359
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +0 -5
- data/ext/sources/examples/talk-llama/llama-cparams.h +0 -47
- data/ext/sources/examples/talk-llama/llama-ext.h +0 -12
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +0 -1464
- data/ext/sources/examples/talk-llama/llama-grammar.h +0 -194
- data/ext/sources/examples/talk-llama/llama-graph.cpp +0 -2735
- data/ext/sources/examples/talk-llama/llama-graph.h +0 -1031
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +0 -258
- data/ext/sources/examples/talk-llama/llama-hparams.h +0 -353
- data/ext/sources/examples/talk-llama/llama-impl.cpp +0 -171
- data/ext/sources/examples/talk-llama/llama-impl.h +0 -75
- data/ext/sources/examples/talk-llama/llama-io.cpp +0 -15
- data/ext/sources/examples/talk-llama/llama-io.h +0 -35
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +0 -330
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +0 -137
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +0 -2285
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +0 -389
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +0 -533
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +0 -275
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +0 -140
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +0 -268
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +0 -139
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +0 -1165
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +0 -182
- data/ext/sources/examples/talk-llama/llama-memory.cpp +0 -59
- data/ext/sources/examples/talk-llama/llama-memory.h +0 -122
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +0 -752
- data/ext/sources/examples/talk-llama/llama-mmap.h +0 -73
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +0 -1655
- data/ext/sources/examples/talk-llama/llama-model-loader.h +0 -206
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +0 -299
- data/ext/sources/examples/talk-llama/llama-model-saver.h +0 -40
- data/ext/sources/examples/talk-llama/llama-model.cpp +0 -9056
- data/ext/sources/examples/talk-llama/llama-model.h +0 -597
- data/ext/sources/examples/talk-llama/llama-quant.cpp +0 -1304
- data/ext/sources/examples/talk-llama/llama-quant.h +0 -1
- data/ext/sources/examples/talk-llama/llama-sampler.cpp +0 -3885
- data/ext/sources/examples/talk-llama/llama-sampler.h +0 -42
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +0 -3970
- data/ext/sources/examples/talk-llama/llama-vocab.h +0 -187
- data/ext/sources/examples/talk-llama/llama.cpp +0 -1194
- data/ext/sources/examples/talk-llama/llama.h +0 -1573
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +0 -190
- data/ext/sources/examples/talk-llama/models/apertus.cpp +0 -125
- data/ext/sources/examples/talk-llama/models/arcee.cpp +0 -135
- data/ext/sources/examples/talk-llama/models/arctic.cpp +0 -137
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +0 -143
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +0 -133
- data/ext/sources/examples/talk-llama/models/bert.cpp +0 -184
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +0 -145
- data/ext/sources/examples/talk-llama/models/bloom.cpp +0 -101
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +0 -178
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +0 -111
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +0 -102
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +0 -134
- data/ext/sources/examples/talk-llama/models/command-r.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/deci.cpp +0 -135
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +0 -142
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +0 -262
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +0 -445
- data/ext/sources/examples/talk-llama/models/dots1.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/dream.cpp +0 -105
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +0 -148
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +0 -110
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +0 -97
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +0 -145
- data/ext/sources/examples/talk-llama/models/exaone.cpp +0 -114
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +0 -111
- data/ext/sources/examples/talk-llama/models/falcon.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +0 -116
- data/ext/sources/examples/talk-llama/models/gemma.cpp +0 -112
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +0 -155
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +0 -384
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +0 -170
- data/ext/sources/examples/talk-llama/models/glm4.cpp +0 -157
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +0 -105
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +0 -144
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +0 -195
- data/ext/sources/examples/talk-llama/models/granite.cpp +0 -210
- data/ext/sources/examples/talk-llama/models/grok.cpp +0 -159
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +0 -139
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +0 -153
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/jais.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/jais2.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/jamba.cpp +0 -106
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +0 -381
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +0 -196
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/llada.cpp +0 -99
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +0 -178
- data/ext/sources/examples/talk-llama/models/llama.cpp +0 -175
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +0 -117
- data/ext/sources/examples/talk-llama/models/mamba-base.cpp +0 -289
- data/ext/sources/examples/talk-llama/models/mamba.cpp +0 -54
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +0 -129
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +0 -200
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +0 -160
- data/ext/sources/examples/talk-llama/models/models.h +0 -704
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +0 -109
- data/ext/sources/examples/talk-llama/models/mpt.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +0 -162
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +0 -104
- data/ext/sources/examples/talk-llama/models/olmo.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +0 -150
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +0 -127
- data/ext/sources/examples/talk-llama/models/openelm.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/orion.cpp +0 -123
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +0 -122
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/phi2.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/phi3.cpp +0 -152
- data/ext/sources/examples/talk-llama/models/plamo.cpp +0 -110
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +0 -320
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/plm.cpp +0 -169
- data/ext/sources/examples/talk-llama/models/qwen.cpp +0 -108
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +0 -151
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +0 -117
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +0 -120
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +0 -381
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +0 -422
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +0 -131
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +0 -525
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +0 -140
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +0 -132
- data/ext/sources/examples/talk-llama/models/refact.cpp +0 -94
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +0 -164
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +0 -94
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +0 -86
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +0 -137
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +0 -90
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +0 -124
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +0 -126
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +0 -128
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +0 -146
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +0 -100
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +0 -121
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +0 -165
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +0 -166
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +0 -96
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +0 -149
- data/ext/sources/examples/talk-llama/models/xverse.cpp +0 -108
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +0 -23
- data/ext/sources/examples/talk-llama/speak +0 -40
- data/ext/sources/examples/talk-llama/speak.bat +0 -1
- data/ext/sources/examples/talk-llama/speak.ps1 +0 -14
- data/ext/sources/examples/talk-llama/talk-llama.cpp +0 -813
- data/ext/sources/examples/talk-llama/unicode-data.cpp +0 -7034
- data/ext/sources/examples/talk-llama/unicode-data.h +0 -20
- data/ext/sources/examples/talk-llama/unicode.cpp +0 -1103
- data/ext/sources/examples/talk-llama/unicode.h +0 -111
- data/ext/sources/examples/wchess/CMakeLists.txt +0 -10
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +0 -19
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +0 -803
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +0 -33
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +0 -193
- data/ext/sources/examples/wchess/libwchess/WChess.h +0 -63
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +0 -117
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +0 -8
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +0 -253
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +0 -50
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +0 -118
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +0 -659
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +0 -99
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +0 -155
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +0 -153
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +0 -26
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +0 -123
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +0 -17
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +0 -333
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +0 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +0 -182
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +0 -323
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +0 -718
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +0 -123
- data/ext/sources/tests/CMakeLists.txt +0 -112
- data/ext/sources/tests/earnings21/eval.mk +0 -58
- data/ext/sources/tests/earnings21/eval.py +0 -68
- data/ext/sources/tests/earnings21/normalizers/__init__.py +0 -2
- data/ext/sources/tests/earnings21/normalizers/basic.py +0 -80
- data/ext/sources/tests/earnings21/normalizers/english.json +0 -1741
- data/ext/sources/tests/earnings21/normalizers/english.py +0 -550
- data/ext/sources/tests/earnings21/requirements.txt +0 -6
- data/ext/sources/tests/en-0-ref.txt +0 -1
- data/ext/sources/tests/en-1-ref.txt +0 -1
- data/ext/sources/tests/en-2-ref.txt +0 -1
- data/ext/sources/tests/es-0-ref.txt +0 -1
- data/ext/sources/tests/librispeech/eval.mk +0 -39
- data/ext/sources/tests/librispeech/eval.py +0 -47
- data/ext/sources/tests/librispeech/normalizers/__init__.py +0 -2
- data/ext/sources/tests/librispeech/normalizers/basic.py +0 -80
- data/ext/sources/tests/librispeech/normalizers/english.json +0 -1741
- data/ext/sources/tests/librispeech/normalizers/english.py +0 -550
- data/ext/sources/tests/librispeech/requirements.txt +0 -6
- data/ext/sources/tests/run-tests.sh +0 -130
- data/ext/sources/tests/test-c.c +0 -3
- data/ext/sources/tests/test-vad-full.cpp +0 -56
- data/ext/sources/tests/test-vad.cpp +0 -83
- data/ext/sources/tests/test-whisper.js +0 -58
- data/lib/whisper/context.rb +0 -15
- data/lib/whisper/segment.rb +0 -58
- /data/ext/sources/ggml/src/ggml-opencl/kernels/{gemv_noshuffle_general_q8_0_f32.cl → gemv_noshuffle_q8_0_f32.cl} +0 -0
|
@@ -2,30 +2,17 @@
|
|
|
2
2
|
#include "ggml-impl.h"
|
|
3
3
|
#include "ggml-backend-impl.h"
|
|
4
4
|
#include "ggml-cpp.h"
|
|
5
|
+
#include "transport.h"
|
|
5
6
|
|
|
7
|
+
#include <array>
|
|
6
8
|
#include <cinttypes>
|
|
9
|
+
#include <optional>
|
|
7
10
|
#include <string>
|
|
8
11
|
#include <vector>
|
|
9
12
|
#include <memory>
|
|
10
13
|
#include <mutex>
|
|
11
14
|
#include <unordered_map>
|
|
12
15
|
#include <unordered_set>
|
|
13
|
-
#ifdef _WIN32
|
|
14
|
-
# define WIN32_LEAN_AND_MEAN
|
|
15
|
-
# ifndef NOMINMAX
|
|
16
|
-
# define NOMINMAX
|
|
17
|
-
# endif
|
|
18
|
-
# include <windows.h>
|
|
19
|
-
# include <winsock2.h>
|
|
20
|
-
#else
|
|
21
|
-
# include <arpa/inet.h>
|
|
22
|
-
# include <sys/socket.h>
|
|
23
|
-
# include <sys/types.h>
|
|
24
|
-
# include <netinet/in.h>
|
|
25
|
-
# include <netinet/tcp.h>
|
|
26
|
-
# include <netdb.h>
|
|
27
|
-
# include <unistd.h>
|
|
28
|
-
#endif
|
|
29
16
|
#include <cstring>
|
|
30
17
|
#include <fstream>
|
|
31
18
|
#include <filesystem>
|
|
@@ -39,29 +26,6 @@ static const char * RPC_DEBUG = std::getenv("GGML_RPC_DEBUG");
|
|
|
39
26
|
|
|
40
27
|
namespace fs = std::filesystem;
|
|
41
28
|
|
|
42
|
-
static constexpr size_t MAX_CHUNK_SIZE = 1024ull * 1024ull * 1024ull; // 1 GiB
|
|
43
|
-
|
|
44
|
-
#ifdef _WIN32
|
|
45
|
-
typedef SOCKET sockfd_t;
|
|
46
|
-
using ssize_t = __int64;
|
|
47
|
-
#else
|
|
48
|
-
typedef int sockfd_t;
|
|
49
|
-
#endif
|
|
50
|
-
|
|
51
|
-
// cross-platform socket
|
|
52
|
-
struct socket_t {
|
|
53
|
-
sockfd_t fd;
|
|
54
|
-
socket_t(sockfd_t fd) : fd(fd) {}
|
|
55
|
-
~socket_t() {
|
|
56
|
-
LOG_DBG("[%s] closing socket %d\n", __func__, this->fd);
|
|
57
|
-
#ifdef _WIN32
|
|
58
|
-
closesocket(this->fd);
|
|
59
|
-
#else
|
|
60
|
-
close(this->fd);
|
|
61
|
-
#endif
|
|
62
|
-
}
|
|
63
|
-
};
|
|
64
|
-
|
|
65
29
|
// macro for nicer error messages on server crash
|
|
66
30
|
#define RPC_STATUS_ASSERT(x) if (!(x)) GGML_ABORT("Remote RPC server crashed or returned malformed response")
|
|
67
31
|
|
|
@@ -115,10 +79,16 @@ static_assert(RPC_CMD_HELLO == 14, "RPC_CMD_HELLO must be always 14");
|
|
|
115
79
|
// Try RPC_CMD_SET_TENSOR_HASH first when data size is larger than this threshold
|
|
116
80
|
const size_t HASH_THRESHOLD = 10 * 1024 * 1024;
|
|
117
81
|
|
|
82
|
+
struct rpc_msg_hello_req {
|
|
83
|
+
uint8_t conn_caps[RPC_CONN_CAPS_SIZE];
|
|
84
|
+
};
|
|
85
|
+
|
|
118
86
|
struct rpc_msg_hello_rsp {
|
|
119
87
|
uint8_t major;
|
|
120
88
|
uint8_t minor;
|
|
121
89
|
uint8_t patch;
|
|
90
|
+
uint8_t padding;
|
|
91
|
+
uint8_t conn_caps[RPC_CONN_CAPS_SIZE];
|
|
122
92
|
};
|
|
123
93
|
|
|
124
94
|
struct rpc_msg_device_count_rsp {
|
|
@@ -229,6 +199,14 @@ static ggml_guid_t ggml_backend_rpc_guid() {
|
|
|
229
199
|
return &guid;
|
|
230
200
|
}
|
|
231
201
|
|
|
202
|
+
struct ggml_backend_rpc_device_context {
|
|
203
|
+
std::string endpoint;
|
|
204
|
+
uint32_t device;
|
|
205
|
+
std::string name;
|
|
206
|
+
std::string description;
|
|
207
|
+
uint64_t last_graph_uid;
|
|
208
|
+
};
|
|
209
|
+
|
|
232
210
|
struct ggml_backend_rpc_buffer_type_context {
|
|
233
211
|
std::string endpoint;
|
|
234
212
|
uint32_t device;
|
|
@@ -237,35 +215,10 @@ struct ggml_backend_rpc_buffer_type_context {
|
|
|
237
215
|
size_t max_size;
|
|
238
216
|
};
|
|
239
217
|
|
|
240
|
-
struct graph_cache {
|
|
241
|
-
|
|
242
|
-
bool is_cached(const ggml_cgraph * cgraph) {
|
|
243
|
-
if ((int)last_graph.size() != cgraph->n_nodes) {
|
|
244
|
-
return false;
|
|
245
|
-
}
|
|
246
|
-
for (int i = 0; i < cgraph->n_nodes; i++) {
|
|
247
|
-
if (memcmp(&last_graph[i], cgraph->nodes[i], sizeof(ggml_tensor)) != 0) {
|
|
248
|
-
return false;
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
return true;
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
void add(const ggml_cgraph * cgraph) {
|
|
255
|
-
last_graph.resize(cgraph->n_nodes);
|
|
256
|
-
for (int i = 0; i < cgraph->n_nodes; i++) {
|
|
257
|
-
memcpy(&last_graph[i], cgraph->nodes[i], sizeof(ggml_tensor));
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
std::vector<ggml_tensor> last_graph;
|
|
262
|
-
};
|
|
263
|
-
|
|
264
218
|
struct ggml_backend_rpc_context {
|
|
265
219
|
std::string endpoint;
|
|
266
220
|
uint32_t device;
|
|
267
221
|
std::string name;
|
|
268
|
-
graph_cache gc;
|
|
269
222
|
};
|
|
270
223
|
|
|
271
224
|
struct ggml_backend_rpc_buffer_context {
|
|
@@ -288,153 +241,27 @@ static uint64_t fnv_hash(const uint8_t * data, size_t len) {
|
|
|
288
241
|
return hash;
|
|
289
242
|
}
|
|
290
243
|
|
|
291
|
-
static std::shared_ptr<socket_t> make_socket(sockfd_t fd) {
|
|
292
|
-
#ifdef _WIN32
|
293
|
-
if (fd == INVALID_SOCKET) {
|
|
294
|
-
return nullptr;
|
|
295
|
-
}
|
|
296
|
-
#else
|
|
297
|
-
if (fd < 0) {
|
|
298
|
-
return nullptr;
|
|
299
|
-
}
|
|
300
|
-
#endif
|
|
301
|
-
return std::make_shared<socket_t>(fd);
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
static bool set_no_delay(sockfd_t sockfd) {
|
|
305
|
-
int flag = 1;
|
|
306
|
-
// set TCP_NODELAY to disable Nagle's algorithm
|
|
307
|
-
int ret = setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, (char *)&flag, sizeof(int));
|
|
308
|
-
return ret == 0;
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
static bool set_reuse_addr(sockfd_t sockfd) {
|
|
312
|
-
int flag = 1;
|
|
313
|
-
int ret = setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof(int));
|
|
314
|
-
return ret == 0;
|
|
315
|
-
}
|
|
316
|
-
|
|
317
|
-
static std::shared_ptr<socket_t> socket_connect(const char * host, int port) {
|
|
318
|
-
struct sockaddr_in addr;
|
|
319
|
-
auto sockfd = socket(AF_INET, SOCK_STREAM, 0);
|
|
320
|
-
auto sock_ptr = make_socket(sockfd);
|
|
321
|
-
if (sock_ptr == nullptr) {
|
|
322
|
-
return nullptr;
|
|
323
|
-
}
|
|
324
|
-
if (!set_no_delay(sockfd)) {
|
|
325
|
-
GGML_LOG_ERROR("Failed to set TCP_NODELAY\n");
|
|
326
|
-
return nullptr;
|
|
327
|
-
}
|
|
328
|
-
addr.sin_family = AF_INET;
|
|
329
|
-
addr.sin_port = htons(port);
|
|
330
|
-
struct hostent * server = gethostbyname(host);
|
|
331
|
-
if (server == NULL) {
|
|
332
|
-
GGML_LOG_ERROR("Cannot resolve host '%s'\n", host);
|
|
333
|
-
return nullptr;
|
|
334
|
-
}
|
|
335
|
-
memcpy(&addr.sin_addr.s_addr, server->h_addr, server->h_length);
|
|
336
|
-
if (connect(sock_ptr->fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
|
337
|
-
return nullptr;
|
|
338
|
-
}
|
|
339
|
-
return sock_ptr;
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
static std::shared_ptr<socket_t> socket_accept(sockfd_t srv_sockfd) {
|
|
343
|
-
auto client_socket_fd = accept(srv_sockfd, NULL, NULL);
|
|
344
|
-
auto client_socket = make_socket(client_socket_fd);
|
|
345
|
-
if (client_socket == nullptr) {
|
|
346
|
-
return nullptr;
|
|
347
|
-
}
|
|
348
|
-
if (!set_no_delay(client_socket_fd)) {
|
|
349
|
-
GGML_LOG_ERROR("Failed to set TCP_NODELAY\n");
|
|
350
|
-
return nullptr;
|
|
351
|
-
}
|
|
352
|
-
return client_socket;
|
|
353
|
-
}
|
|
354
|
-
|
|
355
|
-
static std::shared_ptr<socket_t> create_server_socket(const char * host, int port) {
|
|
356
|
-
auto sockfd = socket(AF_INET, SOCK_STREAM, 0);
|
|
357
|
-
auto sock = make_socket(sockfd);
|
|
358
|
-
if (sock == nullptr) {
|
|
359
|
-
return nullptr;
|
|
360
|
-
}
|
|
361
|
-
if (!set_reuse_addr(sockfd)) {
|
|
362
|
-
GGML_LOG_ERROR("Failed to set SO_REUSEADDR\n");
|
|
363
|
-
return nullptr;
|
|
364
|
-
}
|
|
365
|
-
if (inet_addr(host) == INADDR_NONE) {
|
|
366
|
-
GGML_LOG_ERROR("Invalid host address: %s\n", host);
|
|
367
|
-
return nullptr;
|
|
368
|
-
}
|
|
369
|
-
struct sockaddr_in serv_addr;
|
|
370
|
-
serv_addr.sin_family = AF_INET;
|
|
371
|
-
serv_addr.sin_addr.s_addr = inet_addr(host);
|
|
372
|
-
serv_addr.sin_port = htons(port);
|
|
373
|
-
|
|
374
|
-
if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
|
|
375
|
-
return nullptr;
|
|
376
|
-
}
|
|
377
|
-
if (listen(sockfd, 1) < 0) {
|
|
378
|
-
return nullptr;
|
|
379
|
-
}
|
|
380
|
-
return sock;
|
|
381
|
-
}
|
|
382
|
-
|
|
383
|
-
static bool send_data(sockfd_t sockfd, const void * data, size_t size) {
|
|
384
|
-
size_t bytes_sent = 0;
|
|
385
|
-
while (bytes_sent < size) {
|
|
386
|
-
size_t size_to_send = std::min(size - bytes_sent, MAX_CHUNK_SIZE);
|
|
387
|
-
ssize_t n = send(sockfd, (const char *)data + bytes_sent, size_to_send, 0);
|
|
388
|
-
if (n < 0) {
|
|
389
|
-
GGML_LOG_ERROR("send failed (bytes_sent=%zu, size_to_send=%zu)\n",
|
|
390
|
-
bytes_sent, size_to_send);
|
|
391
|
-
return false;
|
|
392
|
-
}
|
|
393
|
-
bytes_sent += (size_t)n;
|
|
394
|
-
}
|
|
395
|
-
return true;
|
|
396
|
-
}
|
|
397
|
-
|
|
398
|
-
static bool recv_data(sockfd_t sockfd, void * data, size_t size) {
|
|
399
|
-
size_t bytes_recv = 0;
|
|
400
|
-
while (bytes_recv < size) {
|
|
401
|
-
size_t size_to_recv = std::min(size - bytes_recv, MAX_CHUNK_SIZE);
|
|
402
|
-
ssize_t n = recv(sockfd, (char *)data + bytes_recv, size_to_recv, 0);
|
|
403
|
-
if (n < 0) {
|
|
404
|
-
GGML_LOG_ERROR("recv failed (bytes_recv=%zu, size_to_recv=%zu)\n",
|
|
405
|
-
bytes_recv, size_to_recv);
|
|
406
|
-
return false;
|
|
407
|
-
}
|
|
408
|
-
if (n == 0) {
|
|
409
|
-
LOG_DBG("recv returned 0 (peer closed?)\n");
|
|
410
|
-
return false;
|
|
411
|
-
}
|
|
412
|
-
bytes_recv += (size_t)n;
|
|
413
|
-
}
|
|
414
|
-
return true;
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
static bool send_msg(sockfd_t sockfd, const void * msg, size_t msg_size) {
|
|
418
|
-
if (!send_data(sockfd, &msg_size, sizeof(msg_size))) {
|
|
244
|
+
static bool send_msg(socket_ptr sock, const void * msg, size_t msg_size) {
|
|
245
|
+
if (!sock->send_data(&msg_size, sizeof(msg_size))) {
|
|
419
246
|
return false;
|
|
420
247
|
}
|
|
421
|
-
return send_data(
|
|
248
|
+
return sock->send_data(msg, msg_size);
|
|
422
249
|
}
|
|
423
250
|
|
|
424
|
-
static bool recv_msg(
|
|
251
|
+
static bool recv_msg(socket_ptr sock, void * msg, size_t msg_size) {
|
|
425
252
|
uint64_t size;
|
|
426
|
-
if (!recv_data(
|
|
253
|
+
if (!sock->recv_data(&size, sizeof(size))) {
|
|
427
254
|
return false;
|
|
428
255
|
}
|
|
429
256
|
if (size != msg_size) {
|
|
430
257
|
return false;
|
|
431
258
|
}
|
|
432
|
-
return recv_data(
|
|
259
|
+
return sock->recv_data(msg, msg_size);
|
|
433
260
|
}
|
|
434
261
|
|
|
435
|
-
static bool recv_msg(
|
|
262
|
+
static bool recv_msg(socket_ptr sock, std::vector<uint8_t> & input) {
|
|
436
263
|
uint64_t size;
|
|
437
|
-
if (!recv_data(
|
|
264
|
+
if (!sock->recv_data(&size, sizeof(size))) {
|
|
438
265
|
return false;
|
|
439
266
|
}
|
|
440
267
|
try {
|
|
@@ -443,7 +270,7 @@ static bool recv_msg(sockfd_t sockfd, std::vector<uint8_t> & input) {
|
|
|
443
270
|
GGML_LOG_ERROR("Failed to allocate input buffer of size %" PRIu64 "\n", size);
|
|
444
271
|
return false;
|
|
445
272
|
}
|
|
446
|
-
return recv_data(
|
|
273
|
+
return sock->recv_data(input.data(), size);
|
|
447
274
|
}
|
|
448
275
|
|
|
449
276
|
static bool parse_endpoint(const std::string & endpoint, std::string & host, int & port) {
|
|
@@ -452,21 +279,25 @@ static bool parse_endpoint(const std::string & endpoint, std::string & host, int
|
|
|
452
279
|
return false;
|
|
453
280
|
}
|
|
454
281
|
host = endpoint.substr(0, pos);
|
|
455
|
-
|
|
282
|
+
try {
|
|
283
|
+
port = std::stoi(endpoint.substr(pos + 1));
|
|
284
|
+
} catch (...) {
|
|
285
|
+
return false;
|
|
286
|
+
}
|
|
456
287
|
return true;
|
|
457
288
|
}
|
|
458
289
|
|
|
459
290
|
// RPC request : | rpc_cmd (1 byte) | request_size (8 bytes) | request_data (request_size bytes) |
|
|
460
291
|
// No response
|
|
461
|
-
static bool send_rpc_cmd(
|
|
292
|
+
static bool send_rpc_cmd(socket_ptr sock, enum rpc_cmd cmd, const void * input, size_t input_size) {
|
|
462
293
|
uint8_t cmd_byte = cmd;
|
|
463
|
-
if (!send_data(
|
|
294
|
+
if (!sock->send_data(&cmd_byte, sizeof(cmd_byte))) {
|
|
464
295
|
return false;
|
|
465
296
|
}
|
|
466
|
-
if (!send_data(
|
|
297
|
+
if (!sock->send_data(&input_size, sizeof(input_size))) {
|
|
467
298
|
return false;
|
|
468
299
|
}
|
|
469
|
-
if (!send_data(
|
|
300
|
+
if (!sock->send_data(input, input_size)) {
|
|
470
301
|
return false;
|
|
471
302
|
}
|
|
472
303
|
return true;
|
|
@@ -474,20 +305,18 @@ static bool send_rpc_cmd(const std::shared_ptr<socket_t> & sock, enum rpc_cmd cm
|
|
|
474
305
|
|
|
475
306
|
// RPC request : | rpc_cmd (1 byte) | request_size (8 bytes) | request_data (request_size bytes) |
|
|
476
307
|
// RPC response: | response_size (8 bytes) | response_data (response_size bytes) |
|
|
477
|
-
static bool send_rpc_cmd(
|
|
308
|
+
static bool send_rpc_cmd(socket_ptr sock, enum rpc_cmd cmd, const void * input, size_t input_size, void * output, size_t output_size) {
|
|
478
309
|
if (!send_rpc_cmd(sock, cmd, input, input_size)) {
|
|
479
310
|
return false;
|
|
480
311
|
}
|
|
481
|
-
// TODO: currently the output_size is always known, do we need support for commands with variable output size?
|
|
482
|
-
// even if we do, we can skip sending output_size from the server for commands with known output size
|
|
483
312
|
uint64_t out_size;
|
|
484
|
-
if (!recv_data(
|
|
313
|
+
if (!sock->recv_data(&out_size, sizeof(out_size))) {
|
|
485
314
|
return false;
|
|
486
315
|
}
|
|
487
316
|
if (out_size != output_size) {
|
|
488
317
|
return false;
|
|
489
318
|
}
|
|
490
|
-
if (!recv_data(
|
|
319
|
+
if (!sock->recv_data(output, output_size)) {
|
|
491
320
|
return false;
|
|
492
321
|
}
|
|
493
322
|
return true;
|
|
@@ -495,17 +324,25 @@ static bool send_rpc_cmd(const std::shared_ptr<socket_t> & sock, enum rpc_cmd cm
|
|
|
495
324
|
|
|
496
325
|
// RPC client-side implementation
|
|
497
326
|
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
327
|
+
// Performs HELLO handshake with transport auto-negotiation.
|
|
328
|
+
// Advertises local capabilities via conn_caps; if the server responds with
|
|
329
|
+
// matching capabilities, the socket is upgraded transparently.
|
|
330
|
+
static bool negotiate_hello(const std::shared_ptr<socket_t> & sock) {
|
|
331
|
+
rpc_msg_hello_req request = {};
|
|
332
|
+
rpc_msg_hello_rsp response = {};
|
|
333
|
+
|
|
334
|
+
sock->get_caps(request.conn_caps);
|
|
335
|
+
|
|
336
|
+
bool status = send_rpc_cmd(sock, RPC_CMD_HELLO, &request, sizeof(request), &response, sizeof(response));
|
|
501
337
|
RPC_STATUS_ASSERT(status);
|
|
338
|
+
|
|
502
339
|
if (response.major != RPC_PROTO_MAJOR_VERSION || response.minor > RPC_PROTO_MINOR_VERSION) {
|
|
503
|
-
GGML_LOG_ERROR("RPC server version mismatch: %d.%d.%d\n",
|
|
340
|
+
GGML_LOG_ERROR("RPC server version mismatch: %d.%d.%d\n",
|
|
341
|
+
response.major, response.minor, response.patch);
|
|
504
342
|
return false;
|
|
505
343
|
}
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
}
|
|
344
|
+
|
|
345
|
+
sock->update_caps(response.conn_caps);
|
|
509
346
|
return true;
|
|
510
347
|
}
|
|
511
348
|
|
|
@@ -513,7 +350,6 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
|
|
|
513
350
|
static std::mutex mutex;
|
|
514
351
|
std::lock_guard<std::mutex> lock(mutex);
|
|
515
352
|
static std::unordered_map<std::string, std::weak_ptr<socket_t>> sockets;
|
|
516
|
-
static bool initialized = false;
|
|
517
353
|
|
|
518
354
|
auto it = sockets.find(endpoint);
|
|
519
355
|
if (it != sockets.end()) {
|
|
@@ -527,26 +363,18 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
|
|
|
527
363
|
GGML_LOG_ERROR("Failed to parse endpoint: %s\n", endpoint.c_str());
|
|
528
364
|
return nullptr;
|
|
529
365
|
}
|
|
530
|
-
|
|
531
|
-
if (!
|
|
532
|
-
|
|
533
|
-
int res = WSAStartup(MAKEWORD(2, 2), &wsaData);
|
|
534
|
-
if (res != 0) {
|
|
535
|
-
return nullptr;
|
|
536
|
-
}
|
|
537
|
-
initialized = true;
|
|
366
|
+
|
|
367
|
+
if (!rpc_transport_init()) {
|
|
368
|
+
return nullptr;
|
|
538
369
|
}
|
|
539
|
-
|
|
540
|
-
GGML_UNUSED(initialized);
|
|
541
|
-
#endif
|
|
542
|
-
auto sock = socket_connect(host.c_str(), port);
|
|
370
|
+
auto sock = socket_t::connect(host.c_str(), port);
|
|
543
371
|
if (sock == nullptr) {
|
|
544
372
|
return nullptr;
|
|
545
373
|
}
|
|
546
|
-
if (!
|
|
374
|
+
if (!negotiate_hello(sock)) {
|
|
547
375
|
return nullptr;
|
|
548
376
|
}
|
|
549
|
-
LOG_DBG("[%s] connected to %s
|
|
377
|
+
LOG_DBG("[%s] connected to %s\n", __func__, endpoint.c_str());
|
|
550
378
|
sockets[endpoint] = sock;
|
|
551
379
|
return sock;
|
|
552
380
|
}
|
|
@@ -589,8 +417,10 @@ static rpc_tensor serialize_tensor(const ggml_tensor * tensor) {
|
|
|
589
417
|
ggml_backend_buffer_t buffer = tensor->buffer;
|
|
590
418
|
ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
|
|
591
419
|
result.buffer = ctx != nullptr ? ctx->remote_ptr : 0;
|
|
420
|
+
result.data = reinterpret_cast<uint64_t>(tensor->data);
|
|
592
421
|
} else {
|
|
593
422
|
result.buffer = 0;
|
|
423
|
+
result.data = 0;
|
|
594
424
|
}
|
|
595
425
|
for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
|
|
596
426
|
result.ne[i] = tensor->ne[i];
|
|
@@ -606,7 +436,6 @@ static rpc_tensor serialize_tensor(const ggml_tensor * tensor) {
|
|
|
606
436
|
}
|
|
607
437
|
result.view_src = reinterpret_cast<uint64_t>(tensor->view_src);
|
|
608
438
|
result.view_offs = tensor->view_offs;
|
|
609
|
-
result.data = reinterpret_cast<uint64_t>(tensor->data);
|
|
610
439
|
|
|
611
440
|
// Avoid sending uninitialized data over the wire
|
|
612
441
|
memset(result.name, 0, sizeof(result.name));
|
|
@@ -705,6 +534,8 @@ static ggml_backend_buffer_i ggml_backend_rpc_buffer_interface = {
|
|
|
705
534
|
/* .memset_tensor = */ NULL,
|
|
706
535
|
/* .set_tensor = */ ggml_backend_rpc_buffer_set_tensor,
|
|
707
536
|
/* .get_tensor = */ ggml_backend_rpc_buffer_get_tensor,
|
|
537
|
+
/* .set_tensor_2d = */ NULL,
|
|
538
|
+
/* .get_tensor_2d = */ NULL,
|
|
708
539
|
/* .cpy_tensor = */ ggml_backend_rpc_buffer_cpy_tensor,
|
|
709
540
|
/* .clear = */ ggml_backend_rpc_buffer_clear,
|
|
710
541
|
/* .reset = */ NULL,
|
|
@@ -867,9 +698,11 @@ static void serialize_graph(uint32_t device, const ggml_cgraph * cgraph, std::ve
|
|
|
867
698
|
|
|
868
699
|
static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
|
|
869
700
|
ggml_backend_rpc_context * rpc_ctx = (ggml_backend_rpc_context *)backend->context;
|
|
701
|
+
ggml_backend_dev_t rpc_dev = ggml_backend_get_device(backend);
|
|
702
|
+
ggml_backend_rpc_device_context * rpc_dev_ctx = (ggml_backend_rpc_device_context *)rpc_dev->context;
|
|
870
703
|
|
|
871
704
|
GGML_ASSERT(cgraph->n_nodes > 0);
|
|
872
|
-
bool reuse =
|
|
705
|
+
bool reuse = cgraph->uid != 0 && rpc_dev_ctx->last_graph_uid == cgraph->uid;
|
|
873
706
|
if (reuse) {
|
|
874
707
|
rpc_msg_graph_recompute_req request;
|
|
875
708
|
request.device = rpc_ctx->device;
|
|
@@ -877,7 +710,7 @@ static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, g
|
|
|
877
710
|
bool status = send_rpc_cmd(sock, RPC_CMD_GRAPH_RECOMPUTE, &request, sizeof(request));
|
|
878
711
|
RPC_STATUS_ASSERT(status);
|
|
879
712
|
} else {
|
|
880
|
-
|
|
713
|
+
rpc_dev_ctx->last_graph_uid = cgraph->uid;
|
|
881
714
|
std::vector<uint8_t> input;
|
|
882
715
|
serialize_graph(rpc_ctx->device, cgraph, input);
|
|
883
716
|
auto sock = get_socket(rpc_ctx->endpoint);
|
|
@@ -892,6 +725,8 @@ static ggml_backend_i ggml_backend_rpc_interface = {
|
|
|
892
725
|
/* .free = */ ggml_backend_rpc_free,
|
|
893
726
|
/* .set_tensor_async = */ NULL,
|
|
894
727
|
/* .get_tensor_async = */ NULL,
|
|
728
|
+
/* .set_tensor_2d_async = */ NULL,
|
|
729
|
+
/* .get_tensor_2d_async = */ NULL,
|
|
895
730
|
/* .cpy_tensor_async = */ NULL,
|
|
896
731
|
/* .synchronize = */ ggml_backend_rpc_synchronize,
|
|
897
732
|
/* .graph_plan_create = */ NULL,
|
|
@@ -941,10 +776,9 @@ ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint, u
|
|
|
941
776
|
ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device) {
|
|
942
777
|
std::string dev_name = "RPC" + std::to_string(device) + "[" + std::string(endpoint) + "]";
|
|
943
778
|
ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context {
|
|
944
|
-
/* .endpoint
|
|
945
|
-
/* .device
|
|
946
|
-
/* .name
|
|
947
|
-
/* .gc = */ {},
|
|
779
|
+
/* .endpoint = */ endpoint,
|
|
780
|
+
/* .device = */ device,
|
|
781
|
+
/* .name = */ dev_name,
|
|
948
782
|
};
|
|
949
783
|
auto reg = ggml_backend_rpc_add_server(endpoint);
|
|
950
784
|
ggml_backend_t backend = new ggml_backend {
|
|
@@ -1008,8 +842,8 @@ public:
|
|
|
1008
842
|
bool get_device_memory(const rpc_msg_get_device_memory_req & request, rpc_msg_get_device_memory_rsp & response);
|
|
1009
843
|
|
|
1010
844
|
struct stored_graph {
|
|
1011
|
-
|
|
1012
|
-
ggml_cgraph
|
|
845
|
+
std::vector<uint8_t> buffer;
|
|
846
|
+
ggml_cgraph * graph;
|
|
1013
847
|
};
|
|
1014
848
|
|
|
1015
849
|
private:
|
|
@@ -1162,12 +996,18 @@ ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rp
|
|
|
1162
996
|
return nullptr;
|
|
1163
997
|
}
|
|
1164
998
|
|
|
999
|
+
// Fix: Prevent division by zero if blck_size is 0 (e.g., deprecated types)
|
|
1000
|
+
if (ggml_blck_size((enum ggml_type)tensor->type) == 0) {
|
|
1001
|
+
GGML_LOG_ERROR("[%s] invalid tensor type received (blck_size is 0): %u\n", __func__, tensor->type);
|
|
1002
|
+
return nullptr;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1165
1005
|
ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type,
|
|
1166
1006
|
tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
|
|
1167
1007
|
|
|
1168
1008
|
// ggml_new_tensor_4d might fail if dimensions are invalid, although less likely to crash than invalid type
|
|
1169
1009
|
if (result == nullptr) {
|
|
1170
|
-
GGML_LOG_ERROR("[%s] ggml_new_tensor_4d failed for type %u
|
|
1010
|
+
GGML_LOG_ERROR("[%s] ggml_new_tensor_4d failed for type %u\n", __func__, tensor->type);
|
|
1171
1011
|
return nullptr;
|
|
1172
1012
|
}
|
|
1173
1013
|
|
|
@@ -1245,7 +1085,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
|
|
|
1245
1085
|
fs::path cache_file = fs::path(cache_dir) / hash_str;
|
|
1246
1086
|
std::ofstream ofs(cache_file, std::ios::binary);
|
|
1247
1087
|
ofs.write((const char *)data, size);
|
|
1248
|
-
GGML_LOG_INFO("[%s] saved to '%s'\n", __func__, cache_file.c_str());
|
|
1088
|
+
GGML_LOG_INFO("[%s] saved to '%s'\n", __func__, cache_file.string().c_str());
|
|
1249
1089
|
}
|
|
1250
1090
|
ggml_backend_tensor_set(tensor, data, offset, size);
|
|
1251
1091
|
return true;
|
|
@@ -1333,7 +1173,9 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
|
|
|
1333
1173
|
if (buffer && buffer->iface.init_tensor) {
|
|
1334
1174
|
buffer->iface.init_tensor(buffer, tensor);
|
|
1335
1175
|
} else {
|
|
1336
|
-
|
|
1176
|
+
if (!buffer) {
|
|
1177
|
+
GGML_LOG_ERROR("Tensor with null buffer passed to init_tensor function\n");
|
|
1178
|
+
}
|
|
1337
1179
|
}
|
|
1338
1180
|
|
|
1339
1181
|
if (tensor->extra != nullptr) {
|
|
@@ -1440,6 +1282,10 @@ ggml_tensor * rpc_server::create_node(uint64_t id,
|
|
|
1440
1282
|
if (result == nullptr) {
|
|
1441
1283
|
return nullptr;
|
|
1442
1284
|
}
|
|
1285
|
+
if (result->buffer == nullptr && result->data != nullptr) {
|
|
1286
|
+
GGML_LOG_ERROR("[%s] invalid data ptr", __func__);
|
|
1287
|
+
return nullptr;
|
|
1288
|
+
}
|
|
1443
1289
|
tensor_map[id] = result;
|
|
1444
1290
|
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
|
1445
1291
|
// Check if the source ID is 0 before calling create_node recursively
|
|
@@ -1505,10 +1351,12 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input) {
|
|
|
1505
1351
|
LOG_DBG("[%s] device: %u, n_nodes: %u, n_tensors: %u\n", __func__, device, n_nodes, n_tensors);
|
|
1506
1352
|
|
|
1507
1353
|
size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
|
|
1508
|
-
|
|
1354
|
+
if (stored_graphs[device].buffer.size() < buf_size) {
|
|
1355
|
+
stored_graphs[device].buffer.resize(buf_size);
|
|
1356
|
+
}
|
|
1509
1357
|
struct ggml_init_params params = {
|
|
1510
1358
|
/*.mem_size =*/ buf_size,
|
|
1511
|
-
/*.mem_buffer =*/
|
|
1359
|
+
/*.mem_buffer =*/ stored_graphs[device].buffer.data(),
|
|
1512
1360
|
/*.no_alloc =*/ true,
|
|
1513
1361
|
};
|
|
1514
1362
|
ggml_context_ptr ctx_ptr { ggml_init(params) };
|
|
@@ -1538,7 +1386,6 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input) {
|
|
|
1538
1386
|
}
|
|
1539
1387
|
ggml_status status = ggml_backend_graph_compute(backends[device], graph);
|
|
1540
1388
|
GGML_ASSERT(status == GGML_STATUS_SUCCESS && "Unsuccessful graph computations are not supported with RPC");
|
|
1541
|
-
stored_graphs[device].ctx_ptr.swap(ctx_ptr);
|
|
1542
1389
|
stored_graphs[device].graph = graph;
|
|
1543
1390
|
return true;
|
|
1544
1391
|
}
|
|
@@ -1579,27 +1426,46 @@ rpc_server::~rpc_server() {
|
|
|
1579
1426
|
}
|
|
1580
1427
|
|
|
1581
1428
|
static void rpc_serve_client(const std::vector<ggml_backend_t> & backends, const char * cache_dir,
|
|
1582
|
-
|
|
1429
|
+
socket_ptr sock) {
|
|
1583
1430
|
rpc_server server(backends, cache_dir);
|
|
1584
1431
|
uint8_t cmd;
|
|
1585
|
-
if (!recv_data(
|
|
1432
|
+
if (!sock->recv_data(&cmd, 1)) {
|
|
1586
1433
|
return;
|
|
1587
1434
|
}
|
|
1588
|
-
// the first command sent by the client must be HELLO
|
|
1589
1435
|
if (cmd != RPC_CMD_HELLO) {
|
|
1590
1436
|
GGML_LOG_ERROR("Expected HELLO command, update client\n");
|
|
1591
1437
|
return;
|
|
1592
1438
|
}
|
|
1593
|
-
|
|
1439
|
+
|
|
1440
|
+
// Read input_size and validate protocol version
|
|
1441
|
+
uint64_t hello_input_size;
|
|
1442
|
+
if (!sock->recv_data(&hello_input_size, sizeof(hello_input_size))) {
|
|
1443
|
+
return;
|
|
1444
|
+
}
|
|
1445
|
+
|
|
1446
|
+
if (hello_input_size != sizeof(rpc_msg_hello_req)) {
|
|
1447
|
+
GGML_LOG_ERROR("HELLO request size mismatch (%zu vs %zu) — client needs upgrade to protocol v%d.x\n",
|
|
1448
|
+
(size_t)hello_input_size, sizeof(rpc_msg_hello_req), RPC_PROTO_MAJOR_VERSION);
|
|
1449
|
+
return;
|
|
1450
|
+
}
|
|
1451
|
+
|
|
1452
|
+
rpc_msg_hello_req req = {};
|
|
1453
|
+
if (!sock->recv_data(&req, sizeof(req))) {
|
|
1594
1454
|
return;
|
|
1595
1455
|
}
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1456
|
+
|
|
1457
|
+
rpc_msg_hello_rsp rsp = {};
|
|
1458
|
+
server.hello(rsp);
|
|
1459
|
+
// Advertise server transport capabilities based on client's caps
|
|
1460
|
+
sock->get_caps(rsp.conn_caps);
|
|
1461
|
+
if (!send_msg(sock, &rsp, sizeof(rsp))) {
|
|
1599
1462
|
return;
|
|
1600
1463
|
}
|
|
1464
|
+
|
|
1465
|
+
// Activate transport upgrade using client's caps
|
|
1466
|
+
sock->update_caps(req.conn_caps);
|
|
1601
1467
|
while (true) {
|
|
1602
|
-
if (!recv_data(
|
|
1468
|
+
if (!sock->recv_data(&cmd, 1)) {
|
|
1603
1469
|
break;
|
|
1604
1470
|
}
|
|
1605
1471
|
if (cmd >= RPC_CMD_COUNT) {
|
|
@@ -1613,115 +1479,115 @@ static void rpc_serve_client(const std::vector<ggml_backend_t> & backends, const
|
|
|
1613
1479
|
return;
|
|
1614
1480
|
}
|
|
1615
1481
|
case RPC_CMD_DEVICE_COUNT: {
|
|
1616
|
-
if (!recv_msg(
|
|
1482
|
+
if (!recv_msg(sock, nullptr, 0)) {
|
|
1617
1483
|
return;
|
|
1618
1484
|
}
|
|
1619
1485
|
rpc_msg_device_count_rsp response;
|
|
1620
1486
|
response.device_count = backends.size();
|
|
1621
|
-
if (!send_msg(
|
|
1487
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1622
1488
|
return;
|
|
1623
1489
|
}
|
|
1624
1490
|
break;
|
|
1625
1491
|
}
|
|
1626
1492
|
case RPC_CMD_ALLOC_BUFFER: {
|
|
1627
1493
|
rpc_msg_alloc_buffer_req request;
|
|
1628
|
-
if (!recv_msg(
|
|
1494
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1629
1495
|
return;
|
|
1630
1496
|
}
|
|
1631
1497
|
rpc_msg_alloc_buffer_rsp response;
|
|
1632
1498
|
if (!server.alloc_buffer(request, response)) {
|
|
1633
1499
|
return;
|
|
1634
1500
|
}
|
|
1635
|
-
if (!send_msg(
|
|
1501
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1636
1502
|
return;
|
|
1637
1503
|
}
|
|
1638
1504
|
break;
|
|
1639
1505
|
}
|
|
1640
1506
|
case RPC_CMD_GET_ALLOC_SIZE: {
|
|
1641
1507
|
rpc_msg_get_alloc_size_req request;
|
|
1642
|
-
if (!recv_msg(
|
|
1508
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1643
1509
|
return;
|
|
1644
1510
|
}
|
|
1645
1511
|
rpc_msg_get_alloc_size_rsp response;
|
|
1646
1512
|
if (!server.get_alloc_size(request, response)) {
|
|
1647
1513
|
return;
|
|
1648
1514
|
}
|
|
1649
|
-
if (!send_msg(
|
|
1515
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1650
1516
|
return;
|
|
1651
1517
|
}
|
|
1652
1518
|
break;
|
|
1653
1519
|
}
|
|
1654
1520
|
case RPC_CMD_GET_ALIGNMENT: {
|
|
1655
1521
|
rpc_msg_get_alignment_req request;
|
|
1656
|
-
if (!recv_msg(
|
|
1522
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1657
1523
|
return;
|
|
1658
1524
|
}
|
|
1659
1525
|
rpc_msg_get_alignment_rsp response;
|
|
1660
1526
|
if (!server.get_alignment(request, response)) {
|
|
1661
1527
|
return;
|
|
1662
1528
|
}
|
|
1663
|
-
if (!send_msg(
|
|
1529
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1664
1530
|
return;
|
|
1665
1531
|
}
|
|
1666
1532
|
break;
|
|
1667
1533
|
}
|
|
1668
1534
|
case RPC_CMD_GET_MAX_SIZE: {
|
|
1669
1535
|
rpc_msg_get_max_size_req request;
|
|
1670
|
-
if (!recv_msg(
|
|
1536
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1671
1537
|
return;
|
|
1672
1538
|
}
|
|
1673
1539
|
rpc_msg_get_max_size_rsp response;
|
|
1674
1540
|
if (!server.get_max_size(request, response)) {
|
|
1675
1541
|
return;
|
|
1676
1542
|
}
|
|
1677
|
-
if (!send_msg(
|
|
1543
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1678
1544
|
return;
|
|
1679
1545
|
}
|
|
1680
1546
|
break;
|
|
1681
1547
|
}
|
|
1682
1548
|
case RPC_CMD_BUFFER_GET_BASE: {
|
|
1683
1549
|
rpc_msg_buffer_get_base_req request;
|
|
1684
|
-
if (!recv_msg(
|
|
1550
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1685
1551
|
return;
|
|
1686
1552
|
}
|
|
1687
1553
|
rpc_msg_buffer_get_base_rsp response;
|
|
1688
1554
|
if (!server.buffer_get_base(request, response)) {
|
|
1689
1555
|
return;
|
|
1690
1556
|
}
|
|
1691
|
-
if (!send_msg(
|
|
1557
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1692
1558
|
return;
|
|
1693
1559
|
}
|
|
1694
1560
|
break;
|
|
1695
1561
|
}
|
|
1696
1562
|
case RPC_CMD_FREE_BUFFER: {
|
|
1697
1563
|
rpc_msg_free_buffer_req request;
|
|
1698
|
-
if (!recv_msg(
|
|
1564
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1699
1565
|
return;
|
|
1700
1566
|
}
|
|
1701
1567
|
if (!server.free_buffer(request)) {
|
|
1702
1568
|
return;
|
|
1703
1569
|
}
|
|
1704
|
-
if (!send_msg(
|
|
1570
|
+
if (!send_msg(sock, nullptr, 0)) {
|
|
1705
1571
|
return;
|
|
1706
1572
|
}
|
|
1707
1573
|
break;
|
|
1708
1574
|
}
|
|
1709
1575
|
case RPC_CMD_BUFFER_CLEAR: {
|
|
1710
1576
|
rpc_msg_buffer_clear_req request;
|
|
1711
|
-
if (!recv_msg(
|
|
1577
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1712
1578
|
return;
|
|
1713
1579
|
}
|
|
1714
1580
|
if (!server.buffer_clear(request)) {
|
|
1715
1581
|
return;
|
|
1716
1582
|
}
|
|
1717
|
-
if (!send_msg(
|
|
1583
|
+
if (!send_msg(sock, nullptr, 0)) {
|
|
1718
1584
|
return;
|
|
1719
1585
|
}
|
|
1720
1586
|
break;
|
|
1721
1587
|
}
|
|
1722
1588
|
case RPC_CMD_SET_TENSOR: {
|
|
1723
1589
|
std::vector<uint8_t> input;
|
|
1724
|
-
if (!recv_msg(
|
|
1590
|
+
if (!recv_msg(sock, input)) {
|
|
1725
1591
|
return;
|
|
1726
1592
|
}
|
|
1727
1593
|
if (!server.set_tensor(input)) {
|
|
@@ -1731,62 +1597,62 @@ static void rpc_serve_client(const std::vector<ggml_backend_t> & backends, const
|
|
|
1731
1597
|
}
|
|
1732
1598
|
case RPC_CMD_SET_TENSOR_HASH: {
|
|
1733
1599
|
rpc_msg_set_tensor_hash_req request;
|
|
1734
|
-
if (!recv_msg(
|
|
1600
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1735
1601
|
return;
|
|
1736
1602
|
}
|
|
1737
1603
|
rpc_msg_set_tensor_hash_rsp response;
|
|
1738
1604
|
if (!server.set_tensor_hash(request, response)) {
|
|
1739
1605
|
return;
|
|
1740
1606
|
}
|
|
1741
|
-
if (!send_msg(
|
|
1607
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1742
1608
|
return;
|
|
1743
1609
|
}
|
|
1744
1610
|
break;
|
|
1745
1611
|
}
|
|
1746
1612
|
case RPC_CMD_INIT_TENSOR: {
|
|
1747
1613
|
rpc_msg_init_tensor_req request;
|
|
1748
|
-
if (!recv_msg(
|
|
1614
|
+
if (!recv_msg(sock, &request,sizeof(request))) {
|
|
1749
1615
|
return;
|
|
1750
1616
|
}
|
|
1751
1617
|
if (!server.init_tensor(request)) {
|
|
1752
1618
|
return;
|
|
1753
1619
|
}
|
|
1754
|
-
if (!send_msg(
|
|
1620
|
+
if (!send_msg(sock, nullptr, 0)) {
|
|
1755
1621
|
return;
|
|
1756
1622
|
}
|
|
1757
1623
|
break;
|
|
1758
1624
|
}
|
|
1759
1625
|
case RPC_CMD_GET_TENSOR: {
|
|
1760
1626
|
rpc_msg_get_tensor_req request;
|
|
1761
|
-
if (!recv_msg(
|
|
1627
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1762
1628
|
return;
|
|
1763
1629
|
}
|
|
1764
1630
|
std::vector<uint8_t> response;
|
|
1765
1631
|
if (!server.get_tensor(request, response)) {
|
|
1766
1632
|
return;
|
|
1767
1633
|
}
|
|
1768
|
-
if (!send_msg(
|
|
1634
|
+
if (!send_msg(sock, response.data(), response.size())) {
|
|
1769
1635
|
return;
|
|
1770
1636
|
}
|
|
1771
1637
|
break;
|
|
1772
1638
|
}
|
|
1773
1639
|
case RPC_CMD_COPY_TENSOR: {
|
|
1774
1640
|
rpc_msg_copy_tensor_req request;
|
|
1775
|
-
if (!recv_msg(
|
|
1641
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1776
1642
|
return;
|
|
1777
1643
|
}
|
|
1778
1644
|
rpc_msg_copy_tensor_rsp response;
|
|
1779
1645
|
if (!server.copy_tensor(request, response)) {
|
|
1780
1646
|
return;
|
|
1781
1647
|
}
|
|
1782
|
-
if (!send_msg(
|
|
1648
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1783
1649
|
return;
|
|
1784
1650
|
}
|
|
1785
1651
|
break;
|
|
1786
1652
|
}
|
|
1787
1653
|
case RPC_CMD_GRAPH_COMPUTE: {
|
|
1788
1654
|
std::vector<uint8_t> input;
|
|
1789
|
-
if (!recv_msg(
|
|
1655
|
+
if (!recv_msg(sock, input)) {
|
|
1790
1656
|
return;
|
|
1791
1657
|
}
|
|
1792
1658
|
if (!server.graph_compute(input)) {
|
|
@@ -1796,7 +1662,7 @@ static void rpc_serve_client(const std::vector<ggml_backend_t> & backends, const
|
|
|
1796
1662
|
}
|
|
1797
1663
|
case RPC_CMD_GRAPH_RECOMPUTE: {
|
|
1798
1664
|
rpc_msg_graph_recompute_req request;
|
|
1799
|
-
if (!recv_msg(
|
|
1665
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1800
1666
|
return;
|
|
1801
1667
|
}
|
|
1802
1668
|
if (!server.graph_recompute(request)) {
|
|
@@ -1806,14 +1672,14 @@ static void rpc_serve_client(const std::vector<ggml_backend_t> & backends, const
|
|
|
1806
1672
|
}
|
|
1807
1673
|
case RPC_CMD_GET_DEVICE_MEMORY: {
|
|
1808
1674
|
rpc_msg_get_device_memory_req request;
|
|
1809
|
-
if (!recv_msg(
|
|
1675
|
+
if (!recv_msg(sock, &request, sizeof(request))) {
|
|
1810
1676
|
return;
|
|
1811
1677
|
}
|
|
1812
1678
|
rpc_msg_get_device_memory_rsp response;
|
|
1813
1679
|
if (!server.get_device_memory(request, response)) {
|
|
1814
1680
|
return;
|
|
1815
1681
|
}
|
|
1816
|
-
if (!send_msg(
|
|
1682
|
+
if (!send_msg(sock, &response, sizeof(response))) {
|
|
1817
1683
|
return;
|
|
1818
1684
|
}
|
|
1819
1685
|
break;
|
|
@@ -1866,50 +1732,39 @@ void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir
|
|
|
1866
1732
|
if (!parse_endpoint(endpoint, host, port)) {
|
|
1867
1733
|
return;
|
|
1868
1734
|
}
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1735
|
+
|
|
1736
|
+
#ifdef GGML_RPC_RDMA
|
|
1737
|
+
printf(" transport : TCP (RDMA auto-negotiate enabled)\n");
|
|
1738
|
+
#else
|
|
1739
|
+
printf(" transport : TCP\n");
|
|
1740
|
+
#endif // GGML_RPC_RDMA
|
|
1741
|
+
if (!rpc_transport_init()) {
|
|
1742
|
+
fprintf(stderr, "Failed to initialize RPC transport\n");
|
|
1743
|
+
return;
|
|
1877
1744
|
}
|
|
1878
|
-
|
|
1879
|
-
auto server_socket = create_server_socket(host.c_str(), port);
|
|
1745
|
+
auto server_socket = socket_t::create_server(host.c_str(), port);
|
|
1880
1746
|
if (server_socket == nullptr) {
|
|
1881
1747
|
fprintf(stderr, "Failed to create server socket\n");
|
|
1882
1748
|
return;
|
|
1883
1749
|
}
|
|
1884
1750
|
while (true) {
|
|
1885
|
-
auto client_socket =
|
|
1751
|
+
auto client_socket = server_socket->accept();
|
|
1886
1752
|
if (client_socket == nullptr) {
|
|
1887
1753
|
fprintf(stderr, "Failed to accept client connection\n");
|
|
1888
1754
|
return;
|
|
1889
1755
|
}
|
|
1890
1756
|
printf("Accepted client connection\n");
|
|
1891
1757
|
fflush(stdout);
|
|
1892
|
-
rpc_serve_client(backends, cache_dir, client_socket
|
|
1758
|
+
rpc_serve_client(backends, cache_dir, client_socket);
|
|
1893
1759
|
printf("Client connection closed\n");
|
|
1894
1760
|
fflush(stdout);
|
|
1895
1761
|
}
|
|
1896
|
-
|
|
1897
|
-
WSACleanup();
|
|
1898
|
-
#endif
|
|
1762
|
+
rpc_transport_shutdown();
|
|
1899
1763
|
for (auto backend : backends) {
|
|
1900
1764
|
ggml_backend_free(backend);
|
|
1901
1765
|
}
|
|
1902
1766
|
}
|
|
1903
1767
|
|
|
1904
|
-
// device interface
|
|
1905
|
-
|
|
1906
|
-
struct ggml_backend_rpc_device_context {
|
|
1907
|
-
std::string endpoint;
|
|
1908
|
-
uint32_t device;
|
|
1909
|
-
std::string name;
|
|
1910
|
-
std::string description;
|
|
1911
|
-
};
|
|
1912
|
-
|
|
1913
1768
|
static const char * ggml_backend_rpc_device_get_name(ggml_backend_dev_t dev) {
|
|
1914
1769
|
ggml_backend_rpc_device_context * ctx = (ggml_backend_rpc_device_context *)dev->context;
|
|
1915
1770
|
|
|
@@ -2091,10 +1946,11 @@ ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint) {
|
|
|
2091
1946
|
std::string dev_name = "RPC" + std::to_string(dev_id);
|
|
2092
1947
|
std::string dev_desc = std::string(endpoint);
|
|
2093
1948
|
ggml_backend_rpc_device_context * dev_ctx = new ggml_backend_rpc_device_context {
|
|
2094
|
-
/* .endpoint = */
|
|
2095
|
-
/* .device = */
|
|
2096
|
-
/* .name = */
|
|
2097
|
-
/* .description = */
|
|
1949
|
+
/* .endpoint = */ endpoint,
|
|
1950
|
+
/* .device = */ ind,
|
|
1951
|
+
/* .name = */ dev_name,
|
|
1952
|
+
/* .description = */ dev_desc,
|
|
1953
|
+
/* .last_graph_uid = */ 0,
|
|
2098
1954
|
};
|
|
2099
1955
|
|
|
2100
1956
|
ggml_backend_dev_t dev = new ggml_backend_device {
|