whispercpp 1.3.2 → 1.3.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -3
- data/README.md +71 -14
- data/Rakefile +20 -7
- data/ext/.gitignore +4 -6
- data/ext/dependencies.rb +36 -24
- data/ext/extconf.rb +1 -1
- data/ext/options.rb +48 -184
- data/ext/ruby_whisper.c +18 -0
- data/ext/ruby_whisper_context.c +43 -12
- data/ext/ruby_whisper_model.c +1 -1
- data/ext/ruby_whisper_params.c +59 -27
- data/ext/ruby_whisper_segment.c +81 -4
- data/ext/ruby_whisper_transcribe.cpp +13 -7
- data/ext/ruby_whisper_vad_params.c +1 -1
- data/ext/sources/CMakeLists.txt +5 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/build-xcframework.sh +24 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
- data/ext/sources/examples/addon.node/addon.cpp +154 -35
- data/ext/sources/examples/addon.node/index.js +10 -5
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/bench.cpp +29 -18
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +7 -4
- data/ext/sources/examples/command/command.cpp +58 -32
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/common-whisper.cpp +14 -7
- data/ext/sources/examples/lsp/lsp.cpp +21 -17
- data/ext/sources/examples/quantize/quantize.cpp +3 -0
- data/ext/sources/examples/server/CMakeLists.txt +3 -0
- data/ext/sources/examples/server/server.cpp +193 -35
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +10 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +101 -4
- data/ext/sources/examples/talk-llama/llama-adapter.h +6 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +756 -15
- data/ext/sources/examples/talk-llama/llama-arch.h +85 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +773 -272
- data/ext/sources/examples/talk-llama/llama-batch.h +126 -55
- data/ext/sources/examples/talk-llama/llama-chat.cpp +150 -13
- data/ext/sources/examples/talk-llama/llama-chat.h +8 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +814 -542
- data/ext/sources/examples/talk-llama/llama-context.h +68 -32
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-cparams.h +4 -4
- data/ext/sources/examples/talk-llama/llama-graph.cpp +787 -440
- data/ext/sources/examples/talk-llama/llama-graph.h +333 -153
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +128 -6
- data/ext/sources/examples/talk-llama/llama-hparams.h +80 -17
- data/ext/sources/examples/talk-llama/llama-impl.h +2 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +326 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +137 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +1248 -1967
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +218 -345
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +164 -52
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +266 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +139 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1154 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +182 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +94 -4
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +44 -17
- data/ext/sources/examples/talk-llama/llama-model-loader.h +3 -2
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +11377 -5248
- data/ext/sources/examples/talk-llama/llama-model.h +87 -9
- data/ext/sources/examples/talk-llama/llama-quant.cpp +137 -16
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +226 -126
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +502 -38
- data/ext/sources/examples/talk-llama/llama-vocab.h +46 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -17
- data/ext/sources/examples/talk-llama/llama.h +176 -151
- data/ext/sources/examples/talk-llama/talk-llama.cpp +11 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +212 -0
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +6 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +17 -16
- data/ext/sources/ggml/CMakeLists.txt +106 -33
- data/ext/sources/ggml/cmake/common.cmake +24 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-backend.h +18 -2
- data/ext/sources/ggml/include/ggml-cpu.h +2 -0
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml.h +365 -21
- data/ext/sources/ggml/src/CMakeLists.txt +98 -25
- data/ext/sources/ggml/src/ggml-alloc.c +265 -141
- data/ext/sources/ggml/src/ggml-backend-impl.h +4 -1
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +35 -13
- data/ext/sources/ggml/src/ggml-backend.cpp +266 -60
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +4 -4
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -4
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +15 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +903 -717
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +143 -25
- data/ext/sources/ggml/src/ggml-cann/common.h +149 -2
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +521 -78
- data/ext/sources/ggml/src/ggml-common.h +21 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +165 -50
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +5 -3
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +3650 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1891 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2160 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1897 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +214 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +18 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +23 -7
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +179 -110
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +44 -33
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +152 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +7 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +228 -98
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +532 -1124
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +3374 -2081
- data/ext/sources/ggml/src/ggml-cpu/ops.h +13 -8
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1193 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +34 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1982 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +120 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +367 -46
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +3 -3
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +272 -35
- data/ext/sources/ggml/src/ggml-cpu/vec.h +794 -142
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +20 -16
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +291 -81
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +117 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +20 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +64 -307
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +499 -368
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +142 -93
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +755 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +593 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +90 -50
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +185 -198
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +636 -222
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +73 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +198 -45
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +123 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +496 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +206 -57
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1262 -721
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +506 -0
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cuh → mmvf.cuh} +4 -5
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +64 -73
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +46 -23
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +21 -27
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +276 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +126 -59
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +322 -98
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +23 -19
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +21 -18
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +259 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +179 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +92 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +58 -36
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +4 -3
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -2
- data/ext/sources/ggml/src/ggml-impl.h +229 -175
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +21 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +600 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1376 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +226 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1308 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +163 -63
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +3158 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +82 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +718 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3208 -1575
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +18 -8
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +32 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4430 -792
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +133 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +117 -24
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +85 -62
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +13 -17
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +21 -2
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +116 -211
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +700 -1041
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +20 -9
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +17 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +2 -96
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +393 -250
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +32 -8
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -11
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +125 -21
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +4 -3
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +105 -17
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4198 -1145
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +41 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +13 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +39 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +349 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +66 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +2 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +6 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +4 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +69 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +60 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +98 -42
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +64 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +74 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +4 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +19 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +25 -15
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +18 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +126 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +65 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +11 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -531
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +206 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp +556 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +12 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +15 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +111 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +24 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +53 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +64 -11
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +29 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +38 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +4 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +101 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +338 -71
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1558 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +57 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +48 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml.c +802 -142
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +32 -4
- data/ext/sources/include/whisper.h +2 -0
- data/ext/sources/src/CMakeLists.txt +2 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
- data/ext/sources/src/whisper.cpp +241 -215
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +3 -3
- data/ext/sources/tests/test-vad.cpp +2 -2
- data/extsources.rb +15 -9
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +57 -2
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +75 -38
- data/{tests → test}/helper.rb +1 -12
- data/{tests → test}/test_model.rb +9 -0
- data/test/test_package.rb +51 -0
- data/{tests → test}/test_params.rb +8 -0
- data/test/test_segment.rb +146 -0
- data/{tests → test}/test_whisper.rb +70 -0
- data/whispercpp.gemspec +2 -3
- metadata +246 -191
- data/ext/sources/.dockerignore +0 -3
- data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
- data/ext/sources/ci/run.sh +0 -336
- data/ext/sources/close-issue.yml +0 -28
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -6431
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +0 -336
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -5998
- data/tests/test_package.rb +0 -46
- data/tests/test_segment.rb +0 -74
- /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
- /data/{tests → test}/test_callback.rb +0 -0
- /data/{tests → test}/test_error.rb +0 -0
- /data/{tests → test}/test_vad.rb +0 -0
- /data/{tests → test}/test_vad_params.rb +0 -0
@@ -19,9 +19,8 @@
|
|
19
19
|
#include <stdio.h>
|
20
20
|
#include <stdlib.h>
|
21
21
|
#include <string.h>
|
22
|
-
#include <string>
|
23
|
-
#include <vector>
|
24
22
|
#include <algorithm>
|
23
|
+
#include <vector>
|
25
24
|
|
26
25
|
#ifdef __APPLE__
|
27
26
|
#include <sys/types.h>
|
@@ -32,6 +31,7 @@
|
|
32
31
|
// backend buffer type
|
33
32
|
|
34
33
|
const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft) {
|
34
|
+
GGML_ASSERT(buft);
|
35
35
|
return buft->iface.get_name(buft);
|
36
36
|
}
|
37
37
|
|
@@ -41,14 +41,17 @@ ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t
|
|
41
41
|
return ggml_backend_buffer_init(buft, {}, NULL, 0);
|
42
42
|
}
|
43
43
|
|
44
|
+
GGML_ASSERT(buft);
|
44
45
|
return buft->iface.alloc_buffer(buft, size);
|
45
46
|
}
|
46
47
|
|
47
48
|
size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
|
49
|
+
GGML_ASSERT(buft);
|
48
50
|
return buft->iface.get_alignment(buft);
|
49
51
|
}
|
50
52
|
|
51
53
|
size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
|
54
|
+
GGML_ASSERT(buft);
|
52
55
|
// get_max_size is optional, defaults to SIZE_MAX
|
53
56
|
if (buft->iface.get_max_size) {
|
54
57
|
return buft->iface.get_max_size(buft);
|
@@ -57,6 +60,7 @@ size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
|
|
57
60
|
}
|
58
61
|
|
59
62
|
size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) {
|
63
|
+
GGML_ASSERT(buft);
|
60
64
|
// get_alloc_size is optional, defaults to ggml_nbytes
|
61
65
|
if (buft->iface.get_alloc_size) {
|
62
66
|
size_t size = buft->iface.get_alloc_size(buft, tensor);
|
@@ -67,6 +71,7 @@ size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const s
|
|
67
71
|
}
|
68
72
|
|
69
73
|
bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
|
74
|
+
GGML_ASSERT(buft);
|
70
75
|
if (buft->iface.is_host) {
|
71
76
|
return buft->iface.is_host(buft);
|
72
77
|
}
|
@@ -74,6 +79,7 @@ bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
|
|
74
79
|
}
|
75
80
|
|
76
81
|
ggml_backend_dev_t ggml_backend_buft_get_device(ggml_backend_buffer_type_t buft) {
|
82
|
+
GGML_ASSERT(buft);
|
77
83
|
return buft->device;
|
78
84
|
}
|
79
85
|
|
@@ -111,10 +117,12 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
|
|
111
117
|
}
|
112
118
|
|
113
119
|
size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
|
120
|
+
GGML_ASSERT(buffer);
|
114
121
|
return buffer->size;
|
115
122
|
}
|
116
123
|
|
117
124
|
void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
|
125
|
+
GGML_ASSERT(buffer);
|
118
126
|
// get_base is optional if the buffer is zero-sized
|
119
127
|
if (buffer->size == 0) {
|
120
128
|
return NULL;
|
@@ -128,6 +136,7 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
128
136
|
}
|
129
137
|
|
130
138
|
enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
139
|
+
GGML_ASSERT(buffer);
|
131
140
|
// init_tensor is optional
|
132
141
|
if (buffer->iface.init_tensor) {
|
133
142
|
return buffer->iface.init_tensor(buffer, tensor);
|
@@ -136,6 +145,7 @@ enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, s
|
|
136
145
|
}
|
137
146
|
|
138
147
|
void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
148
|
+
GGML_ASSERT(buffer);
|
139
149
|
// clear is optional if the buffer is zero-sized
|
140
150
|
if (buffer->size == 0) {
|
141
151
|
return;
|
@@ -161,6 +171,7 @@ bool ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) {
|
|
161
171
|
}
|
162
172
|
|
163
173
|
void ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) {
|
174
|
+
GGML_ASSERT(buffer);
|
164
175
|
buffer->usage = usage;
|
165
176
|
|
166
177
|
// FIXME: add a generic callback to the buffer interface
|
@@ -170,14 +181,17 @@ void ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backe
|
|
170
181
|
}
|
171
182
|
|
172
183
|
enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage(ggml_backend_buffer_t buffer) {
|
184
|
+
GGML_ASSERT(buffer);
|
173
185
|
return buffer->usage;
|
174
186
|
}
|
175
187
|
|
176
188
|
ggml_backend_buffer_type_t ggml_backend_buffer_get_type(ggml_backend_buffer_t buffer) {
|
189
|
+
GGML_ASSERT(buffer);
|
177
190
|
return buffer->buft;
|
178
191
|
}
|
179
192
|
|
180
193
|
void ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) {
|
194
|
+
GGML_ASSERT(buffer);
|
181
195
|
if (buffer->iface.reset) {
|
182
196
|
buffer->iface.reset(buffer);
|
183
197
|
}
|
@@ -216,6 +230,7 @@ void ggml_backend_free(ggml_backend_t backend) {
|
|
216
230
|
}
|
217
231
|
|
218
232
|
ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend) {
|
233
|
+
GGML_ASSERT(backend);
|
219
234
|
return ggml_backend_dev_buffer_type(backend->device);
|
220
235
|
}
|
221
236
|
|
@@ -232,6 +247,8 @@ size_t ggml_backend_get_max_size(ggml_backend_t backend) {
|
|
232
247
|
}
|
233
248
|
|
234
249
|
void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
250
|
+
GGML_ASSERT(backend);
|
251
|
+
GGML_ASSERT(tensor);
|
235
252
|
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
236
253
|
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
237
254
|
|
@@ -243,6 +260,8 @@ void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor *
|
|
243
260
|
}
|
244
261
|
|
245
262
|
void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
263
|
+
GGML_ASSERT(backend);
|
264
|
+
GGML_ASSERT(tensor);
|
246
265
|
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
247
266
|
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
|
248
267
|
|
@@ -284,6 +303,7 @@ void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, siz
|
|
284
303
|
}
|
285
304
|
|
286
305
|
void ggml_backend_tensor_memset(struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
306
|
+
GGML_ASSERT(tensor);
|
287
307
|
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
288
308
|
|
289
309
|
if (size == 0) {
|
@@ -299,6 +319,7 @@ void ggml_backend_tensor_memset(struct ggml_tensor * tensor, uint8_t value, size
|
|
299
319
|
}
|
300
320
|
|
301
321
|
void ggml_backend_synchronize(ggml_backend_t backend) {
|
322
|
+
GGML_ASSERT(backend);
|
302
323
|
if (backend->iface.synchronize == NULL) {
|
303
324
|
return;
|
304
325
|
}
|
@@ -307,18 +328,21 @@ void ggml_backend_synchronize(ggml_backend_t backend) {
|
|
307
328
|
}
|
308
329
|
|
309
330
|
ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
331
|
+
GGML_ASSERT(backend);
|
310
332
|
GGML_ASSERT(backend->iface.graph_plan_create != NULL);
|
311
333
|
|
312
334
|
return backend->iface.graph_plan_create(backend, cgraph);
|
313
335
|
}
|
314
336
|
|
315
337
|
void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
338
|
+
GGML_ASSERT(backend);
|
316
339
|
GGML_ASSERT(backend->iface.graph_plan_free != NULL);
|
317
340
|
|
318
341
|
backend->iface.graph_plan_free(backend, plan);
|
319
342
|
}
|
320
343
|
|
321
344
|
enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
345
|
+
GGML_ASSERT(backend);
|
322
346
|
GGML_ASSERT(backend->iface.graph_plan_compute != NULL);
|
323
347
|
|
324
348
|
return backend->iface.graph_plan_compute(backend, plan);
|
@@ -331,42 +355,32 @@ enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_
|
|
331
355
|
}
|
332
356
|
|
333
357
|
enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
358
|
+
GGML_ASSERT(backend);
|
334
359
|
return backend->iface.graph_compute(backend, cgraph);
|
335
360
|
}
|
336
361
|
|
337
362
|
bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
|
363
|
+
GGML_ASSERT(backend);
|
338
364
|
return ggml_backend_dev_supports_op(backend->device, op);
|
339
365
|
}
|
340
366
|
|
341
367
|
bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
|
368
|
+
GGML_ASSERT(backend);
|
342
369
|
return ggml_backend_dev_supports_buft(backend->device, buft);
|
343
370
|
}
|
344
371
|
|
345
372
|
bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op) {
|
373
|
+
GGML_ASSERT(backend);
|
346
374
|
return ggml_backend_dev_offload_op(backend->device, op);
|
347
375
|
}
|
348
376
|
|
349
377
|
ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) {
|
378
|
+
GGML_ASSERT(backend);
|
350
379
|
return backend->device;
|
351
380
|
}
|
352
381
|
|
353
382
|
// backend copy
|
354
383
|
|
355
|
-
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
|
356
|
-
if (a->type != b->type) {
|
357
|
-
return false;
|
358
|
-
}
|
359
|
-
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
360
|
-
if (a->ne[i] != b->ne[i]) {
|
361
|
-
return false;
|
362
|
-
}
|
363
|
-
if (a->nb[i] != b->nb[i]) {
|
364
|
-
return false;
|
365
|
-
}
|
366
|
-
}
|
367
|
-
return true;
|
368
|
-
}
|
369
|
-
|
370
384
|
void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
|
371
385
|
GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
|
372
386
|
|
@@ -397,6 +411,7 @@ void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t b
|
|
397
411
|
return;
|
398
412
|
}
|
399
413
|
|
414
|
+
GGML_ASSERT(backend_dst);
|
400
415
|
if (backend_dst->iface.cpy_tensor_async != NULL) {
|
401
416
|
if (backend_dst->iface.cpy_tensor_async(backend_src, backend_dst, src, dst)) {
|
402
417
|
return;
|
@@ -428,38 +443,52 @@ void ggml_backend_event_free(ggml_backend_event_t event) {
|
|
428
443
|
}
|
429
444
|
|
430
445
|
void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend) {
|
446
|
+
GGML_ASSERT(backend);
|
431
447
|
GGML_ASSERT(backend->iface.event_record != NULL);
|
432
448
|
|
433
449
|
backend->iface.event_record(backend, event);
|
434
450
|
}
|
435
451
|
|
436
452
|
void ggml_backend_event_synchronize(ggml_backend_event_t event) {
|
453
|
+
GGML_ASSERT(event);
|
437
454
|
GGML_ASSERT(event->device->iface.event_synchronize);
|
438
455
|
|
439
456
|
event->device->iface.event_synchronize(event->device, event);
|
440
457
|
}
|
441
458
|
|
442
459
|
void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
|
460
|
+
GGML_ASSERT(backend);
|
443
461
|
GGML_ASSERT(backend->iface.event_wait != NULL);
|
444
462
|
|
445
463
|
backend->iface.event_wait(backend, event);
|
446
464
|
}
|
447
465
|
|
466
|
+
static void ggml_backend_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
467
|
+
GGML_ASSERT(backend);
|
468
|
+
if (backend->iface.graph_optimize != NULL) {
|
469
|
+
backend->iface.graph_optimize(backend, cgraph);
|
470
|
+
}
|
471
|
+
}
|
472
|
+
|
448
473
|
// Backend device
|
449
474
|
|
450
475
|
const char * ggml_backend_dev_name(ggml_backend_dev_t device) {
|
476
|
+
GGML_ASSERT(device);
|
451
477
|
return device->iface.get_name(device);
|
452
478
|
}
|
453
479
|
|
454
480
|
const char * ggml_backend_dev_description(ggml_backend_dev_t device) {
|
481
|
+
GGML_ASSERT(device);
|
455
482
|
return device->iface.get_description(device);
|
456
483
|
}
|
457
484
|
|
458
485
|
void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total) {
|
486
|
+
GGML_ASSERT(device);
|
459
487
|
device->iface.get_memory(device, free, total);
|
460
488
|
}
|
461
489
|
|
462
490
|
enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device) {
|
491
|
+
GGML_ASSERT(device);
|
463
492
|
return device->iface.get_type(device);
|
464
493
|
}
|
465
494
|
|
@@ -469,18 +498,22 @@ void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_d
|
|
469
498
|
}
|
470
499
|
|
471
500
|
ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device) {
|
501
|
+
GGML_ASSERT(device);
|
472
502
|
return device->reg;
|
473
503
|
}
|
474
504
|
|
475
505
|
ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params) {
|
506
|
+
GGML_ASSERT(device);
|
476
507
|
return device->iface.init_backend(device, params);
|
477
508
|
}
|
478
509
|
|
479
510
|
ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device) {
|
511
|
+
GGML_ASSERT(device);
|
480
512
|
return device->iface.get_buffer_type(device);
|
481
513
|
}
|
482
514
|
|
483
515
|
ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) {
|
516
|
+
GGML_ASSERT(device);
|
484
517
|
if (device->iface.get_host_buffer_type == NULL) {
|
485
518
|
return NULL;
|
486
519
|
}
|
@@ -489,18 +522,22 @@ ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t
|
|
489
522
|
}
|
490
523
|
|
491
524
|
ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size) {
|
525
|
+
GGML_ASSERT(device);
|
492
526
|
return device->iface.buffer_from_host_ptr(device, ptr, size, max_tensor_size);
|
493
527
|
}
|
494
528
|
|
495
529
|
bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
|
530
|
+
GGML_ASSERT(device);
|
496
531
|
return device->iface.supports_op(device, op);
|
497
532
|
}
|
498
533
|
|
499
534
|
bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft) {
|
535
|
+
GGML_ASSERT(device);
|
500
536
|
return device->iface.supports_buft(device, buft);
|
501
537
|
}
|
502
538
|
|
503
539
|
bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
|
540
|
+
GGML_ASSERT(device);
|
504
541
|
if (device->iface.offload_op != NULL) {
|
505
542
|
return device->iface.offload_op(device, op);
|
506
543
|
}
|
@@ -511,18 +548,22 @@ bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_te
|
|
511
548
|
// Backend (reg)
|
512
549
|
|
513
550
|
const char * ggml_backend_reg_name(ggml_backend_reg_t reg) {
|
551
|
+
GGML_ASSERT(reg);
|
514
552
|
return reg->iface.get_name(reg);
|
515
553
|
}
|
516
554
|
|
517
555
|
size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg) {
|
556
|
+
GGML_ASSERT(reg);
|
518
557
|
return reg->iface.get_device_count(reg);
|
519
558
|
}
|
520
559
|
|
521
560
|
ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index) {
|
561
|
+
GGML_ASSERT(reg);
|
522
562
|
return reg->iface.get_device(reg, index);
|
523
563
|
}
|
524
564
|
|
525
565
|
void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
566
|
+
GGML_ASSERT(reg);
|
526
567
|
if (!reg->iface.get_proc_address) {
|
527
568
|
return NULL;
|
528
569
|
}
|
@@ -537,6 +578,7 @@ struct ggml_backend_multi_buffer_context {
|
|
537
578
|
};
|
538
579
|
|
539
580
|
static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
581
|
+
GGML_ASSERT(buffer);
|
540
582
|
ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context;
|
541
583
|
for (size_t i = 0; i < ctx->n_buffers; i++) {
|
542
584
|
ggml_backend_buffer_free(ctx->buffers[i]);
|
@@ -547,6 +589,7 @@ static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer)
|
|
547
589
|
}
|
548
590
|
|
549
591
|
static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
592
|
+
GGML_ASSERT(buffer);
|
550
593
|
ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context;
|
551
594
|
for (size_t i = 0; i < ctx->n_buffers; i++) {
|
552
595
|
ggml_backend_buffer_clear(ctx->buffers[i], value);
|
@@ -582,10 +625,12 @@ ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer
|
|
582
625
|
}
|
583
626
|
|
584
627
|
bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer) {
|
628
|
+
GGML_ASSERT(buffer);
|
585
629
|
return buffer->iface.free_buffer == ggml_backend_multi_buffer_free_buffer;
|
586
630
|
}
|
587
631
|
|
588
632
|
void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) {
|
633
|
+
GGML_ASSERT(buffer);
|
589
634
|
GGML_ASSERT(ggml_backend_buffer_is_multi_buffer(buffer));
|
590
635
|
ggml_backend_multi_buffer_context * ctx = (ggml_backend_multi_buffer_context *) buffer->context;
|
591
636
|
for (size_t i = 0; i < ctx->n_buffers; i++) {
|
@@ -613,7 +658,7 @@ static bool ggml_is_view_op(enum ggml_op op) {
|
|
613
658
|
#endif
|
614
659
|
|
615
660
|
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
|
616
|
-
#define GGML_SCHED_MAX_SPLIT_INPUTS
|
661
|
+
#define GGML_SCHED_MAX_SPLIT_INPUTS 30
|
617
662
|
#endif
|
618
663
|
|
619
664
|
#ifndef GGML_SCHED_MAX_COPIES
|
@@ -662,6 +707,7 @@ struct ggml_backend_sched {
|
|
662
707
|
// pipeline parallelism support
|
663
708
|
int n_copies;
|
664
709
|
int cur_copy;
|
710
|
+
int next_copy;
|
665
711
|
ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
|
666
712
|
struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
|
667
713
|
int n_graph_inputs;
|
@@ -817,8 +863,9 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
|
817
863
|
}
|
818
864
|
if (sched->debug > 1) {
|
819
865
|
ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
820
|
-
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
|
821
|
-
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)
|
866
|
+
GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, ggml_op_name(node->op), node->name,
|
867
|
+
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
|
868
|
+
graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)]);
|
822
869
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
823
870
|
struct ggml_tensor * src = node->src[j];
|
824
871
|
if (src == NULL) {
|
@@ -862,7 +909,7 @@ static void ggml_backend_sched_set_if_supported(ggml_backend_sched_t sched, stru
|
|
862
909
|
}
|
863
910
|
|
864
911
|
// assigns backends to ops and splits the graph into subgraphs that can be computed on the same backend
|
865
|
-
|
912
|
+
void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
866
913
|
// reset splits
|
867
914
|
sched->n_splits = 0;
|
868
915
|
sched->n_graph_inputs = 0;
|
@@ -1084,6 +1131,11 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|
1084
1131
|
}
|
1085
1132
|
}
|
1086
1133
|
}
|
1134
|
+
// if the node is still unassigned, assign it to the first backend that supports it
|
1135
|
+
for (int b = 0; b < sched->n_backends && *cur_backend_id == -1; b++) {
|
1136
|
+
ggml_backend_sched_set_if_supported(sched, node, b, cur_backend_id);
|
1137
|
+
}
|
1138
|
+
GGML_ASSERT(*cur_backend_id != -1);
|
1087
1139
|
}
|
1088
1140
|
|
1089
1141
|
// pass 5: split graph, find tensors that need to be copied
|
@@ -1111,7 +1163,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|
1111
1163
|
|
1112
1164
|
const int node_backend_id = tensor_backend_id(node);
|
1113
1165
|
|
1114
|
-
|
1166
|
+
GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
|
1115
1167
|
|
1116
1168
|
// check if we should start a new split based on the sources of the current node
|
1117
1169
|
bool need_new_split = false;
|
@@ -1169,7 +1221,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|
1169
1221
|
|
1170
1222
|
size_t src_id = hash_id(src);
|
1171
1223
|
const int src_backend_id = sched->hv_tensor_backend_ids[src_id];
|
1172
|
-
|
1224
|
+
GGML_ASSERT(src_backend_id != -1); // all inputs should be assigned by now
|
1173
1225
|
|
1174
1226
|
if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
|
1175
1227
|
if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) {
|
@@ -1253,6 +1305,10 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|
1253
1305
|
struct ggml_backend_sched_split * split = &sched->splits[i];
|
1254
1306
|
split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
|
1255
1307
|
|
1308
|
+
// Optimize this split of the graph. This needs to happen before we make graph_copy,
|
1309
|
+
// so they are in sync.
|
1310
|
+
ggml_backend_graph_optimize(sched->backends[split->backend_id], &split->graph);
|
1311
|
+
|
1256
1312
|
// add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
|
1257
1313
|
for (int j = 0; j < split->n_inputs; j++) {
|
1258
1314
|
assert(graph_copy->size > (graph_copy->n_nodes + 1));
|
@@ -1340,7 +1396,10 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
|
1340
1396
|
// allocate graph
|
1341
1397
|
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
|
1342
1398
|
// the re-allocation may cause the split inputs to be moved to a different address
|
1343
|
-
ggml_backend_sched_synchronize
|
1399
|
+
// synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
|
1400
|
+
for (int i = 0; i < sched->n_backends; i++) {
|
1401
|
+
ggml_backend_synchronize(sched->backends[i]);
|
1402
|
+
}
|
1344
1403
|
#ifndef NDEBUG
|
1345
1404
|
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
|
1346
1405
|
#endif
|
@@ -1355,17 +1414,22 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
|
1355
1414
|
}
|
1356
1415
|
|
1357
1416
|
static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
|
1417
|
+
GGML_ASSERT(sched);
|
1358
1418
|
struct ggml_backend_sched_split * splits = sched->splits;
|
1359
1419
|
|
1360
|
-
|
1361
|
-
|
1420
|
+
ggml_tensor * prev_ids_tensor = nullptr;
|
1421
|
+
std::vector<int32_t> ids;
|
1422
|
+
std::vector<ggml_bitset_t> used_ids;
|
1423
|
+
|
1424
|
+
for (int split_id = 0; split_id < sched->n_splits; split_id++) {
|
1425
|
+
struct ggml_backend_sched_split * split = &splits[split_id];
|
1362
1426
|
int split_backend_id = split->backend_id;
|
1363
1427
|
ggml_backend_t split_backend = sched->backends[split_backend_id];
|
1364
1428
|
|
1365
1429
|
// copy the input tensors to the split backend
|
1366
|
-
for (int
|
1367
|
-
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[
|
1368
|
-
struct ggml_tensor * input = split->inputs[
|
1430
|
+
for (int input_id = 0; input_id < split->n_inputs; input_id++) {
|
1431
|
+
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[input_id]);
|
1432
|
+
struct ggml_tensor * input = split->inputs[input_id];
|
1369
1433
|
struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy);
|
1370
1434
|
|
1371
1435
|
if (input->flags & GGML_TENSOR_FLAG_INPUT) {
|
@@ -1383,16 +1447,104 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|
1383
1447
|
} else {
|
1384
1448
|
ggml_backend_synchronize(split_backend);
|
1385
1449
|
}
|
1386
|
-
|
1387
|
-
//
|
1388
|
-
|
1450
|
+
|
1451
|
+
// when offloading MoE weights, we can reduce the amount of data copied by copying only the experts that are used
|
1452
|
+
ggml_tensor * node = split->graph.nodes[0];
|
1453
|
+
if (split->graph.n_nodes > 0 &&
|
1454
|
+
ggml_backend_buffer_get_usage(input->buffer) == GGML_BACKEND_BUFFER_USAGE_WEIGHTS &&
|
1455
|
+
ggml_backend_buffer_is_host(input->buffer) && (
|
1456
|
+
(node->src[0] == input_cpy && node->op == GGML_OP_MUL_MAT_ID)
|
1457
|
+
//|| (node->src[1] == input_cpy && node->op == GGML_OP_ADD_ID) /* GGML_OP_ADD_ID weights are small and not worth splitting */
|
1458
|
+
)) {
|
1459
|
+
|
1460
|
+
const int64_t n_expert = node->op == GGML_OP_MUL_MAT_ID ? input->ne[2] : input->ne[1];
|
1461
|
+
const size_t expert_size = node->op == GGML_OP_MUL_MAT_ID ? input->nb[2] : input->nb[1];
|
1462
|
+
|
1389
1463
|
ggml_backend_synchronize(input_backend);
|
1390
|
-
|
1391
|
-
|
1392
|
-
|
1393
|
-
|
1464
|
+
|
1465
|
+
// get the ids
|
1466
|
+
ggml_tensor * ids_tensor = node->src[2];
|
1467
|
+
ggml_backend_t ids_backend = split_backend;
|
1468
|
+
|
1469
|
+
// if the ids tensor is also an input of the split, it may not have been copied yet to the split backend
|
1470
|
+
// in that case, we use the original ids tensor
|
1471
|
+
for (int i = input_id + 1; i < split->n_inputs; i++) {
|
1472
|
+
if (ids_tensor == tensor_copy(split->inputs[i], split_backend_id, sched->cur_copy)) {
|
1473
|
+
ids_tensor = split->inputs[i];
|
1474
|
+
ids_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[i]);
|
1475
|
+
break;
|
1476
|
+
}
|
1477
|
+
}
|
1478
|
+
|
1479
|
+
if (ids_tensor != prev_ids_tensor) {
|
1480
|
+
ids.resize(ggml_nbytes(ids_tensor) / sizeof(int32_t));
|
1481
|
+
ggml_backend_tensor_get_async(ids_backend, ids_tensor, ids.data(), 0, ggml_nbytes(ids_tensor));
|
1482
|
+
ggml_backend_synchronize(ids_backend);
|
1483
|
+
|
1484
|
+
// find the used experts
|
1485
|
+
used_ids.clear();
|
1486
|
+
used_ids.resize(ggml_bitset_size(n_expert));
|
1487
|
+
for (int64_t i1 = 0; i1 < ids_tensor->ne[1]; i1++) {
|
1488
|
+
for (int64_t i0 = 0; i0 < ids_tensor->ne[0]; i0++) {
|
1489
|
+
int32_t id = ids[i1 * ids_tensor->nb[1]/sizeof(int32_t) + i0 * ids_tensor->nb[0]/sizeof(int32_t)];
|
1490
|
+
GGML_ASSERT(id >= 0 && id < n_expert);
|
1491
|
+
ggml_bitset_set(used_ids.data(), id);
|
1492
|
+
}
|
1493
|
+
}
|
1494
|
+
|
1495
|
+
prev_ids_tensor = ids_tensor;
|
1496
|
+
}
|
1497
|
+
|
1498
|
+
// group consecutive experts and copy them together
|
1499
|
+
auto copy_experts = [&](int32_t first_id, int32_t last_id) {
|
1500
|
+
const size_t expert_offset = first_id * expert_size;
|
1501
|
+
const size_t expert_size_copy = (last_id - first_id + 1) * expert_size;
|
1502
|
+
const size_t padding = std::min<size_t>(expert_size, 512);
|
1503
|
+
const size_t padding_end = last_id < n_expert - 1 ? padding : 0;
|
1504
|
+
|
1505
|
+
ggml_backend_tensor_set_async(split_backend,
|
1506
|
+
input_cpy,
|
1507
|
+
(const uint8_t *)input->data + expert_offset, expert_offset,
|
1508
|
+
// copy a bit extra at the to ensure there are no NaNs in the padding of the last expert
|
1509
|
+
// this is necessary for MMQ in the CUDA backend
|
1510
|
+
expert_size_copy + padding_end);
|
1511
|
+
};
|
1512
|
+
|
1513
|
+
int id = 0;
|
1514
|
+
while (!ggml_bitset_get(used_ids.data(), id)) {
|
1515
|
+
id++;
|
1516
|
+
}
|
1517
|
+
int32_t first_id = id;
|
1518
|
+
int32_t last_id = first_id;
|
1519
|
+
|
1520
|
+
for (++id; id < n_expert; ++id) {
|
1521
|
+
if (!ggml_bitset_get(used_ids.data(), id)) {
|
1522
|
+
continue;
|
1523
|
+
}
|
1524
|
+
|
1525
|
+
if (id == last_id + 1) {
|
1526
|
+
last_id = id;
|
1527
|
+
continue;
|
1528
|
+
}
|
1529
|
+
|
1530
|
+
copy_experts(first_id, last_id);
|
1531
|
+
|
1532
|
+
first_id = id;
|
1533
|
+
last_id = id;
|
1534
|
+
}
|
1535
|
+
copy_experts(first_id, last_id);
|
1536
|
+
} else {
|
1537
|
+
// try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
|
1538
|
+
// TODO: add public function to facilitate this, since applications do not have direct access to the backend interface
|
1539
|
+
if (!split_backend->iface.cpy_tensor_async || !split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) {
|
1540
|
+
ggml_backend_synchronize(input_backend);
|
1541
|
+
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
|
1542
|
+
ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
|
1543
|
+
} else {
|
1544
|
+
ggml_backend_synchronize(split_backend);
|
1545
|
+
}
|
1546
|
+
ggml_backend_tensor_copy(input, input_cpy);
|
1394
1547
|
}
|
1395
|
-
ggml_backend_tensor_copy(input, input_cpy);
|
1396
1548
|
}
|
1397
1549
|
}
|
1398
1550
|
}
|
@@ -1444,8 +1596,6 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
|
1444
1596
|
}
|
1445
1597
|
}
|
1446
1598
|
|
1447
|
-
sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
|
1448
|
-
|
1449
1599
|
return GGML_STATUS_SUCCESS;
|
1450
1600
|
}
|
1451
1601
|
|
@@ -1533,6 +1683,7 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
|
|
1533
1683
|
}
|
1534
1684
|
|
1535
1685
|
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
|
1686
|
+
GGML_ASSERT(sched);
|
1536
1687
|
// reset state for the next run
|
1537
1688
|
if (!sched->is_reset) {
|
1538
1689
|
ggml_hash_set_reset(&sched->hash_set);
|
@@ -1544,12 +1695,15 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
|
|
1544
1695
|
}
|
1545
1696
|
|
1546
1697
|
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
|
1698
|
+
GGML_ASSERT(sched);
|
1547
1699
|
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
|
1548
1700
|
|
1549
|
-
|
1701
|
+
ggml_backend_sched_reset(sched);
|
1550
1702
|
|
1551
1703
|
ggml_backend_sched_synchronize(sched);
|
1552
1704
|
|
1705
|
+
ggml_backend_sched_split_graph(sched, measure_graph);
|
1706
|
+
|
1553
1707
|
if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
|
1554
1708
|
return false;
|
1555
1709
|
}
|
@@ -1560,10 +1714,14 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
|
|
1560
1714
|
}
|
1561
1715
|
|
1562
1716
|
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
1717
|
+
GGML_ASSERT(sched);
|
1563
1718
|
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
|
1719
|
+
GGML_ASSERT(!sched->is_alloc);
|
1564
1720
|
|
1565
|
-
|
1721
|
+
sched->cur_copy = sched->next_copy;
|
1722
|
+
sched->next_copy = (sched->next_copy + 1) % sched->n_copies;
|
1566
1723
|
|
1724
|
+
ggml_backend_sched_split_graph(sched, graph);
|
1567
1725
|
|
1568
1726
|
if (!ggml_backend_sched_alloc_splits(sched)) {
|
1569
1727
|
return false;
|
@@ -1581,6 +1739,7 @@ enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, st
|
|
1581
1739
|
}
|
1582
1740
|
|
1583
1741
|
enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
1742
|
+
GGML_ASSERT(sched);
|
1584
1743
|
if (!sched->is_reset && !sched->is_alloc) {
|
1585
1744
|
ggml_backend_sched_reset(sched);
|
1586
1745
|
}
|
@@ -1595,37 +1754,55 @@ enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sch
|
|
1595
1754
|
}
|
1596
1755
|
|
1597
1756
|
void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
|
1757
|
+
GGML_ASSERT(sched);
|
1598
1758
|
for (int i = 0; i < sched->n_backends; i++) {
|
1599
1759
|
ggml_backend_synchronize(sched->backends[i]);
|
1600
1760
|
}
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1761
|
+
if (!sched->is_alloc) {
|
1762
|
+
// if the graph is not already allocated, always use copy 0 after a synchronization
|
1763
|
+
// this ensures that during generation the same copy is used every time,
|
1764
|
+
// which avoids changes in the graph that could cause CUDA or other graphs to be disabled
|
1765
|
+
sched->next_copy = 0;
|
1766
|
+
}
|
1604
1767
|
}
|
1605
1768
|
|
1606
1769
|
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
|
1770
|
+
GGML_ASSERT(sched);
|
1607
1771
|
sched->callback_eval = callback;
|
1608
1772
|
sched->callback_eval_user_data = user_data;
|
1609
1773
|
}
|
1610
1774
|
|
1611
1775
|
int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) {
|
1776
|
+
GGML_ASSERT(sched);
|
1612
1777
|
return sched->n_splits;
|
1613
1778
|
}
|
1614
1779
|
|
1615
1780
|
int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
|
1781
|
+
GGML_ASSERT(sched);
|
1616
1782
|
return sched->n_copies;
|
1617
1783
|
}
|
1618
1784
|
|
1619
1785
|
int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) {
|
1786
|
+
GGML_ASSERT(sched);
|
1620
1787
|
return sched->n_backends;
|
1621
1788
|
}
|
1622
1789
|
|
1623
1790
|
ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) {
|
1791
|
+
GGML_ASSERT(sched);
|
1624
1792
|
GGML_ASSERT(i >= 0 && i < sched->n_backends);
|
1625
1793
|
return sched->backends[i];
|
1626
1794
|
}
|
1627
1795
|
|
1796
|
+
ggml_backend_buffer_type_t ggml_backend_sched_get_buffer_type(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
1797
|
+
GGML_ASSERT(sched);
|
1798
|
+
int backend_index = ggml_backend_sched_backend_id(sched, backend);
|
1799
|
+
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
|
1800
|
+
|
1801
|
+
return sched->bufts[backend_index];
|
1802
|
+
}
|
1803
|
+
|
1628
1804
|
size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
1805
|
+
GGML_ASSERT(sched);
|
1629
1806
|
int backend_index = ggml_backend_sched_backend_id(sched, backend);
|
1630
1807
|
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
|
1631
1808
|
|
@@ -1633,6 +1810,7 @@ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backe
|
|
1633
1810
|
}
|
1634
1811
|
|
1635
1812
|
void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
|
1813
|
+
GGML_ASSERT(sched);
|
1636
1814
|
int backend_index = ggml_backend_sched_backend_id(sched, backend);
|
1637
1815
|
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
|
1638
1816
|
tensor_backend_id(node) = backend_index;
|
@@ -1641,6 +1819,7 @@ void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct gg
|
|
1641
1819
|
}
|
1642
1820
|
|
1643
1821
|
ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
|
1822
|
+
GGML_ASSERT(sched);
|
1644
1823
|
int backend_index = tensor_backend_id(node);
|
1645
1824
|
if (backend_index == -1) {
|
1646
1825
|
return NULL;
|
@@ -1651,6 +1830,7 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
|
|
1651
1830
|
// utils
|
1652
1831
|
|
1653
1832
|
enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
|
1833
|
+
GGML_ASSERT(tensor);
|
1654
1834
|
GGML_ASSERT(tensor->buffer == NULL);
|
1655
1835
|
GGML_ASSERT(tensor->view_src != NULL);
|
1656
1836
|
GGML_ASSERT(tensor->view_src->buffer != NULL);
|
@@ -1662,6 +1842,7 @@ enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
|
|
1662
1842
|
}
|
1663
1843
|
|
1664
1844
|
enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
|
1845
|
+
GGML_ASSERT(tensor);
|
1665
1846
|
GGML_ASSERT(tensor->buffer == NULL);
|
1666
1847
|
GGML_ASSERT(tensor->data == NULL);
|
1667
1848
|
GGML_ASSERT(tensor->view_src == NULL);
|
@@ -1735,6 +1916,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_
|
|
1735
1916
|
}
|
1736
1917
|
|
1737
1918
|
struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
|
1919
|
+
GGML_ASSERT(graph);
|
1738
1920
|
struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
|
1739
1921
|
struct ggml_tensor ** node_copies = (ggml_tensor **) calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
|
1740
1922
|
bool * node_init = (bool *) calloc(hash_set.size, sizeof(node_init[0]));
|
@@ -1821,7 +2003,7 @@ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) {
|
|
1821
2003
|
ggml_free(copy.ctx_unallocated);
|
1822
2004
|
}
|
1823
2005
|
|
1824
|
-
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data) {
|
2006
|
+
bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) {
|
1825
2007
|
struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph);
|
1826
2008
|
if (copy.buffer == NULL) {
|
1827
2009
|
return false;
|
@@ -1832,28 +2014,45 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
|
|
1832
2014
|
|
1833
2015
|
assert(g1->n_nodes == g2->n_nodes);
|
1834
2016
|
|
1835
|
-
|
1836
|
-
|
1837
|
-
|
2017
|
+
if (test_node != nullptr) {
|
2018
|
+
// Compute the whole graph and only test the output for a specific tensor
|
2019
|
+
ggml_backend_graph_compute(backend1, g1);
|
2020
|
+
ggml_backend_graph_compute(backend2, g2);
|
1838
2021
|
|
1839
|
-
|
2022
|
+
int test_node_idx = -1;
|
2023
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
2024
|
+
struct ggml_tensor * t1 = g1->nodes[i];
|
2025
|
+
if (t1 == test_node) {
|
2026
|
+
test_node_idx = i;
|
2027
|
+
break;
|
2028
|
+
}
|
2029
|
+
}
|
2030
|
+
GGML_ASSERT(test_node_idx != -1);
|
1840
2031
|
|
1841
|
-
|
1842
|
-
|
2032
|
+
callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
|
2033
|
+
} else {
|
2034
|
+
for (int i = 0; i < g1->n_nodes; i++) {
|
2035
|
+
struct ggml_tensor * t1 = g1->nodes[i];
|
2036
|
+
struct ggml_tensor * t2 = g2->nodes[i];
|
1843
2037
|
|
1844
|
-
|
1845
|
-
ggml_backend_graph_compute(backend2, &g2v);
|
2038
|
+
assert(t1->op == t2->op && ggml_are_same_layout(t1, t2));
|
1846
2039
|
|
1847
|
-
|
1848
|
-
|
1849
|
-
}
|
2040
|
+
struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1);
|
2041
|
+
struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1);
|
1850
2042
|
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
2043
|
+
ggml_backend_graph_compute(backend1, &g1v);
|
2044
|
+
ggml_backend_graph_compute(backend2, &g2v);
|
2045
|
+
|
2046
|
+
if (ggml_is_view_op(t1->op)) {
|
2047
|
+
continue;
|
2048
|
+
}
|
2049
|
+
|
2050
|
+
// compare results, calculate rms etc
|
2051
|
+
if (!callback(i, t1, t2, user_data)) {
|
2052
|
+
break;
|
2053
|
+
}
|
1854
2054
|
}
|
1855
2055
|
}
|
1856
|
-
|
1857
2056
|
ggml_backend_graph_copy_free(copy);
|
1858
2057
|
|
1859
2058
|
return true;
|
@@ -1862,6 +2061,7 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
|
|
1862
2061
|
// CPU backend - buffer
|
1863
2062
|
|
1864
2063
|
static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
|
2064
|
+
GGML_ASSERT(buffer);
|
1865
2065
|
uintptr_t data = (uintptr_t)buffer->context;
|
1866
2066
|
|
1867
2067
|
// align the buffer
|
@@ -1873,28 +2073,33 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
1873
2073
|
}
|
1874
2074
|
|
1875
2075
|
static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
2076
|
+
GGML_ASSERT(buffer);
|
1876
2077
|
ggml_aligned_free(buffer->context, buffer->size);
|
1877
2078
|
}
|
1878
2079
|
|
1879
2080
|
static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
2081
|
+
GGML_ASSERT(tensor);
|
1880
2082
|
memset((char *)tensor->data + offset, value, size);
|
1881
2083
|
|
1882
2084
|
GGML_UNUSED(buffer);
|
1883
2085
|
}
|
1884
2086
|
|
1885
2087
|
static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
2088
|
+
GGML_ASSERT(tensor);
|
1886
2089
|
memcpy((char *)tensor->data + offset, data, size);
|
1887
2090
|
|
1888
2091
|
GGML_UNUSED(buffer);
|
1889
2092
|
}
|
1890
2093
|
|
1891
2094
|
static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
2095
|
+
GGML_ASSERT(tensor);
|
1892
2096
|
memcpy(data, (const char *)tensor->data + offset, size);
|
1893
2097
|
|
1894
2098
|
GGML_UNUSED(buffer);
|
1895
2099
|
}
|
1896
2100
|
|
1897
2101
|
static bool ggml_backend_cpu_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
|
2102
|
+
GGML_ASSERT(src);
|
1898
2103
|
if (ggml_backend_buffer_is_host(src->buffer)) {
|
1899
2104
|
memcpy(dst->data, src->data, ggml_nbytes(src));
|
1900
2105
|
return true;
|
@@ -1905,6 +2110,7 @@ static bool ggml_backend_cpu_buffer_cpy_tensor(ggml_backend_buffer_t buffer, con
|
|
1905
2110
|
}
|
1906
2111
|
|
1907
2112
|
static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
2113
|
+
GGML_ASSERT(buffer);
|
1908
2114
|
memset(buffer->context, value, buffer->size);
|
1909
2115
|
}
|
1910
2116
|
|