whispercpp 1.3.2 → 1.3.4
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -3
- data/README.md +71 -14
- data/Rakefile +20 -7
- data/ext/.gitignore +4 -6
- data/ext/dependencies.rb +36 -24
- data/ext/extconf.rb +1 -1
- data/ext/options.rb +48 -184
- data/ext/ruby_whisper.c +18 -0
- data/ext/ruby_whisper_context.c +43 -12
- data/ext/ruby_whisper_model.c +1 -1
- data/ext/ruby_whisper_params.c +59 -27
- data/ext/ruby_whisper_segment.c +81 -4
- data/ext/ruby_whisper_transcribe.cpp +13 -7
- data/ext/ruby_whisper_vad_params.c +1 -1
- data/ext/sources/CMakeLists.txt +5 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/build-xcframework.sh +24 -0
- data/ext/sources/examples/CMakeLists.txt +1 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
- data/ext/sources/examples/addon.node/addon.cpp +154 -35
- data/ext/sources/examples/addon.node/index.js +10 -5
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/bench.cpp +29 -18
- data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
- data/ext/sources/examples/cli/cli.cpp +7 -4
- data/ext/sources/examples/command/command.cpp +58 -32
- data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/common-whisper.cpp +14 -7
- data/ext/sources/examples/lsp/lsp.cpp +21 -17
- data/ext/sources/examples/quantize/quantize.cpp +3 -0
- data/ext/sources/examples/server/CMakeLists.txt +3 -0
- data/ext/sources/examples/server/server.cpp +193 -35
- data/ext/sources/examples/server.py +6 -1
- data/ext/sources/examples/stream/stream.cpp +10 -2
- data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
- data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +101 -4
- data/ext/sources/examples/talk-llama/llama-adapter.h +6 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +756 -15
- data/ext/sources/examples/talk-llama/llama-arch.h +85 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +773 -272
- data/ext/sources/examples/talk-llama/llama-batch.h +126 -55
- data/ext/sources/examples/talk-llama/llama-chat.cpp +150 -13
- data/ext/sources/examples/talk-llama/llama-chat.h +8 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +814 -542
- data/ext/sources/examples/talk-llama/llama-context.h +68 -32
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-cparams.h +4 -4
- data/ext/sources/examples/talk-llama/llama-graph.cpp +787 -440
- data/ext/sources/examples/talk-llama/llama-graph.h +333 -153
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +128 -6
- data/ext/sources/examples/talk-llama/llama-hparams.h +80 -17
- data/ext/sources/examples/talk-llama/llama-impl.h +2 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +326 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +137 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +1248 -1967
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +218 -345
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +164 -52
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +266 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +139 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1154 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +182 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +94 -4
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +44 -17
- data/ext/sources/examples/talk-llama/llama-model-loader.h +3 -2
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +11377 -5248
- data/ext/sources/examples/talk-llama/llama-model.h +87 -9
- data/ext/sources/examples/talk-llama/llama-quant.cpp +137 -16
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +226 -126
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +502 -38
- data/ext/sources/examples/talk-llama/llama-vocab.h +46 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -17
- data/ext/sources/examples/talk-llama/llama.h +176 -151
- data/ext/sources/examples/talk-llama/talk-llama.cpp +11 -6
- data/ext/sources/examples/talk-llama/unicode.cpp +212 -0
- data/ext/sources/examples/talk-llama/unicode.h +45 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +6 -2
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +17 -16
- data/ext/sources/ggml/CMakeLists.txt +106 -33
- data/ext/sources/ggml/cmake/common.cmake +24 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
- data/ext/sources/ggml/include/ggml-backend.h +18 -2
- data/ext/sources/ggml/include/ggml-cpu.h +2 -0
- data/ext/sources/ggml/include/ggml-metal.h +1 -6
- data/ext/sources/ggml/include/ggml-opt.h +25 -6
- data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
- data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
- data/ext/sources/ggml/include/ggml.h +365 -21
- data/ext/sources/ggml/src/CMakeLists.txt +98 -25
- data/ext/sources/ggml/src/ggml-alloc.c +265 -141
- data/ext/sources/ggml/src/ggml-backend-impl.h +4 -1
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +35 -13
- data/ext/sources/ggml/src/ggml-backend.cpp +266 -60
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +4 -4
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -4
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +15 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +903 -717
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +143 -25
- data/ext/sources/ggml/src/ggml-cann/common.h +149 -2
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +521 -78
- data/ext/sources/ggml/src/ggml-common.h +21 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +165 -50
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +5 -3
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +3650 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1891 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2160 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1897 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +214 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +18 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +23 -7
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +179 -110
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +44 -33
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +152 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +7 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +228 -98
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +532 -1124
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +3374 -2081
- data/ext/sources/ggml/src/ggml-cpu/ops.h +13 -8
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1193 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +34 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1982 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +120 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +367 -46
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +3 -3
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +272 -35
- data/ext/sources/ggml/src/ggml-cpu/vec.h +794 -142
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +20 -16
- data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
- data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +291 -81
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +117 -22
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +20 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +64 -307
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +499 -368
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +142 -93
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +755 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +593 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +90 -50
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +185 -198
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +636 -222
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +73 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +198 -45
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +123 -0
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +496 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +206 -57
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1262 -721
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +506 -0
- data/ext/sources/ggml/src/ggml-cuda/{mmv.cuh → mmvf.cuh} +4 -5
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +64 -73
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +46 -23
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +12 -10
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
- data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +21 -27
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +276 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +126 -59
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +322 -98
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +23 -19
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +21 -18
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +259 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +179 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +92 -6
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +58 -36
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +4 -3
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -2
- data/ext/sources/ggml/src/ggml-impl.h +229 -175
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +21 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +600 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1376 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +226 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1308 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +163 -63
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +3158 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +82 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +718 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3208 -1575
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +18 -8
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +32 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4430 -792
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +370 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +133 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +189 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
- data/ext/sources/ggml/src/ggml-quants.c +117 -24
- data/ext/sources/ggml/src/ggml-quants.h +6 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +85 -62
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +13 -17
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +21 -2
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +116 -211
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +700 -1041
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +20 -9
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +17 -26
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +2 -96
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +393 -250
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +32 -8
- data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -11
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +125 -21
- data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
- data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +4 -3
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +105 -17
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4198 -1145
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +41 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +13 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +39 -29
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +349 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +66 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +2 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +6 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +4 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +69 -24
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +60 -20
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +98 -42
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +64 -27
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +74 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +4 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +19 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +25 -15
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +18 -14
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +126 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +65 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +11 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -531
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +206 -38
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp +556 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +12 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +15 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +111 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +24 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +53 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +64 -11
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +29 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +4 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +38 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +4 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +101 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +338 -71
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1558 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +124 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +44 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +41 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +57 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +48 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
- data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
- data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
- data/ext/sources/ggml/src/ggml.c +802 -142
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +32 -4
- data/ext/sources/include/whisper.h +2 -0
- data/ext/sources/src/CMakeLists.txt +2 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
- data/ext/sources/src/whisper.cpp +241 -215
- data/ext/sources/tests/CMakeLists.txt +8 -1
- data/ext/sources/tests/test-vad-full.cpp +3 -3
- data/ext/sources/tests/test-vad.cpp +2 -2
- data/extsources.rb +15 -9
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +57 -2
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +75 -38
- data/{tests → test}/helper.rb +1 -12
- data/{tests → test}/test_model.rb +9 -0
- data/test/test_package.rb +51 -0
- data/{tests → test}/test_params.rb +8 -0
- data/test/test_segment.rb +146 -0
- data/{tests → test}/test_whisper.rb +70 -0
- data/whispercpp.gemspec +2 -3
- metadata +246 -191
- data/ext/sources/.dockerignore +0 -3
- data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
- data/ext/sources/ci/run.sh +0 -336
- data/ext/sources/close-issue.yml +0 -28
- data/ext/sources/ggml/include/ggml-kompute.h +0 -50
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
- data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -6431
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +0 -336
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -5998
- data/tests/test_package.rb +0 -46
- data/tests/test_segment.rb +0 -74
- /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
- /data/{tests → test}/test_callback.rb +0 -0
- /data/{tests → test}/test_error.rb +0 -0
- /data/{tests → test}/test_vad.rb +0 -0
- /data/{tests → test}/test_vad_params.rb +0 -0
@@ -3,11 +3,11 @@
|
|
3
3
|
|
4
4
|
#include "ggml-backend-impl.h"
|
5
5
|
#include "ggml-backend.h"
|
6
|
-
#include "
|
6
|
+
#include "traits.h"
|
7
7
|
#include "ggml-cpu-impl.h"
|
8
8
|
#include "ggml-cpu.h"
|
9
9
|
#include "ggml-impl.h"
|
10
|
-
#include "
|
10
|
+
#include "quants.h"
|
11
11
|
#include "ggml-threading.h"
|
12
12
|
#include "unary-ops.h"
|
13
13
|
#include "binary-ops.h"
|
@@ -72,15 +72,13 @@
|
|
72
72
|
#define UNUSED GGML_UNUSED
|
73
73
|
#define SWAP(x, y, T) do { T SWAP = x; (x) = y; (y) = SWAP; } while (0)
|
74
74
|
|
75
|
+
// precomputed f32 table for f16 (256 KB) (simd-mappings.h)
|
76
|
+
float ggml_table_f32_f16[1 << 16];
|
77
|
+
|
75
78
|
#if defined(__ARM_ARCH)
|
76
79
|
struct ggml_arm_arch_features_type {
|
77
|
-
int has_neon;
|
78
|
-
int has_dotprod;
|
79
|
-
int has_i8mm;
|
80
|
-
int has_sve;
|
81
80
|
int sve_cnt;
|
82
|
-
|
83
|
-
} ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1};
|
81
|
+
} ggml_arm_arch_features = { 0 };
|
84
82
|
#endif
|
85
83
|
|
86
84
|
|
@@ -197,6 +195,7 @@ typedef pthread_t ggml_thread_t;
|
|
197
195
|
|
198
196
|
static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
199
197
|
[GGML_TYPE_F32] = {
|
198
|
+
.from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32,
|
200
199
|
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
|
201
200
|
.vec_dot_type = GGML_TYPE_F32,
|
202
201
|
.nrows = 1,
|
@@ -254,6 +253,12 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
|
254
253
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
255
254
|
.nrows = 1,
|
256
255
|
},
|
256
|
+
[GGML_TYPE_MXFP4] = {
|
257
|
+
.from_float = quantize_row_mxfp4,
|
258
|
+
.vec_dot = ggml_vec_dot_mxfp4_q8_0,
|
259
|
+
.vec_dot_type = GGML_TYPE_Q8_0,
|
260
|
+
.nrows = 1,
|
261
|
+
},
|
257
262
|
[GGML_TYPE_Q2_K] = {
|
258
263
|
.from_float = quantize_row_q2_K,
|
259
264
|
.vec_dot = ggml_vec_dot_q2_K_q8_K,
|
@@ -270,7 +275,11 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
|
270
275
|
.from_float = quantize_row_q4_K,
|
271
276
|
.vec_dot = ggml_vec_dot_q4_K_q8_K,
|
272
277
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
278
|
+
#if defined (__ARM_FEATURE_MATMUL_INT8)
|
279
|
+
.nrows = 2,
|
280
|
+
#else
|
273
281
|
.nrows = 1,
|
282
|
+
#endif
|
274
283
|
},
|
275
284
|
[GGML_TYPE_Q5_K] = {
|
276
285
|
.from_float = quantize_row_q5_K,
|
@@ -364,6 +373,9 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
|
364
373
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
365
374
|
.nrows = 1,
|
366
375
|
},
|
376
|
+
[GGML_TYPE_I32] = {
|
377
|
+
.from_float = (ggml_from_float_t) ggml_cpu_fp32_to_i32,
|
378
|
+
},
|
367
379
|
};
|
368
380
|
|
369
381
|
const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type) {
|
@@ -461,10 +473,10 @@ struct ggml_threadpool {
|
|
461
473
|
struct ggml_compute_state {
|
462
474
|
#ifndef GGML_USE_OPENMP
|
463
475
|
ggml_thread_t thrd;
|
464
|
-
bool cpumask[GGML_MAX_N_THREADS];
|
465
476
|
int last_graph;
|
466
477
|
bool pending;
|
467
478
|
#endif
|
479
|
+
bool cpumask[GGML_MAX_N_THREADS];
|
468
480
|
struct ggml_threadpool * threadpool;
|
469
481
|
int ith;
|
470
482
|
};
|
@@ -555,6 +567,14 @@ void ggml_barrier(struct ggml_threadpool * tp) {
|
|
555
567
|
#endif
|
556
568
|
}
|
557
569
|
|
570
|
+
void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value) {
|
571
|
+
atomic_store_explicit(&tp->current_chunk, value, memory_order_relaxed);
|
572
|
+
}
|
573
|
+
|
574
|
+
int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value) {
|
575
|
+
return atomic_fetch_add_explicit(&tp->current_chunk, value, memory_order_relaxed);
|
576
|
+
}
|
577
|
+
|
558
578
|
#if defined(__gnu_linux__)
|
559
579
|
static cpu_set_t ggml_get_numa_affinity(void) {
|
560
580
|
cpu_set_t cpuset;
|
@@ -666,87 +686,15 @@ bool ggml_is_numa(void) {
|
|
666
686
|
|
667
687
|
#if defined(__linux__) && defined(__aarch64__)
|
668
688
|
#include <sys/auxv.h>
|
669
|
-
#elif defined(__APPLE__)
|
670
|
-
#include <sys/sysctl.h>
|
671
|
-
#endif
|
672
|
-
|
673
|
-
#if !defined(HWCAP2_I8MM)
|
674
|
-
#define HWCAP2_I8MM (1 << 13)
|
675
|
-
#endif
|
676
|
-
|
677
|
-
#if !defined(HWCAP2_SME)
|
678
|
-
#define HWCAP2_SME (1 << 23)
|
679
689
|
#endif
|
680
690
|
|
681
691
|
static void ggml_init_arm_arch_features(void) {
|
682
|
-
#if defined(__linux__) && defined(__aarch64__)
|
683
|
-
uint32_t hwcap = getauxval(AT_HWCAP);
|
684
|
-
uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
685
|
-
|
686
|
-
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
|
687
|
-
ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
|
688
|
-
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
689
|
-
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
|
690
|
-
ggml_arm_arch_features.has_sme = !!(hwcap2 & HWCAP2_SME);
|
691
|
-
|
692
|
-
#if defined(__ARM_FEATURE_SVE)
|
692
|
+
#if defined(__linux__) && defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
|
693
693
|
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
694
694
|
#endif
|
695
|
-
#elif defined(__APPLE__)
|
696
|
-
int oldp = 0;
|
697
|
-
size_t size = sizeof(oldp);
|
698
|
-
if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
|
699
|
-
oldp = 0;
|
700
|
-
}
|
701
|
-
ggml_arm_arch_features.has_neon = oldp;
|
702
|
-
|
703
|
-
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) {
|
704
|
-
oldp = 0;
|
705
|
-
}
|
706
|
-
ggml_arm_arch_features.has_dotprod = oldp;
|
707
|
-
|
708
|
-
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
|
709
|
-
oldp = 0;
|
710
|
-
}
|
711
|
-
ggml_arm_arch_features.has_i8mm = oldp;
|
712
|
-
|
713
|
-
if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) != 0) {
|
714
|
-
oldp = 0;
|
715
|
-
}
|
716
|
-
ggml_arm_arch_features.has_sme = oldp;
|
717
|
-
|
718
|
-
ggml_arm_arch_features.has_sve = 0;
|
719
|
-
ggml_arm_arch_features.sve_cnt = 0;
|
720
|
-
#else
|
721
|
-
// Run-time CPU feature detection not implemented for this platform, fallback to compile time
|
722
|
-
#if defined(__ARM_NEON)
|
723
|
-
ggml_arm_arch_features.has_neon = 1;
|
724
|
-
#else
|
725
|
-
ggml_arm_arch_features.has_neon = 0;
|
726
|
-
#endif
|
727
|
-
|
728
|
-
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
729
|
-
ggml_arm_arch_features.has_i8mm = 1;
|
730
|
-
#else
|
731
|
-
ggml_arm_arch_features.has_i8mm = 0;
|
732
|
-
#endif
|
733
|
-
|
734
|
-
#if defined(__ARM_FEATURE_SVE)
|
735
|
-
ggml_arm_arch_features.has_sve = 1;
|
736
|
-
ggml_arm_arch_features.sve_cnt = 16;
|
737
|
-
#else
|
738
|
-
ggml_arm_arch_features.has_sve = 0;
|
739
|
-
ggml_arm_arch_features.sve_cnt = 0;
|
740
|
-
#endif
|
741
|
-
|
742
|
-
#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2)
|
743
|
-
ggml_arm_arch_features.has_sme = 1;
|
744
|
-
#else
|
745
|
-
ggml_arm_arch_features.has_sme = 0;
|
746
|
-
#endif
|
747
|
-
#endif
|
748
695
|
}
|
749
|
-
|
696
|
+
|
697
|
+
#endif // __ARM_ARCH
|
750
698
|
|
751
699
|
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
|
752
700
|
GGML_ASSERT(!ggml_get_no_alloc(ctx));
|
@@ -801,7 +749,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) {
|
|
801
749
|
{
|
802
750
|
assert(tensor->nb[0] == sizeof(ggml_fp16_t));
|
803
751
|
for (int i = 0; i < n; i++) {
|
804
|
-
ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1),
|
752
|
+
ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
|
805
753
|
}
|
806
754
|
} break;
|
807
755
|
case GGML_TYPE_BF16:
|
@@ -860,7 +808,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
|
|
860
808
|
{
|
861
809
|
assert(tensor->nb[0] == sizeof(ggml_fp16_t));
|
862
810
|
for (int i = 0; i < n; i++) {
|
863
|
-
ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1),
|
811
|
+
ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
|
864
812
|
}
|
865
813
|
} break;
|
866
814
|
case GGML_TYPE_BF16:
|
@@ -911,7 +859,7 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
|
|
911
859
|
case GGML_TYPE_F16:
|
912
860
|
{
|
913
861
|
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
914
|
-
return
|
862
|
+
return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
915
863
|
}
|
916
864
|
case GGML_TYPE_BF16:
|
917
865
|
{
|
@@ -956,7 +904,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
|
|
956
904
|
case GGML_TYPE_F16:
|
957
905
|
{
|
958
906
|
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
959
|
-
((ggml_fp16_t *)(tensor->data))[i] =
|
907
|
+
((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
|
960
908
|
} break;
|
961
909
|
case GGML_TYPE_BF16:
|
962
910
|
{
|
@@ -985,7 +933,7 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
|
|
985
933
|
case GGML_TYPE_I32:
|
986
934
|
return ((int32_t *) data)[0];
|
987
935
|
case GGML_TYPE_F16:
|
988
|
-
return
|
936
|
+
return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
989
937
|
case GGML_TYPE_BF16:
|
990
938
|
return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
|
991
939
|
case GGML_TYPE_F32:
|
@@ -1012,7 +960,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
|
|
1012
960
|
} break;
|
1013
961
|
case GGML_TYPE_F16:
|
1014
962
|
{
|
1015
|
-
((ggml_fp16_t *)(data))[0] =
|
963
|
+
((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
|
1016
964
|
} break;
|
1017
965
|
case GGML_TYPE_BF16:
|
1018
966
|
{
|
@@ -1050,7 +998,7 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
|
|
1050
998
|
}
|
1051
999
|
case GGML_TYPE_F16:
|
1052
1000
|
{
|
1053
|
-
return
|
1001
|
+
return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
1054
1002
|
}
|
1055
1003
|
case GGML_TYPE_BF16:
|
1056
1004
|
{
|
@@ -1089,7 +1037,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
|
|
1089
1037
|
} break;
|
1090
1038
|
case GGML_TYPE_F16:
|
1091
1039
|
{
|
1092
|
-
((ggml_fp16_t *)(tensor->data))[i] =
|
1040
|
+
((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
|
1093
1041
|
} break;
|
1094
1042
|
case GGML_TYPE_BF16:
|
1095
1043
|
{
|
@@ -1116,7 +1064,7 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
|
|
1116
1064
|
case GGML_TYPE_I32:
|
1117
1065
|
return ((int32_t *) data)[0];
|
1118
1066
|
case GGML_TYPE_F16:
|
1119
|
-
return
|
1067
|
+
return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
1120
1068
|
case GGML_TYPE_BF16:
|
1121
1069
|
return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
|
1122
1070
|
case GGML_TYPE_F32:
|
@@ -1143,7 +1091,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
|
|
1143
1091
|
} break;
|
1144
1092
|
case GGML_TYPE_F16:
|
1145
1093
|
{
|
1146
|
-
((ggml_fp16_t *)(data))[0] =
|
1094
|
+
((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
|
1147
1095
|
} break;
|
1148
1096
|
case GGML_TYPE_BF16:
|
1149
1097
|
{
|
@@ -1254,7 +1202,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
|
|
1254
1202
|
}
|
1255
1203
|
}
|
1256
1204
|
|
1257
|
-
|
1205
|
+
void ggml_compute_forward_mul_mat(
|
1258
1206
|
const struct ggml_compute_params * params,
|
1259
1207
|
struct ggml_tensor * dst) {
|
1260
1208
|
|
@@ -1731,6 +1679,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
1731
1679
|
{
|
1732
1680
|
ggml_compute_forward_add(params, tensor);
|
1733
1681
|
} break;
|
1682
|
+
case GGML_OP_ADD_ID:
|
1683
|
+
{
|
1684
|
+
ggml_compute_forward_add_id(params, tensor);
|
1685
|
+
} break;
|
1734
1686
|
case GGML_OP_ADD1:
|
1735
1687
|
{
|
1736
1688
|
ggml_compute_forward_add1(params, tensor);
|
@@ -1879,6 +1831,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
1879
1831
|
{
|
1880
1832
|
ggml_compute_forward_get_rows_back(params, tensor);
|
1881
1833
|
} break;
|
1834
|
+
case GGML_OP_SET_ROWS:
|
1835
|
+
{
|
1836
|
+
ggml_compute_forward_set_rows(params, tensor);
|
1837
|
+
} break;
|
1882
1838
|
case GGML_OP_DIAG:
|
1883
1839
|
{
|
1884
1840
|
ggml_compute_forward_diag(params, tensor);
|
@@ -1923,6 +1879,18 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
1923
1879
|
{
|
1924
1880
|
ggml_compute_forward_im2col_back_f32(params, tensor);
|
1925
1881
|
} break;
|
1882
|
+
case GGML_OP_IM2COL_3D:
|
1883
|
+
{
|
1884
|
+
ggml_compute_forward_im2col_3d(params, tensor);
|
1885
|
+
} break;
|
1886
|
+
case GGML_OP_CONV_2D:
|
1887
|
+
{
|
1888
|
+
ggml_compute_forward_conv_2d(params, tensor);
|
1889
|
+
} break;
|
1890
|
+
case GGML_OP_CONV_3D:
|
1891
|
+
{
|
1892
|
+
ggml_compute_forward_conv_3d(params, tensor);
|
1893
|
+
} break;
|
1926
1894
|
case GGML_OP_CONV_2D_DW:
|
1927
1895
|
{
|
1928
1896
|
ggml_compute_forward_conv_2d_dw(params, tensor);
|
@@ -1955,6 +1923,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
1955
1923
|
{
|
1956
1924
|
ggml_compute_forward_pad_reflect_1d(params, tensor);
|
1957
1925
|
} break;
|
1926
|
+
case GGML_OP_ROLL:
|
1927
|
+
{
|
1928
|
+
ggml_compute_forward_roll(params, tensor);
|
1929
|
+
} break;
|
1958
1930
|
case GGML_OP_ARANGE:
|
1959
1931
|
{
|
1960
1932
|
ggml_compute_forward_arange(params, tensor);
|
@@ -1973,7 +1945,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
1973
1945
|
} break;
|
1974
1946
|
case GGML_OP_FLASH_ATTN_EXT:
|
1975
1947
|
{
|
1976
|
-
ggml_compute_forward_flash_attn_ext(params, tensor
|
1948
|
+
ggml_compute_forward_flash_attn_ext(params, tensor);
|
1977
1949
|
} break;
|
1978
1950
|
case GGML_OP_FLASH_ATTN_BACK:
|
1979
1951
|
{
|
@@ -2002,6 +1974,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
2002
1974
|
{
|
2003
1975
|
ggml_compute_forward_unary(params, tensor);
|
2004
1976
|
} break;
|
1977
|
+
case GGML_OP_GLU:
|
1978
|
+
{
|
1979
|
+
ggml_compute_forward_glu(params, tensor);
|
1980
|
+
} break;
|
2005
1981
|
case GGML_OP_GET_REL_POS:
|
2006
1982
|
{
|
2007
1983
|
ggml_compute_forward_get_rel_pos(params, tensor);
|
@@ -2057,6 +2033,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
2057
2033
|
ggml_compute_forward_opt_step_adamw(params, tensor);
|
2058
2034
|
}
|
2059
2035
|
break;
|
2036
|
+
case GGML_OP_OPT_STEP_SGD:
|
2037
|
+
{
|
2038
|
+
ggml_compute_forward_opt_step_sgd(params, tensor);
|
2039
|
+
}
|
2040
|
+
break;
|
2060
2041
|
case GGML_OP_NONE:
|
2061
2042
|
{
|
2062
2043
|
// nop
|
@@ -2156,6 +2137,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
2156
2137
|
case GGML_OP_DUP:
|
2157
2138
|
case GGML_OP_CONT:
|
2158
2139
|
case GGML_OP_ADD:
|
2140
|
+
case GGML_OP_ADD_ID:
|
2159
2141
|
case GGML_OP_ADD1:
|
2160
2142
|
case GGML_OP_ACC:
|
2161
2143
|
{
|
@@ -2212,6 +2194,21 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
2212
2194
|
GGML_ABORT("fatal error");
|
2213
2195
|
}
|
2214
2196
|
break;
|
2197
|
+
case GGML_OP_GLU:
|
2198
|
+
switch (ggml_get_glu_op(node)) {
|
2199
|
+
case GGML_GLU_OP_REGLU:
|
2200
|
+
case GGML_GLU_OP_GEGLU:
|
2201
|
+
case GGML_GLU_OP_SWIGLU:
|
2202
|
+
case GGML_GLU_OP_SWIGLU_OAI:
|
2203
|
+
case GGML_GLU_OP_GEGLU_ERF:
|
2204
|
+
case GGML_GLU_OP_GEGLU_QUICK:
|
2205
|
+
{
|
2206
|
+
n_tasks = n_threads;
|
2207
|
+
} break;
|
2208
|
+
default:
|
2209
|
+
GGML_ABORT("fatal error");
|
2210
|
+
}
|
2211
|
+
break;
|
2215
2212
|
case GGML_OP_SILU_BACK:
|
2216
2213
|
case GGML_OP_MUL:
|
2217
2214
|
case GGML_OP_DIV:
|
@@ -2228,6 +2225,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
2228
2225
|
n_tasks = n_threads;
|
2229
2226
|
} break;
|
2230
2227
|
case GGML_OP_GET_ROWS:
|
2228
|
+
case GGML_OP_SET_ROWS:
|
2231
2229
|
{
|
2232
2230
|
// FIXME: get_rows can use additional threads, but the cost of launching additional threads
|
2233
2231
|
// decreases performance with GPU offloading
|
@@ -2264,6 +2262,9 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
2264
2262
|
} break;
|
2265
2263
|
case GGML_OP_IM2COL:
|
2266
2264
|
case GGML_OP_IM2COL_BACK:
|
2265
|
+
case GGML_OP_IM2COL_3D:
|
2266
|
+
case GGML_OP_CONV_2D:
|
2267
|
+
case GGML_OP_CONV_3D:
|
2267
2268
|
case GGML_OP_CONV_2D_DW:
|
2268
2269
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
2269
2270
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
@@ -2279,6 +2280,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
2279
2280
|
case GGML_OP_UPSCALE:
|
2280
2281
|
case GGML_OP_PAD:
|
2281
2282
|
case GGML_OP_PAD_REFLECT_1D:
|
2283
|
+
case GGML_OP_ROLL:
|
2282
2284
|
case GGML_OP_ARANGE:
|
2283
2285
|
case GGML_OP_TIMESTEP_EMBEDDING:
|
2284
2286
|
case GGML_OP_ARGSORT:
|
@@ -2341,6 +2343,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
2341
2343
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
2342
2344
|
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
|
2343
2345
|
case GGML_OP_OPT_STEP_ADAMW:
|
2346
|
+
case GGML_OP_OPT_STEP_SGD:
|
2344
2347
|
{
|
2345
2348
|
n_tasks = n_threads;
|
2346
2349
|
} break;
|
@@ -2414,12 +2417,32 @@ static bool ggml_thread_apply_priority(int32_t prio) {
|
|
2414
2417
|
// This is up to the applications.
|
2415
2418
|
DWORD p = THREAD_PRIORITY_NORMAL;
|
2416
2419
|
switch (prio) {
|
2420
|
+
case GGML_SCHED_PRIO_LOW: p = THREAD_PRIORITY_BELOW_NORMAL; break;
|
2417
2421
|
case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
|
2418
2422
|
case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
|
2419
2423
|
case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
|
2420
2424
|
case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
|
2421
2425
|
}
|
2422
2426
|
|
2427
|
+
if (prio != GGML_SCHED_PRIO_LOW) {
|
2428
|
+
// Tell Windows that this thread should not be throttled (needs its own CPU core).
|
2429
|
+
// Newer Windows 11 versions aggressively park (offline) CPU cores and often place
|
2430
|
+
// all our threads onto the first 4 cores which results in terrible performance with
|
2431
|
+
// n_threads > 4
|
2432
|
+
#if _WIN32_WINNT >= 0x0602
|
2433
|
+
THREAD_POWER_THROTTLING_STATE t;
|
2434
|
+
ZeroMemory(&t, sizeof(t));
|
2435
|
+
t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
|
2436
|
+
t.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;
|
2437
|
+
t.StateMask = 0;
|
2438
|
+
|
2439
|
+
if (!SetThreadInformation(GetCurrentThread(), ThreadPowerThrottling, &t, sizeof(t))) {
|
2440
|
+
GGML_LOG_DEBUG("failed to disable thread power throttling %d : (%d)\n", prio, (int) GetLastError());
|
2441
|
+
return false;
|
2442
|
+
}
|
2443
|
+
#endif
|
2444
|
+
}
|
2445
|
+
|
2423
2446
|
if (prio == GGML_SCHED_PRIO_NORMAL) {
|
2424
2447
|
// Keep inherited policy/priority
|
2425
2448
|
return true;
|
@@ -2447,6 +2470,8 @@ static bool ggml_thread_apply_priority(int32_t prio) {
|
|
2447
2470
|
struct sched_param p;
|
2448
2471
|
int32_t policy = SCHED_OTHER;
|
2449
2472
|
switch (prio) {
|
2473
|
+
// TODO: there seems to be no way to set lower prio on Apple platforms
|
2474
|
+
case GGML_SCHED_PRIO_LOW: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
2450
2475
|
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
2451
2476
|
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
|
2452
2477
|
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
|
@@ -2503,6 +2528,7 @@ static bool ggml_thread_apply_priority(int32_t prio) {
|
|
2503
2528
|
struct sched_param p;
|
2504
2529
|
int32_t policy = SCHED_OTHER;
|
2505
2530
|
switch (prio) {
|
2531
|
+
case GGML_SCHED_PRIO_LOW: policy = SCHED_BATCH; p.sched_priority = 0; break;
|
2506
2532
|
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
|
2507
2533
|
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
|
2508
2534
|
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
|
@@ -2673,11 +2699,15 @@ struct ggml_cplan ggml_graph_plan(
|
|
2673
2699
|
if (ggml_is_quantized(node->type) ||
|
2674
2700
|
// F16 -> BF16 and BF16 -> F16 copies go through intermediate F32
|
2675
2701
|
(node->src[0]->type == GGML_TYPE_F16 && node->src[1] && node->src[1]->type == GGML_TYPE_BF16) ||
|
2676
|
-
(node->src[0]->type == GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == GGML_TYPE_F16)
|
2702
|
+
(node->src[0]->type == GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == GGML_TYPE_F16) ||
|
2703
|
+
// conversion between F32 and I32
|
2704
|
+
(node->src[0]->type == GGML_TYPE_F32 && node->src[1] && node->src[1]->type == GGML_TYPE_I32) ||
|
2705
|
+
(node->src[0]->type == GGML_TYPE_I32 && node->src[1] && node->src[1]->type == GGML_TYPE_F32)) {
|
2677
2706
|
cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
|
2678
2707
|
}
|
2679
2708
|
} break;
|
2680
2709
|
case GGML_OP_ADD:
|
2710
|
+
case GGML_OP_ADD_ID:
|
2681
2711
|
case GGML_OP_ADD1:
|
2682
2712
|
{
|
2683
2713
|
if (ggml_is_quantized(node->src[0]->type)) {
|
@@ -2758,6 +2788,11 @@ struct ggml_cplan ggml_graph_plan(
|
|
2758
2788
|
GGML_ABORT("fatal error");
|
2759
2789
|
}
|
2760
2790
|
} break;
|
2791
|
+
case GGML_OP_CONV_2D:
|
2792
|
+
case GGML_OP_CONV_3D:
|
2793
|
+
{
|
2794
|
+
cur = GGML_IM2COL_WORK_SIZE;
|
2795
|
+
} break;
|
2761
2796
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
2762
2797
|
{
|
2763
2798
|
const int64_t ne00 = node->src[0]->ne[0]; // W
|
@@ -3046,7 +3081,14 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
|
|
3046
3081
|
|
3047
3082
|
threadpool->workers = workers;
|
3048
3083
|
|
3049
|
-
#
|
3084
|
+
#ifdef GGML_USE_OPENMP
|
3085
|
+
int32_t cpumask_iter = 0;
|
3086
|
+
|
3087
|
+
// Compute CPU masks for each thread
|
3088
|
+
for (int j = 0; j < tpp->n_threads; j++) {
|
3089
|
+
ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
|
3090
|
+
}
|
3091
|
+
#else // GGML_USE_OPENMP
|
3050
3092
|
ggml_mutex_init(&threadpool->mutex);
|
3051
3093
|
ggml_cond_init(&threadpool->cond);
|
3052
3094
|
|
@@ -3119,7 +3161,14 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
|
3119
3161
|
atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
|
3120
3162
|
}
|
3121
3163
|
|
3122
|
-
|
3164
|
+
// Apply thread CPU mask and priority
|
3165
|
+
int ith = omp_get_thread_num();
|
3166
|
+
|
3167
|
+
ggml_thread_apply_priority(threadpool->prio);
|
3168
|
+
if (ggml_thread_cpumask_is_valid(threadpool->workers[ith].cpumask)) {
|
3169
|
+
ggml_thread_apply_affinity(threadpool->workers[ith].cpumask);
|
3170
|
+
}
|
3171
|
+
ggml_graph_compute_thread(&threadpool->workers[ith]);
|
3123
3172
|
}
|
3124
3173
|
} else {
|
3125
3174
|
atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);
|
@@ -3158,6 +3207,10 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct g
|
|
3158
3207
|
return ggml_graph_compute(cgraph, &cplan);
|
3159
3208
|
}
|
3160
3209
|
|
3210
|
+
void ggml_cpu_fp32_to_fp32(const float * x, float * y, int64_t n) {
|
3211
|
+
memcpy(y, x, n * sizeof(float));
|
3212
|
+
}
|
3213
|
+
|
3161
3214
|
void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
|
3162
3215
|
int64_t i = 0;
|
3163
3216
|
#if defined(__F16C__)
|
@@ -3178,9 +3231,16 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
|
|
3178
3231
|
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
3179
3232
|
_mm_storel_epi64((__m128i *)(y + i), y_vec);
|
3180
3233
|
}
|
3234
|
+
#elif defined(__riscv_zvfh)
|
3235
|
+
for (int vl; i < n; i += vl) {
|
3236
|
+
vl = __riscv_vsetvl_e32m2(n - i);
|
3237
|
+
vfloat32m2_t vx = __riscv_vle32_v_f32m2(&x[i], vl);
|
3238
|
+
vfloat16m1_t vy = __riscv_vfncvt_f_f_w_f16m1(vx, vl);
|
3239
|
+
__riscv_vse16_v_f16m1((_Float16 *)&y[i], vy, vl);
|
3240
|
+
}
|
3181
3241
|
#endif
|
3182
3242
|
for (; i < n; ++i) {
|
3183
|
-
y[i] =
|
3243
|
+
y[i] = GGML_CPU_FP32_TO_FP16(x[i]);
|
3184
3244
|
}
|
3185
3245
|
}
|
3186
3246
|
|
@@ -3205,8 +3265,9 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
|
|
3205
3265
|
_mm_storeu_ps(y + i, y_vec);
|
3206
3266
|
}
|
3207
3267
|
#endif
|
3268
|
+
|
3208
3269
|
for (; i < n; ++i) {
|
3209
|
-
y[i] =
|
3270
|
+
y[i] = GGML_CPU_FP16_TO_FP32(x[i]);
|
3210
3271
|
}
|
3211
3272
|
}
|
3212
3273
|
|
@@ -3217,6 +3278,13 @@ void ggml_cpu_fp32_to_bf16(const float * x, ggml_bf16_t * y, int64_t n) {
|
|
3217
3278
|
}
|
3218
3279
|
}
|
3219
3280
|
|
3281
|
+
void ggml_cpu_fp32_to_i32(const float * x, int32_t * y, int64_t n) {
|
3282
|
+
int64_t i = 0;
|
3283
|
+
for (; i < n; ++i) {
|
3284
|
+
y[i] = x[i];
|
3285
|
+
}
|
3286
|
+
}
|
3287
|
+
|
3220
3288
|
void ggml_cpu_bf16_to_fp32(const ggml_bf16_t * x, float * y, int64_t n) {
|
3221
3289
|
int64_t i = 0;
|
3222
3290
|
#if defined(__AVX2__)
|
@@ -3408,7 +3476,7 @@ int ggml_cpu_has_vxe(void) {
|
|
3408
3476
|
|
3409
3477
|
int ggml_cpu_has_neon(void) {
|
3410
3478
|
#if defined(__ARM_ARCH) && defined(__ARM_NEON)
|
3411
|
-
return
|
3479
|
+
return 1;
|
3412
3480
|
#else
|
3413
3481
|
return 0;
|
3414
3482
|
#endif
|
@@ -3416,7 +3484,7 @@ int ggml_cpu_has_neon(void) {
|
|
3416
3484
|
|
3417
3485
|
int ggml_cpu_has_dotprod(void) {
|
3418
3486
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD)
|
3419
|
-
return
|
3487
|
+
return 1;
|
3420
3488
|
#else
|
3421
3489
|
return 0;
|
3422
3490
|
#endif
|
@@ -3424,7 +3492,7 @@ int ggml_cpu_has_dotprod(void) {
|
|
3424
3492
|
|
3425
3493
|
int ggml_cpu_has_sve(void) {
|
3426
3494
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
|
3427
|
-
return
|
3495
|
+
return 1;
|
3428
3496
|
#else
|
3429
3497
|
return 0;
|
3430
3498
|
#endif
|
@@ -3432,7 +3500,7 @@ int ggml_cpu_has_sve(void) {
|
|
3432
3500
|
|
3433
3501
|
int ggml_cpu_has_matmul_int8(void) {
|
3434
3502
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8)
|
3435
|
-
return
|
3503
|
+
return 1;
|
3436
3504
|
#else
|
3437
3505
|
return 0;
|
3438
3506
|
#endif
|
@@ -3448,14 +3516,14 @@ int ggml_cpu_get_sve_cnt(void) {
|
|
3448
3516
|
|
3449
3517
|
int ggml_cpu_has_sme(void) {
|
3450
3518
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SME)
|
3451
|
-
return
|
3519
|
+
return 1;
|
3452
3520
|
#else
|
3453
3521
|
return 0;
|
3454
3522
|
#endif
|
3455
3523
|
}
|
3456
3524
|
|
3457
3525
|
void ggml_cpu_init(void) {
|
3458
|
-
// needed to initialize
|
3526
|
+
// needed to initialize ggml_time
|
3459
3527
|
{
|
3460
3528
|
struct ggml_init_params params = { 0, NULL, false };
|
3461
3529
|
struct ggml_context * ctx = ggml_init(params);
|
@@ -3476,9 +3544,10 @@ void ggml_cpu_init(void) {
|
|
3476
3544
|
uint16_t u16;
|
3477
3545
|
ggml_fp16_t fp16;
|
3478
3546
|
} u = {i};
|
3479
|
-
float f =
|
3480
|
-
|
3481
|
-
|
3547
|
+
float f = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
|
3548
|
+
ggml_table_f32_f16[i] = f;
|
3549
|
+
ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
|
3550
|
+
ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
3482
3551
|
}
|
3483
3552
|
|
3484
3553
|
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
|