whispercpp 1.3.4 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +47 -23
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/cli/cli.cpp +121 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +10 -11
- data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
- data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
- data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
- data/ext/sources/examples/talk-llama/llama-context.h +57 -9
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
- data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
- data/ext/sources/examples/talk-llama/llama-model.h +44 -3
- data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
- data/ext/sources/examples/talk-llama/llama.cpp +729 -2
- data/ext/sources/examples/talk-llama/llama.h +152 -14
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +82 -54
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +4 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +190 -12
- data/ext/sources/ggml/src/CMakeLists.txt +82 -11
- data/ext/sources/ggml/src/ggml-alloc.c +124 -41
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
- data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
- data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
- data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- data/ext/sources/ggml/src/ggml-impl.h +67 -6
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +425 -33
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +101 -35
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +119 -2
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +7 -0
- data/whispercpp.gemspec +1 -1
- metadata +287 -34
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
|
@@ -81,6 +81,11 @@ struct ggml_arm_arch_features_type {
|
|
|
81
81
|
} ggml_arm_arch_features = { 0 };
|
|
82
82
|
#endif
|
|
83
83
|
|
|
84
|
+
#if defined(__riscv)
|
|
85
|
+
struct ggml_riscv_arch_features_type {
|
|
86
|
+
int rvv_vlen;
|
|
87
|
+
} ggml_riscv_arch_features = { 0 };
|
|
88
|
+
#endif
|
|
84
89
|
|
|
85
90
|
#if defined(_WIN32)
|
|
86
91
|
|
|
@@ -187,6 +192,9 @@ typedef void * thread_ret_t;
|
|
|
187
192
|
|
|
188
193
|
typedef pthread_t ggml_thread_t;
|
|
189
194
|
|
|
195
|
+
#define GGML_THREADPOOL_N_THREADS_MASK (0xffffU)
|
|
196
|
+
#define GGML_THREADPOOL_N_THREADS_BITS (16)
|
|
197
|
+
|
|
190
198
|
#if defined(__APPLE__)
|
|
191
199
|
#include <unistd.h>
|
|
192
200
|
#include <mach/mach.h>
|
|
@@ -449,7 +457,7 @@ struct ggml_threadpool {
|
|
|
449
457
|
struct ggml_cplan * cplan;
|
|
450
458
|
|
|
451
459
|
// synchronization primitives
|
|
452
|
-
atomic_int n_graph; //
|
|
460
|
+
atomic_int n_graph; // updated when there is work to be done (i.e each graph) holds graph and active thread counts.
|
|
453
461
|
atomic_int GGML_CACHE_ALIGN n_barrier;
|
|
454
462
|
atomic_int GGML_CACHE_ALIGN n_barrier_passed;
|
|
455
463
|
atomic_int GGML_CACHE_ALIGN current_chunk; // currently processing chunk during Mat_Mul, shared between all the threads.
|
|
@@ -457,12 +465,10 @@ struct ggml_threadpool {
|
|
|
457
465
|
// these are atomic as an annotation for thread-sanitizer
|
|
458
466
|
atomic_bool stop; // Used for stopping the threadpool altogether
|
|
459
467
|
atomic_bool pause; // Used for pausing the threadpool or individual threads
|
|
460
|
-
atomic_int
|
|
468
|
+
atomic_int abort; // Used for aborting processing of a graph
|
|
461
469
|
|
|
462
470
|
struct ggml_compute_state * workers; // per thread state
|
|
463
|
-
int
|
|
464
|
-
atomic_int n_threads_cur; // number of threads used in the current graph
|
|
465
|
-
|
|
471
|
+
int n_threads; // Number of threads in the pool
|
|
466
472
|
int32_t prio; // Scheduling priority
|
|
467
473
|
uint32_t poll; // Polling level (0 - no polling)
|
|
468
474
|
|
|
@@ -490,6 +496,15 @@ static inline void ggml_thread_cpu_relax(void) {
|
|
|
490
496
|
static inline void ggml_thread_cpu_relax(void) {
|
|
491
497
|
_mm_pause();
|
|
492
498
|
}
|
|
499
|
+
#elif defined(__riscv)
|
|
500
|
+
static inline void ggml_thread_cpu_relax(void) {
|
|
501
|
+
#ifdef __riscv_zihintpause
|
|
502
|
+
__asm__ __volatile__ ("pause");
|
|
503
|
+
#else
|
|
504
|
+
/* Encoding of the pause instruction */
|
|
505
|
+
__asm__ __volatile__ (".4byte 0x100000F");
|
|
506
|
+
#endif
|
|
507
|
+
}
|
|
493
508
|
#else
|
|
494
509
|
static inline void ggml_thread_cpu_relax(void) {;}
|
|
495
510
|
#endif
|
|
@@ -530,7 +545,7 @@ struct ggml_state {
|
|
|
530
545
|
static struct ggml_state g_state = {0};
|
|
531
546
|
|
|
532
547
|
void ggml_barrier(struct ggml_threadpool * tp) {
|
|
533
|
-
int n_threads = atomic_load_explicit(&tp->n_threads_cur, memory_order_relaxed);
|
|
548
|
+
int n_threads = atomic_load_explicit(&tp->n_graph, memory_order_relaxed) & GGML_THREADPOOL_N_THREADS_MASK;
|
|
534
549
|
if (n_threads == 1) {
|
|
535
550
|
return;
|
|
536
551
|
}
|
|
@@ -547,7 +562,7 @@ void ggml_barrier(struct ggml_threadpool * tp) {
|
|
|
547
562
|
// last thread
|
|
548
563
|
atomic_store_explicit(&tp->n_barrier, 0, memory_order_relaxed);
|
|
549
564
|
|
|
550
|
-
// exit barrier (
|
|
565
|
+
// exit barrier (full seq-cst fence)
|
|
551
566
|
atomic_fetch_add_explicit(&tp->n_barrier_passed, 1, memory_order_seq_cst);
|
|
552
567
|
return;
|
|
553
568
|
}
|
|
@@ -683,19 +698,25 @@ bool ggml_is_numa(void) {
|
|
|
683
698
|
}
|
|
684
699
|
|
|
685
700
|
#if defined(__ARM_ARCH)
|
|
686
|
-
|
|
687
|
-
#
|
|
688
|
-
#include <sys/auxv.h>
|
|
689
|
-
#endif
|
|
690
|
-
|
|
701
|
+
#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
|
|
702
|
+
#include <arm_sve.h>
|
|
691
703
|
static void ggml_init_arm_arch_features(void) {
|
|
692
|
-
|
|
693
|
-
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
|
694
|
-
#endif
|
|
704
|
+
ggml_arm_arch_features.sve_cnt = svcntb();
|
|
695
705
|
}
|
|
696
|
-
|
|
706
|
+
#else
|
|
707
|
+
static void ggml_init_arm_arch_features(void) {}
|
|
708
|
+
#endif
|
|
697
709
|
#endif // __ARM_ARCH
|
|
698
710
|
|
|
711
|
+
#if defined(__riscv) && defined(__riscv_v_intrinsic)
|
|
712
|
+
#include <riscv_vector.h>
|
|
713
|
+
static void ggml_init_riscv_arch_features(void) {
|
|
714
|
+
ggml_riscv_arch_features.rvv_vlen = __riscv_vlenb();
|
|
715
|
+
}
|
|
716
|
+
#else
|
|
717
|
+
static void ggml_init_riscv_arch_features(void) {}
|
|
718
|
+
#endif
|
|
719
|
+
|
|
699
720
|
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
|
|
700
721
|
GGML_ASSERT(!ggml_get_no_alloc(ctx));
|
|
701
722
|
|
|
@@ -1608,13 +1629,8 @@ static void ggml_compute_forward_mul_mat_id(
|
|
|
1608
1629
|
chunk_size = 64;
|
|
1609
1630
|
}
|
|
1610
1631
|
|
|
1611
|
-
#if defined(__aarch64__)
|
|
1612
|
-
// disable for ARM
|
|
1613
|
-
const bool disable_chunking = true;
|
|
1614
|
-
#else
|
|
1615
1632
|
// disable for NUMA
|
|
1616
1633
|
const bool disable_chunking = ggml_is_numa();
|
|
1617
|
-
#endif // defined(__aarch64__)
|
|
1618
1634
|
|
|
1619
1635
|
int64_t nchunk0 = (nr0 + chunk_size - 1) / chunk_size;
|
|
1620
1636
|
int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size;
|
|
@@ -1731,6 +1747,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
1731
1747
|
{
|
|
1732
1748
|
ggml_compute_forward_sum_rows(params, tensor);
|
|
1733
1749
|
} break;
|
|
1750
|
+
case GGML_OP_CUMSUM:
|
|
1751
|
+
{
|
|
1752
|
+
ggml_compute_forward_cumsum(params, tensor);
|
|
1753
|
+
} break;
|
|
1734
1754
|
case GGML_OP_MEAN:
|
|
1735
1755
|
{
|
|
1736
1756
|
ggml_compute_forward_mean(params, tensor);
|
|
@@ -1807,22 +1827,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
1807
1827
|
{
|
|
1808
1828
|
ggml_compute_forward_cont(params, tensor);
|
|
1809
1829
|
} break;
|
|
1810
|
-
case GGML_OP_RESHAPE:
|
|
1811
|
-
{
|
|
1812
|
-
ggml_compute_forward_reshape(params, tensor);
|
|
1813
|
-
} break;
|
|
1814
|
-
case GGML_OP_VIEW:
|
|
1815
|
-
{
|
|
1816
|
-
ggml_compute_forward_view(params, tensor);
|
|
1817
|
-
} break;
|
|
1818
|
-
case GGML_OP_PERMUTE:
|
|
1819
|
-
{
|
|
1820
|
-
ggml_compute_forward_permute(params, tensor);
|
|
1821
|
-
} break;
|
|
1822
|
-
case GGML_OP_TRANSPOSE:
|
|
1823
|
-
{
|
|
1824
|
-
ggml_compute_forward_transpose(params, tensor);
|
|
1825
|
-
} break;
|
|
1826
1830
|
case GGML_OP_GET_ROWS:
|
|
1827
1831
|
{
|
|
1828
1832
|
ggml_compute_forward_get_rows(params, tensor);
|
|
@@ -1939,10 +1943,22 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
1939
1943
|
{
|
|
1940
1944
|
ggml_compute_forward_argsort(params, tensor);
|
|
1941
1945
|
} break;
|
|
1946
|
+
case GGML_OP_TOP_K:
|
|
1947
|
+
{
|
|
1948
|
+
ggml_compute_forward_top_k(params, tensor);
|
|
1949
|
+
} break;
|
|
1942
1950
|
case GGML_OP_LEAKY_RELU:
|
|
1943
1951
|
{
|
|
1944
1952
|
ggml_compute_forward_leaky_relu(params, tensor);
|
|
1945
1953
|
} break;
|
|
1954
|
+
case GGML_OP_TRI:
|
|
1955
|
+
{
|
|
1956
|
+
ggml_compute_forward_tri(params, tensor);
|
|
1957
|
+
} break;
|
|
1958
|
+
case GGML_OP_FILL:
|
|
1959
|
+
{
|
|
1960
|
+
ggml_compute_forward_fill(params, tensor);
|
|
1961
|
+
} break;
|
|
1946
1962
|
case GGML_OP_FLASH_ATTN_EXT:
|
|
1947
1963
|
{
|
|
1948
1964
|
ggml_compute_forward_flash_attn_ext(params, tensor);
|
|
@@ -1998,6 +2014,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
1998
2014
|
{
|
|
1999
2015
|
ggml_compute_forward_rwkv_wkv7(params, tensor);
|
|
2000
2016
|
} break;
|
|
2017
|
+
case GGML_OP_SOLVE_TRI:
|
|
2018
|
+
{
|
|
2019
|
+
ggml_compute_forward_solve_tri(params, tensor);
|
|
2020
|
+
} break;
|
|
2001
2021
|
case GGML_OP_MAP_CUSTOM1:
|
|
2002
2022
|
{
|
|
2003
2023
|
ggml_compute_forward_map_custom1(params, tensor);
|
|
@@ -2042,6 +2062,22 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
2042
2062
|
{
|
|
2043
2063
|
// nop
|
|
2044
2064
|
} break;
|
|
2065
|
+
case GGML_OP_RESHAPE:
|
|
2066
|
+
{
|
|
2067
|
+
// nop
|
|
2068
|
+
} break;
|
|
2069
|
+
case GGML_OP_PERMUTE:
|
|
2070
|
+
{
|
|
2071
|
+
// nop
|
|
2072
|
+
} break;
|
|
2073
|
+
case GGML_OP_VIEW:
|
|
2074
|
+
{
|
|
2075
|
+
// nop
|
|
2076
|
+
} break;
|
|
2077
|
+
case GGML_OP_TRANSPOSE:
|
|
2078
|
+
{
|
|
2079
|
+
// nop
|
|
2080
|
+
} break;
|
|
2045
2081
|
case GGML_OP_COUNT:
|
|
2046
2082
|
{
|
|
2047
2083
|
GGML_ABORT("fatal error");
|
|
@@ -2140,6 +2176,9 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
2140
2176
|
case GGML_OP_ADD_ID:
|
|
2141
2177
|
case GGML_OP_ADD1:
|
|
2142
2178
|
case GGML_OP_ACC:
|
|
2179
|
+
case GGML_OP_CUMSUM:
|
|
2180
|
+
case GGML_OP_TRI:
|
|
2181
|
+
case GGML_OP_FILL:
|
|
2143
2182
|
{
|
|
2144
2183
|
n_tasks = n_threads;
|
|
2145
2184
|
} break;
|
|
@@ -2157,6 +2196,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
2157
2196
|
n_tasks = 1;
|
|
2158
2197
|
} break;
|
|
2159
2198
|
case GGML_OP_COUNT_EQUAL:
|
|
2199
|
+
case GGML_OP_SOLVE_TRI:
|
|
2160
2200
|
{
|
|
2161
2201
|
n_tasks = n_threads;
|
|
2162
2202
|
} break;
|
|
@@ -2179,6 +2219,12 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
2179
2219
|
case GGML_UNARY_OP_HARDSWISH:
|
|
2180
2220
|
case GGML_UNARY_OP_HARDSIGMOID:
|
|
2181
2221
|
case GGML_UNARY_OP_EXP:
|
|
2222
|
+
case GGML_UNARY_OP_SOFTPLUS:
|
|
2223
|
+
case GGML_UNARY_OP_EXPM1:
|
|
2224
|
+
case GGML_UNARY_OP_FLOOR:
|
|
2225
|
+
case GGML_UNARY_OP_CEIL:
|
|
2226
|
+
case GGML_UNARY_OP_ROUND:
|
|
2227
|
+
case GGML_UNARY_OP_TRUNC:
|
|
2182
2228
|
{
|
|
2183
2229
|
n_tasks = 1;
|
|
2184
2230
|
} break;
|
|
@@ -2187,6 +2233,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
2187
2233
|
case GGML_UNARY_OP_GELU_ERF:
|
|
2188
2234
|
case GGML_UNARY_OP_GELU_QUICK:
|
|
2189
2235
|
case GGML_UNARY_OP_SILU:
|
|
2236
|
+
case GGML_UNARY_OP_XIELU:
|
|
2190
2237
|
{
|
|
2191
2238
|
n_tasks = n_threads;
|
|
2192
2239
|
} break;
|
|
@@ -2284,6 +2331,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
2284
2331
|
case GGML_OP_ARANGE:
|
|
2285
2332
|
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
2286
2333
|
case GGML_OP_ARGSORT:
|
|
2334
|
+
case GGML_OP_TOP_K:
|
|
2287
2335
|
case GGML_OP_FLASH_ATTN_EXT:
|
|
2288
2336
|
case GGML_OP_FLASH_ATTN_BACK:
|
|
2289
2337
|
case GGML_OP_SSM_CONV:
|
|
@@ -2595,7 +2643,7 @@ static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask
|
|
|
2595
2643
|
void ggml_threadpool_free(struct ggml_threadpool* threadpool) {
|
|
2596
2644
|
if (!threadpool) return;
|
|
2597
2645
|
|
|
2598
|
-
const int n_threads = threadpool->
|
|
2646
|
+
const int n_threads = threadpool->n_threads;
|
|
2599
2647
|
|
|
2600
2648
|
#ifndef GGML_USE_OPENMP
|
|
2601
2649
|
struct ggml_compute_state* workers = threadpool->workers;
|
|
@@ -2671,9 +2719,14 @@ struct ggml_cplan ggml_graph_plan(
|
|
|
2671
2719
|
//GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
|
|
2672
2720
|
}
|
|
2673
2721
|
if (n_threads <= 0) {
|
|
2674
|
-
n_threads = threadpool ? threadpool->
|
|
2722
|
+
n_threads = threadpool ? threadpool->n_threads : GGML_DEFAULT_N_THREADS;
|
|
2675
2723
|
}
|
|
2676
2724
|
|
|
2725
|
+
#if defined(__EMSCRIPTEN__) && !defined(__EMSCRIPTEN_PTHREADS__)
|
|
2726
|
+
// Emscripten without pthreads support can only use a single thread
|
|
2727
|
+
n_threads = 1;
|
|
2728
|
+
#endif
|
|
2729
|
+
|
|
2677
2730
|
size_t work_size = 0;
|
|
2678
2731
|
|
|
2679
2732
|
struct ggml_cplan cplan;
|
|
@@ -2807,6 +2860,10 @@ struct ggml_cplan ggml_graph_plan(
|
|
|
2807
2860
|
cur += sizeof(ggml_fp16_t)*ne00*ne01*ne02*ne03;
|
|
2808
2861
|
cur += sizeof(ggml_fp16_t)*ne10*ne11*ne12;
|
|
2809
2862
|
} break;
|
|
2863
|
+
case GGML_OP_TOP_K:
|
|
2864
|
+
{
|
|
2865
|
+
cur += sizeof(int32_t)*node->src[0]->ne[0]*n_tasks;
|
|
2866
|
+
} break;
|
|
2810
2867
|
case GGML_OP_FLASH_ATTN_EXT:
|
|
2811
2868
|
{
|
|
2812
2869
|
const int64_t ne10 = node->src[1]->ne[0]; // DK
|
|
@@ -2870,15 +2927,22 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
2870
2927
|
|
|
2871
2928
|
struct ggml_compute_params params = {
|
|
2872
2929
|
/*.ith =*/ state->ith,
|
|
2873
|
-
/*.nth =*/ atomic_load_explicit(&tp->n_threads_cur, memory_order_relaxed),
|
|
2930
|
+
/*.nth =*/ atomic_load_explicit(&tp->n_graph, memory_order_relaxed) & GGML_THREADPOOL_N_THREADS_MASK,
|
|
2874
2931
|
/*.wsize =*/ cplan->work_size,
|
|
2875
2932
|
/*.wdata =*/ cplan->work_data,
|
|
2876
2933
|
/*.threadpool=*/ tp,
|
|
2877
2934
|
};
|
|
2878
2935
|
|
|
2936
|
+
GGML_PRINT_DEBUG("thread #%d compute-start cplan %p last-graph %d \n", state->ith, cplan, state->last_graph);
|
|
2937
|
+
|
|
2879
2938
|
for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
|
|
2880
2939
|
struct ggml_tensor * node = cgraph->nodes[node_n];
|
|
2881
2940
|
|
|
2941
|
+
if (ggml_op_is_empty(node->op)) {
|
|
2942
|
+
// skip NOPs
|
|
2943
|
+
continue;
|
|
2944
|
+
}
|
|
2945
|
+
|
|
2882
2946
|
ggml_compute_forward(¶ms, node);
|
|
2883
2947
|
|
|
2884
2948
|
if (state->ith == 0 && cplan->abort_callback &&
|
|
@@ -2892,6 +2956,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
2892
2956
|
}
|
|
2893
2957
|
}
|
|
2894
2958
|
|
|
2959
|
+
GGML_PRINT_DEBUG("thread #%d compute-done cplan %p last-graph %d \n", state->ith, cplan, state->last_graph);
|
|
2960
|
+
|
|
2895
2961
|
ggml_barrier(state->threadpool);
|
|
2896
2962
|
|
|
2897
2963
|
return 0;
|
|
@@ -2899,27 +2965,23 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
2899
2965
|
|
|
2900
2966
|
#ifndef GGML_USE_OPENMP
|
|
2901
2967
|
|
|
2902
|
-
// check if thread is active
|
|
2903
|
-
static inline bool ggml_graph_compute_thread_active(struct ggml_compute_state * state) {
|
|
2904
|
-
struct ggml_threadpool * threadpool = state->threadpool;
|
|
2905
|
-
int n_threads = atomic_load_explicit(&threadpool->n_threads_cur, memory_order_relaxed);
|
|
2906
|
-
return (state->ith < n_threads);
|
|
2907
|
-
}
|
|
2908
|
-
|
|
2909
2968
|
// check if thread is ready to proceed (exit from polling or sleeping)
|
|
2969
|
+
// returns true if loops should exit, sets state->pending to indicate new work
|
|
2910
2970
|
static inline bool ggml_graph_compute_thread_ready(struct ggml_compute_state * state) {
|
|
2911
2971
|
struct ggml_threadpool * threadpool = state->threadpool;
|
|
2912
2972
|
|
|
2913
2973
|
if (state->pending || threadpool->stop || threadpool->pause) { return true; }
|
|
2914
2974
|
|
|
2915
2975
|
// check for new graph/work
|
|
2916
|
-
int
|
|
2917
|
-
|
|
2918
|
-
|
|
2919
|
-
state->
|
|
2976
|
+
int n_graph = atomic_load_explicit(&threadpool->n_graph, memory_order_relaxed);
|
|
2977
|
+
int n_threads = n_graph & GGML_THREADPOOL_N_THREADS_MASK;
|
|
2978
|
+
if (n_graph != state->last_graph) {
|
|
2979
|
+
state->pending = (state->ith < n_threads);
|
|
2980
|
+
state->last_graph = n_graph;
|
|
2981
|
+
return true;
|
|
2920
2982
|
}
|
|
2921
2983
|
|
|
2922
|
-
return
|
|
2984
|
+
return false;
|
|
2923
2985
|
}
|
|
2924
2986
|
|
|
2925
2987
|
// sync thread state after polling
|
|
@@ -2936,11 +2998,6 @@ static inline void ggml_graph_compute_thread_sync(struct ggml_compute_state * st
|
|
|
2936
2998
|
static inline bool ggml_graph_compute_poll_for_work(struct ggml_compute_state * state) {
|
|
2937
2999
|
struct ggml_threadpool * threadpool = state->threadpool;
|
|
2938
3000
|
|
|
2939
|
-
// Skip polling for unused threads
|
|
2940
|
-
if (!ggml_graph_compute_thread_active(state)) {
|
|
2941
|
-
return state->pending;
|
|
2942
|
-
}
|
|
2943
|
-
|
|
2944
3001
|
// This seems to make 0 ... 100 a decent range for polling level across modern processors.
|
|
2945
3002
|
// Perhaps, we can adjust it dynamically based on load and things.
|
|
2946
3003
|
const uint64_t n_rounds = 1024UL * 128 * threadpool->poll;
|
|
@@ -3002,7 +3059,6 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
|
|
|
3002
3059
|
ggml_graph_compute_check_for_work(state);
|
|
3003
3060
|
if (state->pending) {
|
|
3004
3061
|
state->pending = false;
|
|
3005
|
-
|
|
3006
3062
|
ggml_graph_compute_thread(state);
|
|
3007
3063
|
}
|
|
3008
3064
|
}
|
|
@@ -3017,14 +3073,15 @@ static void ggml_graph_compute_kickoff(struct ggml_threadpool * threadpool, int
|
|
|
3017
3073
|
|
|
3018
3074
|
ggml_mutex_lock(&threadpool->mutex);
|
|
3019
3075
|
|
|
3020
|
-
|
|
3076
|
+
// Update the number of active threads and the graph count
|
|
3077
|
+
int n_graph = atomic_load_explicit(&threadpool->n_graph, memory_order_relaxed) >> GGML_THREADPOOL_N_THREADS_BITS;
|
|
3078
|
+
n_graph = ((n_graph + 1) << GGML_THREADPOOL_N_THREADS_BITS) | (n_threads & GGML_THREADPOOL_N_THREADS_MASK);
|
|
3021
3079
|
|
|
3022
|
-
|
|
3023
|
-
atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
|
|
3080
|
+
GGML_PRINT_DEBUG("compute-kickoff: n_threads %d n_graph %d\n", n_threads, n_graph);
|
|
3024
3081
|
|
|
3025
3082
|
// Indicate the graph is ready to be processed
|
|
3026
3083
|
// We need the full seq-cst fence here because of the polling threads (used in thread_sync)
|
|
3027
|
-
|
|
3084
|
+
atomic_store_explicit(&threadpool->n_graph, n_graph, memory_order_seq_cst);
|
|
3028
3085
|
|
|
3029
3086
|
if (threadpool->pause) {
|
|
3030
3087
|
// Update main thread prio and affinity to match the threadpool settings
|
|
@@ -3062,8 +3119,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
|
|
|
3062
3119
|
threadpool->pause = tpp->paused;
|
|
3063
3120
|
threadpool->abort = -1;
|
|
3064
3121
|
threadpool->workers = NULL;
|
|
3065
|
-
threadpool->
|
|
3066
|
-
threadpool->n_threads_cur = tpp->n_threads;
|
|
3122
|
+
threadpool->n_threads = tpp->n_threads;
|
|
3067
3123
|
threadpool->poll = tpp->poll;
|
|
3068
3124
|
threadpool->prio = tpp->prio;
|
|
3069
3125
|
threadpool->ec = GGML_STATUS_SUCCESS;
|
|
@@ -3158,7 +3214,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
|
|
3158
3214
|
{
|
|
3159
3215
|
// update the number of threads from the actual number of threads that we got from OpenMP
|
|
3160
3216
|
n_threads = omp_get_num_threads();
|
|
3161
|
-
atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
|
|
3217
|
+
atomic_store_explicit(&threadpool->n_graph, n_threads, memory_order_relaxed);
|
|
3162
3218
|
}
|
|
3163
3219
|
|
|
3164
3220
|
// Apply thread CPU mask and priority
|
|
@@ -3171,13 +3227,13 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
|
|
3171
3227
|
ggml_graph_compute_thread(&threadpool->workers[ith]);
|
|
3172
3228
|
}
|
|
3173
3229
|
} else {
|
|
3174
|
-
atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);
|
|
3230
|
+
atomic_store_explicit(&threadpool->n_graph, 1, memory_order_relaxed);
|
|
3175
3231
|
ggml_graph_compute_thread(&threadpool->workers[0]);
|
|
3176
3232
|
}
|
|
3177
3233
|
#else
|
|
3178
|
-
if (n_threads > threadpool->
|
|
3179
|
-
GGML_LOG_WARN("cplan requested more threads (%d) than available (%d)\n", n_threads, threadpool->
|
|
3180
|
-
n_threads = threadpool->
|
|
3234
|
+
if (n_threads > threadpool->n_threads) {
|
|
3235
|
+
GGML_LOG_WARN("cplan requested more threads (%d) than available (%d)\n", n_threads, threadpool->n_threads);
|
|
3236
|
+
n_threads = threadpool->n_threads;
|
|
3181
3237
|
}
|
|
3182
3238
|
|
|
3183
3239
|
// Kick all threads to start the new graph
|
|
@@ -3264,6 +3320,33 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
|
|
|
3264
3320
|
__m128 y_vec = _mm_cvtph_ps(x_vec);
|
|
3265
3321
|
_mm_storeu_ps(y + i, y_vec);
|
|
3266
3322
|
}
|
|
3323
|
+
|
|
3324
|
+
#elif defined(__riscv_v_intrinsic) && defined(__riscv_zvfhmin)
|
|
3325
|
+
// calculate step size
|
|
3326
|
+
const int epr = __riscv_vsetvlmax_e16m2();
|
|
3327
|
+
const int step = epr * 2;
|
|
3328
|
+
const int np = (n & ~(step - 1));
|
|
3329
|
+
|
|
3330
|
+
// unroll by 2
|
|
3331
|
+
for (; i < np; i += step) {
|
|
3332
|
+
vfloat16m2_t ax0 = __riscv_vle16_v_f16m2((const _Float16*)x + i, epr);
|
|
3333
|
+
vfloat32m4_t ay0 = __riscv_vfwcvt_f_f_v_f32m4(ax0, epr);
|
|
3334
|
+
__riscv_vse32_v_f32m4(y + i, ay0, epr);
|
|
3335
|
+
|
|
3336
|
+
vfloat16m2_t ax1 = __riscv_vle16_v_f16m2((const _Float16*)x + i + epr, epr);
|
|
3337
|
+
vfloat32m4_t ay1 = __riscv_vfwcvt_f_f_v_f32m4(ax1, epr);
|
|
3338
|
+
__riscv_vse32_v_f32m4(y + i + epr, ay1, epr);
|
|
3339
|
+
}
|
|
3340
|
+
|
|
3341
|
+
// leftovers
|
|
3342
|
+
int vl;
|
|
3343
|
+
for (i = np; i < n; i += vl) {
|
|
3344
|
+
vl = __riscv_vsetvl_e16m2(n - i);
|
|
3345
|
+
vfloat16m2_t ax0 = __riscv_vle16_v_f16m2((const _Float16*)x + i, vl);
|
|
3346
|
+
vfloat32m4_t ay0 = __riscv_vfwcvt_f_f_v_f32m4(ax0, vl);
|
|
3347
|
+
__riscv_vse32_v_f32m4(y + i, ay0, vl);
|
|
3348
|
+
}
|
|
3349
|
+
|
|
3267
3350
|
#endif
|
|
3268
3351
|
|
|
3269
3352
|
for (; i < n; ++i) {
|
|
@@ -3308,6 +3391,31 @@ void ggml_cpu_bf16_to_fp32(const ggml_bf16_t * x, float * y, int64_t n) {
|
|
|
3308
3391
|
(const __m128i *)(x + i))),
|
|
3309
3392
|
16)));
|
|
3310
3393
|
}
|
|
3394
|
+
#elif defined(__riscv_v_intrinsic) && defined(__riscv_zvfbfmin)
|
|
3395
|
+
// calculate step size
|
|
3396
|
+
const int epr = __riscv_vsetvlmax_e16m2();
|
|
3397
|
+
const int step = epr * 2;
|
|
3398
|
+
const int np = (n & ~(step - 1));
|
|
3399
|
+
|
|
3400
|
+
// unroll by 2
|
|
3401
|
+
for (; i < np; i += step) {
|
|
3402
|
+
vbfloat16m2_t ax0 = __riscv_vle16_v_bf16m2((const __bf16*)x + i, epr);
|
|
3403
|
+
vfloat32m4_t ay0 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax0, epr);
|
|
3404
|
+
__riscv_vse32_v_f32m4(y + i, ay0, epr);
|
|
3405
|
+
|
|
3406
|
+
vbfloat16m2_t ax1 = __riscv_vle16_v_bf16m2((const __bf16*)x + i + epr, epr);
|
|
3407
|
+
vfloat32m4_t ay1 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax1, epr);
|
|
3408
|
+
__riscv_vse32_v_f32m4(y + i + epr, ay1, epr);
|
|
3409
|
+
}
|
|
3410
|
+
|
|
3411
|
+
// leftovers
|
|
3412
|
+
int vl;
|
|
3413
|
+
for (i = np; i < n; i += vl) {
|
|
3414
|
+
vl = __riscv_vsetvl_e16m2(n - i);
|
|
3415
|
+
vbfloat16m2_t ax0 = __riscv_vle16_v_bf16m2((const __bf16*)x + i, vl);
|
|
3416
|
+
vfloat32m4_t ay0 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax0, vl);
|
|
3417
|
+
__riscv_vse32_v_f32m4(y + i, ay0, vl);
|
|
3418
|
+
}
|
|
3311
3419
|
#endif
|
|
3312
3420
|
for (; i < n; i++) {
|
|
3313
3421
|
y[i] = GGML_BF16_TO_FP32(x[i]);
|
|
@@ -3410,6 +3518,14 @@ int ggml_cpu_has_riscv_v(void) {
|
|
|
3410
3518
|
#endif
|
|
3411
3519
|
}
|
|
3412
3520
|
|
|
3521
|
+
int ggml_cpu_get_rvv_vlen(void) {
|
|
3522
|
+
#if defined(__riscv) && defined(__riscv_v_intrinsic)
|
|
3523
|
+
return ggml_riscv_arch_features.rvv_vlen;
|
|
3524
|
+
#else
|
|
3525
|
+
return 0;
|
|
3526
|
+
#endif
|
|
3527
|
+
}
|
|
3528
|
+
|
|
3413
3529
|
int ggml_cpu_has_f16c(void) {
|
|
3414
3530
|
#if defined(__F16C__)
|
|
3415
3531
|
return 1;
|
|
@@ -3557,13 +3673,17 @@ void ggml_cpu_init(void) {
|
|
|
3557
3673
|
#ifdef GGML_USE_OPENMP
|
|
3558
3674
|
//if (!getenv("OMP_WAIT_POLICY")) {
|
|
3559
3675
|
// // set the wait policy to active, so that OpenMP threads don't sleep
|
|
3560
|
-
//
|
|
3676
|
+
// setenv("OMP_WAIT_POLICY", "active", 0)
|
|
3561
3677
|
//}
|
|
3562
3678
|
|
|
3563
3679
|
if (!getenv("KMP_BLOCKTIME")) {
|
|
3564
3680
|
// set the time to wait before sleeping a thread
|
|
3565
3681
|
// this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases
|
|
3566
|
-
|
|
3682
|
+
#ifdef _WIN32
|
|
3683
|
+
_putenv_s("KMP_BLOCKTIME", "200"); // 200ms
|
|
3684
|
+
#else
|
|
3685
|
+
setenv("KMP_BLOCKTIME", "200", 0); // 200ms
|
|
3686
|
+
#endif
|
|
3567
3687
|
}
|
|
3568
3688
|
#endif
|
|
3569
3689
|
}
|
|
@@ -3572,6 +3692,10 @@ void ggml_cpu_init(void) {
|
|
|
3572
3692
|
ggml_init_arm_arch_features();
|
|
3573
3693
|
#endif
|
|
3574
3694
|
|
|
3695
|
+
#if defined(__riscv)
|
|
3696
|
+
ggml_init_riscv_arch_features();
|
|
3697
|
+
#endif
|
|
3698
|
+
|
|
3575
3699
|
is_first_call = false;
|
|
3576
3700
|
}
|
|
3577
3701
|
|
|
@@ -583,6 +583,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|
|
583
583
|
if (ggml_cpu_has_riscv_v()) {
|
|
584
584
|
features.push_back({ "RISCV_V", "1" });
|
|
585
585
|
}
|
|
586
|
+
if (ggml_cpu_get_rvv_vlen() > 0) {
|
|
587
|
+
static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen());
|
|
588
|
+
features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
|
|
589
|
+
}
|
|
586
590
|
if (ggml_cpu_has_vsx()) {
|
|
587
591
|
features.push_back({ "VSX", "1" });
|
|
588
592
|
}
|