whispercpp 1.3.4 → 1.3.5
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +47 -23
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/cli/cli.cpp +121 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +10 -11
- data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
- data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
- data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
- data/ext/sources/examples/talk-llama/llama-context.h +57 -9
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
- data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
- data/ext/sources/examples/talk-llama/llama-model.h +44 -3
- data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
- data/ext/sources/examples/talk-llama/llama.cpp +729 -2
- data/ext/sources/examples/talk-llama/llama.h +152 -14
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +82 -54
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +4 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +190 -12
- data/ext/sources/ggml/src/CMakeLists.txt +82 -11
- data/ext/sources/ggml/src/ggml-alloc.c +124 -41
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
- data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
- data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
- data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- data/ext/sources/ggml/src/ggml-impl.h +67 -6
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +425 -33
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +101 -35
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +119 -2
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +7 -0
- data/whispercpp.gemspec +1 -1
- metadata +287 -34
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
|
@@ -204,6 +204,10 @@
|
|
|
204
204
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
|
205
205
|
#endif
|
|
206
206
|
|
|
207
|
+
#if defined(_WIN32) && !defined(_WIN32_WINNT)
|
|
208
|
+
# define _WIN32_WINNT 0x0A00
|
|
209
|
+
#endif
|
|
210
|
+
|
|
207
211
|
#include <stdbool.h>
|
|
208
212
|
#include <stddef.h>
|
|
209
213
|
#include <stdint.h>
|
|
@@ -230,6 +234,11 @@
|
|
|
230
234
|
|
|
231
235
|
#if UINTPTR_MAX == 0xFFFFFFFF
|
|
232
236
|
#define GGML_MEM_ALIGN 4
|
|
237
|
+
#elif defined(__EMSCRIPTEN__)
|
|
238
|
+
// emscripten uses max_align_t == 8, so we need GGML_MEM_ALIGN == 8 for 64-bit wasm.
|
|
239
|
+
// (for 32-bit wasm, the first conditional is true and GGML_MEM_ALIGN stays 4.)
|
|
240
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/18628
|
|
241
|
+
#define GGML_MEM_ALIGN 8
|
|
233
242
|
#else
|
|
234
243
|
#define GGML_MEM_ALIGN 16
|
|
235
244
|
#endif
|
|
@@ -237,9 +246,12 @@
|
|
|
237
246
|
#define GGML_EXIT_SUCCESS 0
|
|
238
247
|
#define GGML_EXIT_ABORTED 1
|
|
239
248
|
|
|
249
|
+
// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
|
|
250
|
+
#define GGML_ROPE_TYPE_NORMAL 0
|
|
240
251
|
#define GGML_ROPE_TYPE_NEOX 2
|
|
241
252
|
#define GGML_ROPE_TYPE_MROPE 8
|
|
242
253
|
#define GGML_ROPE_TYPE_VISION 24
|
|
254
|
+
#define GGML_ROPE_TYPE_IMROPE 40 // binary: 101000
|
|
243
255
|
|
|
244
256
|
#define GGML_MROPE_SECTIONS 4
|
|
245
257
|
|
|
@@ -472,6 +484,7 @@ extern "C" {
|
|
|
472
484
|
GGML_OP_COS,
|
|
473
485
|
GGML_OP_SUM,
|
|
474
486
|
GGML_OP_SUM_ROWS,
|
|
487
|
+
GGML_OP_CUMSUM,
|
|
475
488
|
GGML_OP_MEAN,
|
|
476
489
|
GGML_OP_ARGMAX,
|
|
477
490
|
GGML_OP_COUNT_EQUAL,
|
|
@@ -526,7 +539,10 @@ extern "C" {
|
|
|
526
539
|
GGML_OP_ARANGE,
|
|
527
540
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
528
541
|
GGML_OP_ARGSORT,
|
|
542
|
+
GGML_OP_TOP_K,
|
|
529
543
|
GGML_OP_LEAKY_RELU,
|
|
544
|
+
GGML_OP_TRI,
|
|
545
|
+
GGML_OP_FILL,
|
|
530
546
|
|
|
531
547
|
GGML_OP_FLASH_ATTN_EXT,
|
|
532
548
|
GGML_OP_FLASH_ATTN_BACK,
|
|
@@ -539,6 +555,7 @@ extern "C" {
|
|
|
539
555
|
GGML_OP_RWKV_WKV6,
|
|
540
556
|
GGML_OP_GATED_LINEAR_ATTN,
|
|
541
557
|
GGML_OP_RWKV_WKV7,
|
|
558
|
+
GGML_OP_SOLVE_TRI,
|
|
542
559
|
|
|
543
560
|
GGML_OP_UNARY,
|
|
544
561
|
|
|
@@ -573,7 +590,14 @@ extern "C" {
|
|
|
573
590
|
GGML_UNARY_OP_HARDSWISH,
|
|
574
591
|
GGML_UNARY_OP_HARDSIGMOID,
|
|
575
592
|
GGML_UNARY_OP_EXP,
|
|
593
|
+
GGML_UNARY_OP_EXPM1,
|
|
594
|
+
GGML_UNARY_OP_SOFTPLUS,
|
|
576
595
|
GGML_UNARY_OP_GELU_ERF,
|
|
596
|
+
GGML_UNARY_OP_XIELU,
|
|
597
|
+
GGML_UNARY_OP_FLOOR,
|
|
598
|
+
GGML_UNARY_OP_CEIL,
|
|
599
|
+
GGML_UNARY_OP_ROUND,
|
|
600
|
+
GGML_UNARY_OP_TRUNC,
|
|
577
601
|
|
|
578
602
|
GGML_UNARY_OP_COUNT,
|
|
579
603
|
};
|
|
@@ -612,6 +636,13 @@ extern "C" {
|
|
|
612
636
|
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
|
|
613
637
|
};
|
|
614
638
|
|
|
639
|
+
enum ggml_tri_type {
|
|
640
|
+
GGML_TRI_TYPE_UPPER_DIAG = 0,
|
|
641
|
+
GGML_TRI_TYPE_UPPER = 1,
|
|
642
|
+
GGML_TRI_TYPE_LOWER_DIAG = 2,
|
|
643
|
+
GGML_TRI_TYPE_LOWER = 3
|
|
644
|
+
};
|
|
645
|
+
|
|
615
646
|
struct ggml_init_params {
|
|
616
647
|
// memory pool
|
|
617
648
|
size_t mem_size; // bytes
|
|
@@ -949,6 +980,22 @@ extern "C" {
|
|
|
949
980
|
struct ggml_context * ctx,
|
|
950
981
|
struct ggml_tensor * a);
|
|
951
982
|
|
|
983
|
+
GGML_API struct ggml_tensor * ggml_expm1(
|
|
984
|
+
struct ggml_context * ctx,
|
|
985
|
+
struct ggml_tensor * a);
|
|
986
|
+
|
|
987
|
+
GGML_API struct ggml_tensor * ggml_expm1_inplace(
|
|
988
|
+
struct ggml_context * ctx,
|
|
989
|
+
struct ggml_tensor * a);
|
|
990
|
+
|
|
991
|
+
GGML_API struct ggml_tensor * ggml_softplus(
|
|
992
|
+
struct ggml_context * ctx,
|
|
993
|
+
struct ggml_tensor * a);
|
|
994
|
+
|
|
995
|
+
GGML_API struct ggml_tensor * ggml_softplus_inplace(
|
|
996
|
+
struct ggml_context * ctx,
|
|
997
|
+
struct ggml_tensor * a);
|
|
998
|
+
|
|
952
999
|
GGML_API struct ggml_tensor * ggml_sin(
|
|
953
1000
|
struct ggml_context * ctx,
|
|
954
1001
|
struct ggml_tensor * a);
|
|
@@ -975,6 +1022,10 @@ extern "C" {
|
|
|
975
1022
|
struct ggml_context * ctx,
|
|
976
1023
|
struct ggml_tensor * a);
|
|
977
1024
|
|
|
1025
|
+
GGML_API struct ggml_tensor * ggml_cumsum(
|
|
1026
|
+
struct ggml_context * ctx,
|
|
1027
|
+
struct ggml_tensor * a);
|
|
1028
|
+
|
|
978
1029
|
// mean along rows
|
|
979
1030
|
GGML_API struct ggml_tensor * ggml_mean(
|
|
980
1031
|
struct ggml_context * ctx,
|
|
@@ -1148,6 +1199,58 @@ extern "C" {
|
|
|
1148
1199
|
struct ggml_context * ctx,
|
|
1149
1200
|
struct ggml_tensor * a);
|
|
1150
1201
|
|
|
1202
|
+
GGML_API struct ggml_tensor * ggml_floor(
|
|
1203
|
+
struct ggml_context * ctx,
|
|
1204
|
+
struct ggml_tensor * a);
|
|
1205
|
+
|
|
1206
|
+
GGML_API struct ggml_tensor * ggml_floor_inplace(
|
|
1207
|
+
struct ggml_context * ctx,
|
|
1208
|
+
struct ggml_tensor * a);
|
|
1209
|
+
|
|
1210
|
+
GGML_API struct ggml_tensor * ggml_ceil(
|
|
1211
|
+
struct ggml_context * ctx,
|
|
1212
|
+
struct ggml_tensor * a);
|
|
1213
|
+
|
|
1214
|
+
GGML_API struct ggml_tensor * ggml_ceil_inplace(
|
|
1215
|
+
struct ggml_context * ctx,
|
|
1216
|
+
struct ggml_tensor * a);
|
|
1217
|
+
|
|
1218
|
+
GGML_API struct ggml_tensor * ggml_round(
|
|
1219
|
+
struct ggml_context * ctx,
|
|
1220
|
+
struct ggml_tensor * a);
|
|
1221
|
+
|
|
1222
|
+
GGML_API struct ggml_tensor * ggml_round_inplace(
|
|
1223
|
+
struct ggml_context * ctx,
|
|
1224
|
+
struct ggml_tensor * a);
|
|
1225
|
+
|
|
1226
|
+
/**
|
|
1227
|
+
* Truncates the fractional part of each element in the tensor (towards zero).
|
|
1228
|
+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
|
|
1229
|
+
* Similar to std::trunc in C/C++.
|
|
1230
|
+
*/
|
|
1231
|
+
|
|
1232
|
+
GGML_API struct ggml_tensor * ggml_trunc(
|
|
1233
|
+
struct ggml_context * ctx,
|
|
1234
|
+
struct ggml_tensor * a);
|
|
1235
|
+
|
|
1236
|
+
GGML_API struct ggml_tensor * ggml_trunc_inplace(
|
|
1237
|
+
struct ggml_context * ctx,
|
|
1238
|
+
struct ggml_tensor * a);
|
|
1239
|
+
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
// xIELU activation function
|
|
1243
|
+
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
|
|
1244
|
+
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
|
|
1245
|
+
// that constrain the positive and negative source alpha values respectively
|
|
1246
|
+
GGML_API struct ggml_tensor * ggml_xielu(
|
|
1247
|
+
struct ggml_context * ctx,
|
|
1248
|
+
struct ggml_tensor * a,
|
|
1249
|
+
float alpha_n,
|
|
1250
|
+
float alpha_p,
|
|
1251
|
+
float beta,
|
|
1252
|
+
float eps);
|
|
1253
|
+
|
|
1151
1254
|
// gated linear unit ops
|
|
1152
1255
|
// A: n columns, r rows,
|
|
1153
1256
|
// result is n / 2 columns, r rows,
|
|
@@ -1615,6 +1718,13 @@ extern "C" {
|
|
|
1615
1718
|
float scale,
|
|
1616
1719
|
float max_bias);
|
|
1617
1720
|
|
|
1721
|
+
GGML_API struct ggml_tensor * ggml_soft_max_ext_inplace(
|
|
1722
|
+
struct ggml_context * ctx,
|
|
1723
|
+
struct ggml_tensor * a,
|
|
1724
|
+
struct ggml_tensor * mask,
|
|
1725
|
+
float scale,
|
|
1726
|
+
float max_bias);
|
|
1727
|
+
|
|
1618
1728
|
GGML_API void ggml_soft_max_add_sinks(
|
|
1619
1729
|
struct ggml_tensor * a,
|
|
1620
1730
|
struct ggml_tensor * sinks);
|
|
@@ -2041,12 +2151,14 @@ extern "C" {
|
|
|
2041
2151
|
enum ggml_scale_mode {
|
|
2042
2152
|
GGML_SCALE_MODE_NEAREST = 0,
|
|
2043
2153
|
GGML_SCALE_MODE_BILINEAR = 1,
|
|
2154
|
+
GGML_SCALE_MODE_BICUBIC = 2,
|
|
2044
2155
|
|
|
2045
2156
|
GGML_SCALE_MODE_COUNT
|
|
2046
2157
|
};
|
|
2047
2158
|
|
|
2048
2159
|
enum ggml_scale_flag {
|
|
2049
|
-
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
|
2160
|
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8),
|
|
2161
|
+
GGML_SCALE_FLAG_ANTIALIAS = (1 << 9),
|
|
2050
2162
|
};
|
|
2051
2163
|
|
|
2052
2164
|
// interpolate
|
|
@@ -2089,6 +2201,15 @@ extern "C" {
|
|
|
2089
2201
|
int p2,
|
|
2090
2202
|
int p3);
|
|
2091
2203
|
|
|
2204
|
+
// pad each dimension with values on the other side of the torus (looping around)
|
|
2205
|
+
GGML_API struct ggml_tensor * ggml_pad_circular(
|
|
2206
|
+
struct ggml_context * ctx,
|
|
2207
|
+
struct ggml_tensor * a,
|
|
2208
|
+
int p0,
|
|
2209
|
+
int p1,
|
|
2210
|
+
int p2,
|
|
2211
|
+
int p3);
|
|
2212
|
+
|
|
2092
2213
|
GGML_API struct ggml_tensor * ggml_pad_ext(
|
|
2093
2214
|
struct ggml_context * ctx,
|
|
2094
2215
|
struct ggml_tensor * a,
|
|
@@ -2102,6 +2223,19 @@ extern "C" {
|
|
|
2102
2223
|
int rp3
|
|
2103
2224
|
);
|
|
2104
2225
|
|
|
2226
|
+
// pad each dimension with values on the other side of the torus (looping around)
|
|
2227
|
+
GGML_API struct ggml_tensor * ggml_pad_ext_circular(
|
|
2228
|
+
struct ggml_context * ctx,
|
|
2229
|
+
struct ggml_tensor * a,
|
|
2230
|
+
int lp0,
|
|
2231
|
+
int rp0,
|
|
2232
|
+
int lp1,
|
|
2233
|
+
int rp1,
|
|
2234
|
+
int lp2,
|
|
2235
|
+
int rp2,
|
|
2236
|
+
int lp3,
|
|
2237
|
+
int rp3);
|
|
2238
|
+
|
|
2105
2239
|
// pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
|
|
2106
2240
|
GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
|
|
2107
2241
|
struct ggml_context * ctx,
|
|
@@ -2119,6 +2253,23 @@ extern "C" {
|
|
|
2119
2253
|
int shift2,
|
|
2120
2254
|
int shift3);
|
|
2121
2255
|
|
|
2256
|
+
// Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
|
|
2257
|
+
// zeroes everywhere outside the masked area
|
|
2258
|
+
GGML_API struct ggml_tensor * ggml_tri(
|
|
2259
|
+
struct ggml_context * ctx,
|
|
2260
|
+
struct ggml_tensor * a,
|
|
2261
|
+
enum ggml_tri_type type);
|
|
2262
|
+
|
|
2263
|
+
// Fill tensor a with constant c
|
|
2264
|
+
GGML_API struct ggml_tensor * ggml_fill(
|
|
2265
|
+
struct ggml_context * ctx,
|
|
2266
|
+
struct ggml_tensor * a,
|
|
2267
|
+
float c);
|
|
2268
|
+
|
|
2269
|
+
GGML_API struct ggml_tensor * ggml_fill_inplace(
|
|
2270
|
+
struct ggml_context * ctx,
|
|
2271
|
+
struct ggml_tensor * a,
|
|
2272
|
+
float c);
|
|
2122
2273
|
|
|
2123
2274
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
2124
2275
|
// timesteps: [N,]
|
|
@@ -2140,25 +2291,30 @@ extern "C" {
|
|
|
2140
2291
|
struct ggml_tensor * a,
|
|
2141
2292
|
enum ggml_sort_order order);
|
|
2142
2293
|
|
|
2143
|
-
|
|
2294
|
+
// similar to ggml_top_k but implemented as `argsort` + `view`
|
|
2295
|
+
GGML_API struct ggml_tensor * ggml_argsort_top_k(
|
|
2144
2296
|
struct ggml_context * ctx,
|
|
2145
|
-
|
|
2146
|
-
|
|
2147
|
-
float step);
|
|
2297
|
+
struct ggml_tensor * a,
|
|
2298
|
+
int k);
|
|
2148
2299
|
|
|
2149
2300
|
// top k elements per row
|
|
2301
|
+
// note: the resulting top k indices are in no particular order
|
|
2150
2302
|
GGML_API struct ggml_tensor * ggml_top_k(
|
|
2151
2303
|
struct ggml_context * ctx,
|
|
2152
2304
|
struct ggml_tensor * a,
|
|
2153
2305
|
int k);
|
|
2154
2306
|
|
|
2155
|
-
|
|
2307
|
+
GGML_API struct ggml_tensor * ggml_arange(
|
|
2308
|
+
struct ggml_context * ctx,
|
|
2309
|
+
float start,
|
|
2310
|
+
float stop,
|
|
2311
|
+
float step);
|
|
2156
2312
|
|
|
2157
|
-
// q: [n_embd_k, n_batch,
|
|
2158
|
-
// k: [n_embd_k, n_kv,
|
|
2159
|
-
// v: [n_embd_v, n_kv,
|
|
2160
|
-
// mask: [n_kv,
|
|
2161
|
-
// res: [n_embd_v, n_head,
|
|
2313
|
+
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
|
2314
|
+
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
|
|
2315
|
+
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
|
|
2316
|
+
// mask: [n_kv, n_batch, ne32, ne33]
|
|
2317
|
+
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
|
|
2162
2318
|
//
|
|
2163
2319
|
// broadcast:
|
|
2164
2320
|
// n_head % n_head_kv == 0
|
|
@@ -2288,6 +2444,27 @@ extern "C" {
|
|
|
2288
2444
|
struct ggml_tensor * b,
|
|
2289
2445
|
struct ggml_tensor * state);
|
|
2290
2446
|
|
|
2447
|
+
/* Solves a specific equation of the form Ax=B, where A is a triangular matrix
|
|
2448
|
+
* without zeroes on the diagonal (i.e. invertible).
|
|
2449
|
+
* B can have any number of columns, but must have the same number of rows as A
|
|
2450
|
+
* If A is [n, n] and B is [n, m], then the result will be [n, m] as well
|
|
2451
|
+
* Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
|
|
2452
|
+
* where n > 100 sparingly, pre-chunk if necessary.
|
|
2453
|
+
*
|
|
2454
|
+
* If left = false, solves xA=B instead
|
|
2455
|
+
* If lower = false, assumes upper triangular instead
|
|
2456
|
+
* If uni = true, assumes diagonal of A to be all ones (will override actual values)
|
|
2457
|
+
*
|
|
2458
|
+
* TODO: currently only lower, right, non-unitriangular variant is implemented
|
|
2459
|
+
*/
|
|
2460
|
+
GGML_API struct ggml_tensor * ggml_solve_tri(
|
|
2461
|
+
struct ggml_context * ctx,
|
|
2462
|
+
struct ggml_tensor * a,
|
|
2463
|
+
struct ggml_tensor * b,
|
|
2464
|
+
bool left,
|
|
2465
|
+
bool lower,
|
|
2466
|
+
bool uni);
|
|
2467
|
+
|
|
2291
2468
|
// custom operators
|
|
2292
2469
|
|
|
2293
2470
|
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
|
@@ -2443,7 +2620,8 @@ extern "C" {
|
|
|
2443
2620
|
|
|
2444
2621
|
// Set callback for all future logging events.
|
|
2445
2622
|
// If this is not called, or NULL is supplied, everything is output on stderr.
|
|
2446
|
-
GGML_API void
|
|
2623
|
+
GGML_API void ggml_log_get(ggml_log_callback * log_callback, void ** user_data);
|
|
2624
|
+
GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
|
|
2447
2625
|
|
|
2448
2626
|
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
|
2449
2627
|
|
|
@@ -127,10 +127,6 @@ if (NOT MSVC)
|
|
|
127
127
|
endif()
|
|
128
128
|
endif()
|
|
129
129
|
|
|
130
|
-
if (MINGW)
|
|
131
|
-
add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
|
|
132
|
-
endif()
|
|
133
|
-
|
|
134
130
|
#
|
|
135
131
|
# POSIX conformance
|
|
136
132
|
#
|
|
@@ -145,6 +141,9 @@ endif()
|
|
|
145
141
|
# which was introduced in POSIX.1-2008, forcing us to go higher
|
|
146
142
|
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
|
|
147
143
|
add_compile_definitions(_XOPEN_SOURCE=700)
|
|
144
|
+
elseif (CMAKE_SYSTEM_NAME MATCHES "AIX")
|
|
145
|
+
# Don't define _XOPEN_SOURCE. We need _ALL_SOURCE, which is the default,
|
|
146
|
+
# in order to define _SC_PHYS_PAGES.
|
|
148
147
|
else()
|
|
149
148
|
add_compile_definitions(_XOPEN_SOURCE=600)
|
|
150
149
|
endif()
|
|
@@ -208,15 +207,29 @@ add_library(ggml-base
|
|
|
208
207
|
ggml-quants.h
|
|
209
208
|
gguf.cpp)
|
|
210
209
|
|
|
210
|
+
set_target_properties(ggml-base PROPERTIES
|
|
211
|
+
VERSION ${GGML_VERSION}
|
|
212
|
+
SOVERSION ${GGML_VERSION_MAJOR}
|
|
213
|
+
)
|
|
214
|
+
|
|
211
215
|
target_include_directories(ggml-base PRIVATE .)
|
|
212
216
|
if (GGML_BACKEND_DL)
|
|
213
217
|
target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
|
|
214
218
|
endif()
|
|
215
219
|
|
|
220
|
+
if (GGML_SCHED_NO_REALLOC)
|
|
221
|
+
target_compile_definitions(ggml-base PUBLIC GGML_SCHED_NO_REALLOC)
|
|
222
|
+
endif()
|
|
223
|
+
|
|
216
224
|
add_library(ggml
|
|
217
225
|
ggml-backend-reg.cpp)
|
|
218
226
|
add_library(ggml::ggml ALIAS ggml)
|
|
219
227
|
|
|
228
|
+
set_target_properties(ggml PROPERTIES
|
|
229
|
+
VERSION ${GGML_VERSION}
|
|
230
|
+
SOVERSION ${GGML_VERSION_MAJOR}
|
|
231
|
+
)
|
|
232
|
+
|
|
220
233
|
if (GGML_BACKEND_DIR)
|
|
221
234
|
if (NOT GGML_BACKEND_DL)
|
|
222
235
|
message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL")
|
|
@@ -256,6 +269,15 @@ function(ggml_add_backend_library backend)
|
|
|
256
269
|
target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
|
|
257
270
|
endif()
|
|
258
271
|
|
|
272
|
+
# Set versioning properties for all backend libraries
|
|
273
|
+
# Building a MODULE library with a version is not supported on macOS (https://gitlab.kitware.com/cmake/cmake/-/issues/20782)
|
|
274
|
+
if (NOT (APPLE AND GGML_BACKEND_DL))
|
|
275
|
+
set_target_properties(${backend} PROPERTIES
|
|
276
|
+
VERSION ${GGML_VERSION}
|
|
277
|
+
SOVERSION ${GGML_VERSION_MAJOR}
|
|
278
|
+
)
|
|
279
|
+
endif()
|
|
280
|
+
|
|
259
281
|
if(NOT GGML_AVAILABLE_BACKENDS)
|
|
260
282
|
set(GGML_AVAILABLE_BACKENDS "${backend}"
|
|
261
283
|
CACHE INTERNAL "List of backends for cmake package")
|
|
@@ -301,6 +323,22 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
|
301
323
|
set(GGML_INTERNAL_${feat} ON)
|
|
302
324
|
endforeach()
|
|
303
325
|
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
|
326
|
+
foreach (feat ${ARGN})
|
|
327
|
+
set(GGML_INTERNAL_${feat} ON)
|
|
328
|
+
endforeach()
|
|
329
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
|
|
330
|
+
foreach (feat VXE2 NNPA)
|
|
331
|
+
set(GGML_INTERNAL_${feat} OFF)
|
|
332
|
+
endforeach()
|
|
333
|
+
|
|
334
|
+
foreach (feat ${ARGN})
|
|
335
|
+
set(GGML_INTERNAL_${feat} ON)
|
|
336
|
+
endforeach()
|
|
337
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
|
|
338
|
+
foreach (feat RVV)
|
|
339
|
+
set(GGML_INTERNAL_${feat} OFF)
|
|
340
|
+
endforeach()
|
|
341
|
+
|
|
304
342
|
foreach (feat ${ARGN})
|
|
305
343
|
set(GGML_INTERNAL_${feat} ON)
|
|
306
344
|
endforeach()
|
|
@@ -319,15 +357,29 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
319
357
|
endif()
|
|
320
358
|
if (GGML_SYSTEM_ARCH STREQUAL "x86")
|
|
321
359
|
ggml_add_cpu_backend_variant(x64)
|
|
322
|
-
ggml_add_cpu_backend_variant(sse42
|
|
323
|
-
ggml_add_cpu_backend_variant(sandybridge
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
360
|
+
ggml_add_cpu_backend_variant(sse42 SSE42)
|
|
361
|
+
ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
|
|
362
|
+
if (NOT MSVC)
|
|
363
|
+
# __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
|
|
364
|
+
ggml_add_cpu_backend_variant(ivybridge SSE42 AVX F16C)
|
|
365
|
+
ggml_add_cpu_backend_variant(piledriver SSE42 AVX F16C FMA)
|
|
366
|
+
endif()
|
|
367
|
+
ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C FMA AVX2 BMI2)
|
|
368
|
+
ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C FMA AVX2 BMI2 AVX512)
|
|
369
|
+
ggml_add_cpu_backend_variant(cannonlake SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VBMI)
|
|
370
|
+
ggml_add_cpu_backend_variant(cascadelake SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VNNI)
|
|
371
|
+
ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VBMI AVX512_VNNI)
|
|
372
|
+
if (NOT MSVC)
|
|
373
|
+
# MSVC 2022 doesn't support BF16 intrinsics without `/arch:AVX10.1` ?!
|
|
374
|
+
# https://learn.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list?view=msvc-170
|
|
375
|
+
# https://learn.microsoft.com/en-us/cpp/build/reference/arch-x64?view=msvc-170
|
|
376
|
+
ggml_add_cpu_backend_variant(cooperlake SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VNNI AVX512_BF16)
|
|
377
|
+
ggml_add_cpu_backend_variant(zen4 SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16)
|
|
378
|
+
endif()
|
|
379
|
+
ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C FMA AVX2 BMI2 AVX_VNNI)
|
|
328
380
|
if (NOT MSVC)
|
|
329
381
|
# MSVC doesn't support AMX
|
|
330
|
-
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2
|
|
382
|
+
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
331
383
|
endif()
|
|
332
384
|
elseif(GGML_SYSTEM_ARCH STREQUAL "ARM")
|
|
333
385
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
@@ -348,6 +400,9 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
348
400
|
ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
|
|
349
401
|
ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
|
|
350
402
|
ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
|
|
403
|
+
ggml_add_cpu_backend_variant(android_armv9.0_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE2)
|
|
404
|
+
ggml_add_cpu_backend_variant(android_armv9.2_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SME)
|
|
405
|
+
ggml_add_cpu_backend_variant(android_armv9.2_2 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SVE2 SME)
|
|
351
406
|
elseif (APPLE)
|
|
352
407
|
ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
|
|
353
408
|
ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)
|
|
@@ -368,6 +423,20 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
368
423
|
else()
|
|
369
424
|
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
|
|
370
425
|
endif()
|
|
426
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
|
|
427
|
+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
428
|
+
ggml_add_cpu_backend_variant(z15 Z15 VXE2)
|
|
429
|
+
ggml_add_cpu_backend_variant(z16 Z16 VXE2 NNPA)
|
|
430
|
+
else()
|
|
431
|
+
message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
|
|
432
|
+
endif()
|
|
433
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
|
|
434
|
+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
435
|
+
ggml_add_cpu_backend_variant(riscv64_0)
|
|
436
|
+
ggml_add_cpu_backend_variant(riscv64_v RVV)
|
|
437
|
+
else()
|
|
438
|
+
message(FATAL_ERROR "Unsupported RISC-V target OS: ${CMAKE_SYSTEM_NAME}")
|
|
439
|
+
endif()
|
|
371
440
|
else()
|
|
372
441
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
|
373
442
|
endif()
|
|
@@ -387,6 +456,8 @@ ggml_add_backend(Vulkan)
|
|
|
387
456
|
ggml_add_backend(WebGPU)
|
|
388
457
|
ggml_add_backend(zDNN)
|
|
389
458
|
ggml_add_backend(OpenCL)
|
|
459
|
+
ggml_add_backend(Hexagon)
|
|
460
|
+
ggml_add_backend(ZenDNN)
|
|
390
461
|
|
|
391
462
|
foreach (target ggml-base ggml)
|
|
392
463
|
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
|