whispercpp 1.3.4 → 1.3.5
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -43
- data/ext/extconf.rb +2 -2
- data/ext/ruby_whisper.c +14 -2
- data/ext/ruby_whisper.h +39 -0
- data/ext/ruby_whisper_context.c +22 -22
- data/ext/ruby_whisper_model.c +12 -12
- data/ext/ruby_whisper_params.c +47 -23
- data/ext/ruby_whisper_segment.c +84 -19
- data/ext/ruby_whisper_token.c +351 -0
- data/ext/ruby_whisper_transcribe.cpp +1 -1
- data/ext/ruby_whisper_vad_context.c +75 -0
- data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
- data/ext/ruby_whisper_vad_segment.c +139 -0
- data/ext/ruby_whisper_vad_segments.c +106 -0
- data/ext/sources/CMakeLists.txt +4 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
- data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
- data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
- data/ext/sources/examples/addon.node/vad-example.js +2 -2
- data/ext/sources/examples/cli/cli.cpp +121 -112
- data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
- data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
- data/ext/sources/examples/server/server.cpp +10 -11
- data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
- data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
- data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
- data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
- data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
- data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
- data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
- data/ext/sources/examples/talk-llama/llama-context.h +57 -9
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
- data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
- data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
- data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
- data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
- data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
- data/ext/sources/examples/talk-llama/llama-model.h +44 -3
- data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
- data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
- data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
- data/ext/sources/examples/talk-llama/llama.cpp +729 -2
- data/ext/sources/examples/talk-llama/llama.h +152 -14
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
- data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
- data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
- data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
- data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
- data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
- data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
- data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
- data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
- data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
- data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
- data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
- data/ext/sources/examples/talk-llama/models/models.h +569 -0
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
- data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
- data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
- data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
- data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
- data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
- data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
- data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
- data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
- data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
- data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
- data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
- data/ext/sources/ggml/CMakeLists.txt +82 -54
- data/ext/sources/ggml/include/ggml-alloc.h +9 -0
- data/ext/sources/ggml/include/ggml-backend.h +4 -1
- data/ext/sources/ggml/include/ggml-cpu.h +1 -0
- data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
- data/ext/sources/ggml/include/ggml-rpc.h +8 -11
- data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
- data/ext/sources/ggml/include/ggml.h +190 -12
- data/ext/sources/ggml/src/CMakeLists.txt +82 -11
- data/ext/sources/ggml/src/ggml-alloc.c +124 -41
- data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
- data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
- data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
- data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
- data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
- data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
- data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
- data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
- data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
- data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
- data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
- data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
- data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
- data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
- data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
- data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
- data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
- data/ext/sources/ggml/src/ggml-impl.h +67 -6
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
- data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
- data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
- data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
- data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
- data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
- data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
- data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
- data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
- data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
- data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
- data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
- data/ext/sources/ggml/src/ggml.c +425 -33
- data/ext/sources/include/whisper.h +1 -0
- data/ext/sources/src/CMakeLists.txt +3 -1
- data/ext/sources/src/whisper.cpp +101 -35
- data/ext/sources/tests/CMakeLists.txt +2 -2
- data/ext/sources/tests/test-vad-full.cpp +4 -2
- data/ext/sources/tests/test-vad.cpp +1 -1
- data/extsources.rb +1 -0
- data/lib/whisper/model/uri.rb +17 -18
- data/sig/whisper.rbs +119 -2
- data/test/test_params.rb +16 -8
- data/test/test_segment.rb +0 -1
- data/test/test_token.rb +70 -0
- data/test/test_vad.rb +1 -1
- data/test/test_vad_context.rb +50 -0
- data/test/test_vad_segment.rb +19 -0
- data/test/test_vad_segments.rb +16 -0
- data/test/test_whisper.rb +7 -0
- data/whispercpp.gemspec +1 -1
- metadata +287 -34
- data/ext/sources/build-xcframework.sh +0 -571
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
- /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
data/ext/sources/ggml/src/ggml.c
CHANGED
|
@@ -53,13 +53,15 @@
|
|
|
53
53
|
|
|
54
54
|
#define UNUSED GGML_UNUSED
|
|
55
55
|
|
|
56
|
+
// Needed for ggml_fp32_to_bf16_row()
|
|
57
|
+
#if defined(__AVX512BF16__)
|
|
56
58
|
#if defined(_MSC_VER)
|
|
57
|
-
#define m512bh(p) p
|
|
58
59
|
#define m512i(p) p
|
|
59
60
|
#else
|
|
60
|
-
#
|
|
61
|
+
#include <immintrin.h>
|
|
61
62
|
#define m512i(p) (__m512i)(p)
|
|
62
|
-
#endif
|
|
63
|
+
#endif // defined(_MSC_VER)
|
|
64
|
+
#endif // defined(__AVX512BF16__)
|
|
63
65
|
|
|
64
66
|
#if defined(__linux__) || \
|
|
65
67
|
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
|
|
@@ -124,6 +126,13 @@ static void ggml_print_backtrace_symbols(void) {
|
|
|
124
126
|
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
|
|
125
127
|
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
|
|
126
128
|
}
|
|
129
|
+
#elif defined(__APPLE__)
|
|
130
|
+
#include <execinfo.h>
|
|
131
|
+
static void ggml_print_backtrace_symbols(void) {
|
|
132
|
+
void * trace[100];
|
|
133
|
+
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
|
|
134
|
+
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
|
|
135
|
+
}
|
|
127
136
|
#else
|
|
128
137
|
static void ggml_print_backtrace_symbols(void) {
|
|
129
138
|
// platform not supported
|
|
@@ -135,6 +144,20 @@ void ggml_print_backtrace(void) {
|
|
|
135
144
|
if (GGML_NO_BACKTRACE) {
|
|
136
145
|
return;
|
|
137
146
|
}
|
|
147
|
+
#if defined(__APPLE__)
|
|
148
|
+
// On macOS, fork+debugger attachment is problematic due to:
|
|
149
|
+
// 1. libdispatch "poisons" forked child processes
|
|
150
|
+
// 2. lldb has issues attaching to parent from forked child
|
|
151
|
+
// Use simple backtrace() instead to avoid Terminal.app crashes
|
|
152
|
+
const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
|
|
153
|
+
if (!GGML_BACKTRACE_LLDB) {
|
|
154
|
+
fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
|
|
155
|
+
fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
|
|
156
|
+
fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
|
|
157
|
+
ggml_print_backtrace_symbols();
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
#endif
|
|
138
161
|
#if defined(__linux__)
|
|
139
162
|
FILE * f = fopen("/proc/self/status", "r");
|
|
140
163
|
size_t size = 0;
|
|
@@ -935,6 +958,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
935
958
|
"COS",
|
|
936
959
|
"SUM",
|
|
937
960
|
"SUM_ROWS",
|
|
961
|
+
"CUMSUM",
|
|
938
962
|
"MEAN",
|
|
939
963
|
"ARGMAX",
|
|
940
964
|
"COUNT_EQUAL",
|
|
@@ -989,7 +1013,10 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
989
1013
|
"ARANGE",
|
|
990
1014
|
"TIMESTEP_EMBEDDING",
|
|
991
1015
|
"ARGSORT",
|
|
1016
|
+
"TOP_K",
|
|
992
1017
|
"LEAKY_RELU",
|
|
1018
|
+
"TRI",
|
|
1019
|
+
"FILL",
|
|
993
1020
|
|
|
994
1021
|
"FLASH_ATTN_EXT",
|
|
995
1022
|
"FLASH_ATTN_BACK",
|
|
@@ -1002,6 +1029,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
1002
1029
|
"RWKV_WKV6",
|
|
1003
1030
|
"GATED_LINEAR_ATTN",
|
|
1004
1031
|
"RWKV_WKV7",
|
|
1032
|
+
"SOLVE_TRI",
|
|
1005
1033
|
|
|
1006
1034
|
"UNARY",
|
|
1007
1035
|
|
|
@@ -1019,7 +1047,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
1019
1047
|
"GLU",
|
|
1020
1048
|
};
|
|
1021
1049
|
|
|
1022
|
-
static_assert(GGML_OP_COUNT ==
|
|
1050
|
+
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
|
|
1023
1051
|
|
|
1024
1052
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
1025
1053
|
"none",
|
|
@@ -1039,6 +1067,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1039
1067
|
"cos(x)",
|
|
1040
1068
|
"Σx",
|
|
1041
1069
|
"Σx_k",
|
|
1070
|
+
"cumsum(x)",
|
|
1042
1071
|
"Σx/n",
|
|
1043
1072
|
"argmax(x)",
|
|
1044
1073
|
"count_equal(x)",
|
|
@@ -1093,7 +1122,10 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1093
1122
|
"arange(start, stop, step)",
|
|
1094
1123
|
"timestep_embedding(timesteps, dim, max_period)",
|
|
1095
1124
|
"argsort(x)",
|
|
1125
|
+
"top_k(x)",
|
|
1096
1126
|
"leaky_relu(x)",
|
|
1127
|
+
"tri(x)",
|
|
1128
|
+
"fill(x, c)",
|
|
1097
1129
|
|
|
1098
1130
|
"flash_attn_ext(x)",
|
|
1099
1131
|
"flash_attn_back(x)",
|
|
@@ -1106,6 +1138,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1106
1138
|
"rwkv_wkv6(k, v, r, tf, td, s)",
|
|
1107
1139
|
"gated_linear_attn(k, v, q, gate, s)",
|
|
1108
1140
|
"rwkv_wkv7(r, w, k, v, a, b, s)",
|
|
1141
|
+
"A X = B, A triangular, solve X",
|
|
1109
1142
|
|
|
1110
1143
|
"unary(x)",
|
|
1111
1144
|
|
|
@@ -1123,7 +1156,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1123
1156
|
"glu(x)",
|
|
1124
1157
|
};
|
|
1125
1158
|
|
|
1126
|
-
static_assert(GGML_OP_COUNT ==
|
|
1159
|
+
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
|
|
1127
1160
|
|
|
1128
1161
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
|
1129
1162
|
|
|
@@ -1142,11 +1175,17 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
|
|
1142
1175
|
"HARDSWISH",
|
|
1143
1176
|
"HARDSIGMOID",
|
|
1144
1177
|
"EXP",
|
|
1178
|
+
"EXPM1",
|
|
1179
|
+
"SOFTPLUS",
|
|
1145
1180
|
"GELU_ERF",
|
|
1181
|
+
"XIELU",
|
|
1182
|
+
"FLOOR",
|
|
1183
|
+
"CEIL",
|
|
1184
|
+
"ROUND",
|
|
1185
|
+
"TRUNC",
|
|
1146
1186
|
};
|
|
1147
1187
|
|
|
1148
|
-
static_assert(GGML_UNARY_OP_COUNT ==
|
|
1149
|
-
|
|
1188
|
+
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
|
|
1150
1189
|
|
|
1151
1190
|
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
|
|
1152
1191
|
"REGLU",
|
|
@@ -2254,6 +2293,30 @@ struct ggml_tensor * ggml_log_inplace(
|
|
|
2254
2293
|
return ggml_log_impl(ctx, a, true);
|
|
2255
2294
|
}
|
|
2256
2295
|
|
|
2296
|
+
struct ggml_tensor * ggml_expm1(
|
|
2297
|
+
struct ggml_context * ctx,
|
|
2298
|
+
struct ggml_tensor * a) {
|
|
2299
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
|
|
2300
|
+
}
|
|
2301
|
+
|
|
2302
|
+
struct ggml_tensor * ggml_expm1_inplace(
|
|
2303
|
+
struct ggml_context * ctx,
|
|
2304
|
+
struct ggml_tensor * a) {
|
|
2305
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
|
|
2306
|
+
}
|
|
2307
|
+
|
|
2308
|
+
struct ggml_tensor * ggml_softplus(
|
|
2309
|
+
struct ggml_context * ctx,
|
|
2310
|
+
struct ggml_tensor * a) {
|
|
2311
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
|
|
2312
|
+
}
|
|
2313
|
+
|
|
2314
|
+
struct ggml_tensor * ggml_softplus_inplace(
|
|
2315
|
+
struct ggml_context * ctx,
|
|
2316
|
+
struct ggml_tensor * a) {
|
|
2317
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
|
|
2318
|
+
}
|
|
2319
|
+
|
|
2257
2320
|
// ggml_sin
|
|
2258
2321
|
|
|
2259
2322
|
static struct ggml_tensor * ggml_sin_impl(
|
|
@@ -2337,6 +2400,21 @@ struct ggml_tensor * ggml_sum_rows(
|
|
|
2337
2400
|
return result;
|
|
2338
2401
|
}
|
|
2339
2402
|
|
|
2403
|
+
// ggml_cumsum
|
|
2404
|
+
|
|
2405
|
+
struct ggml_tensor * ggml_cumsum(
|
|
2406
|
+
struct ggml_context * ctx,
|
|
2407
|
+
struct ggml_tensor * a) {
|
|
2408
|
+
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
|
2409
|
+
|
|
2410
|
+
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
|
2411
|
+
|
|
2412
|
+
result->op = GGML_OP_CUMSUM;
|
|
2413
|
+
result->src[0] = a;
|
|
2414
|
+
|
|
2415
|
+
return result;
|
|
2416
|
+
}
|
|
2417
|
+
|
|
2340
2418
|
// ggml_mean
|
|
2341
2419
|
|
|
2342
2420
|
struct ggml_tensor * ggml_mean(
|
|
@@ -2652,6 +2730,29 @@ struct ggml_tensor * ggml_silu_inplace(
|
|
|
2652
2730
|
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
|
|
2653
2731
|
}
|
|
2654
2732
|
|
|
2733
|
+
// ggml_xielu
|
|
2734
|
+
|
|
2735
|
+
struct ggml_tensor * ggml_xielu(
|
|
2736
|
+
struct ggml_context * ctx,
|
|
2737
|
+
struct ggml_tensor * a,
|
|
2738
|
+
float alpha_n,
|
|
2739
|
+
float alpha_p,
|
|
2740
|
+
float beta,
|
|
2741
|
+
float eps) {
|
|
2742
|
+
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
|
2743
|
+
|
|
2744
|
+
ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
|
|
2745
|
+
ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
|
|
2746
|
+
ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
|
|
2747
|
+
ggml_set_op_params_f32(result, 3, beta);
|
|
2748
|
+
ggml_set_op_params_f32(result, 4, eps);
|
|
2749
|
+
|
|
2750
|
+
result->op = GGML_OP_UNARY;
|
|
2751
|
+
result->src[0] = a;
|
|
2752
|
+
|
|
2753
|
+
return result;
|
|
2754
|
+
}
|
|
2755
|
+
|
|
2655
2756
|
// ggml_silu_back
|
|
2656
2757
|
|
|
2657
2758
|
struct ggml_tensor * ggml_silu_back(
|
|
@@ -2726,6 +2827,62 @@ static struct ggml_tensor * ggml_glu_impl(
|
|
|
2726
2827
|
return result;
|
|
2727
2828
|
}
|
|
2728
2829
|
|
|
2830
|
+
// ggml_floor
|
|
2831
|
+
|
|
2832
|
+
struct ggml_tensor * ggml_floor(
|
|
2833
|
+
struct ggml_context * ctx,
|
|
2834
|
+
struct ggml_tensor * a) {
|
|
2835
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
|
|
2836
|
+
}
|
|
2837
|
+
|
|
2838
|
+
struct ggml_tensor * ggml_floor_inplace(
|
|
2839
|
+
struct ggml_context * ctx,
|
|
2840
|
+
struct ggml_tensor * a) {
|
|
2841
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
|
|
2842
|
+
}
|
|
2843
|
+
|
|
2844
|
+
// ggml_ceil
|
|
2845
|
+
|
|
2846
|
+
struct ggml_tensor * ggml_ceil(
|
|
2847
|
+
struct ggml_context * ctx,
|
|
2848
|
+
struct ggml_tensor * a) {
|
|
2849
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
|
|
2850
|
+
}
|
|
2851
|
+
|
|
2852
|
+
struct ggml_tensor * ggml_ceil_inplace(
|
|
2853
|
+
struct ggml_context * ctx,
|
|
2854
|
+
struct ggml_tensor * a) {
|
|
2855
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
|
|
2856
|
+
}
|
|
2857
|
+
|
|
2858
|
+
//ggml_round
|
|
2859
|
+
|
|
2860
|
+
struct ggml_tensor * ggml_round(
|
|
2861
|
+
struct ggml_context * ctx,
|
|
2862
|
+
struct ggml_tensor * a) {
|
|
2863
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
|
|
2864
|
+
}
|
|
2865
|
+
|
|
2866
|
+
struct ggml_tensor * ggml_round_inplace(
|
|
2867
|
+
struct ggml_context * ctx,
|
|
2868
|
+
struct ggml_tensor * a) {
|
|
2869
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
|
|
2870
|
+
}
|
|
2871
|
+
|
|
2872
|
+
//ggml_trunc
|
|
2873
|
+
|
|
2874
|
+
struct ggml_tensor * ggml_trunc(
|
|
2875
|
+
struct ggml_context * ctx,
|
|
2876
|
+
struct ggml_tensor * a) {
|
|
2877
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
|
|
2878
|
+
}
|
|
2879
|
+
|
|
2880
|
+
struct ggml_tensor * ggml_trunc_inplace(
|
|
2881
|
+
struct ggml_context * ctx,
|
|
2882
|
+
struct ggml_tensor * a) {
|
|
2883
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
|
|
2884
|
+
}
|
|
2885
|
+
|
|
2729
2886
|
struct ggml_tensor * ggml_glu(
|
|
2730
2887
|
struct ggml_context * ctx,
|
|
2731
2888
|
struct ggml_tensor * a,
|
|
@@ -3829,6 +3986,15 @@ struct ggml_tensor * ggml_soft_max_ext(
|
|
|
3829
3986
|
return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
|
|
3830
3987
|
}
|
|
3831
3988
|
|
|
3989
|
+
struct ggml_tensor * ggml_soft_max_ext_inplace(
|
|
3990
|
+
struct ggml_context * ctx,
|
|
3991
|
+
struct ggml_tensor * a,
|
|
3992
|
+
struct ggml_tensor * mask,
|
|
3993
|
+
float scale,
|
|
3994
|
+
float max_bias) {
|
|
3995
|
+
return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
|
|
3996
|
+
}
|
|
3997
|
+
|
|
3832
3998
|
void ggml_soft_max_add_sinks(
|
|
3833
3999
|
struct ggml_tensor * a,
|
|
3834
4000
|
struct ggml_tensor * sinks) {
|
|
@@ -4748,6 +4914,8 @@ static struct ggml_tensor * ggml_interpolate_impl(
|
|
|
4748
4914
|
int64_t ne3,
|
|
4749
4915
|
uint32_t mode) {
|
|
4750
4916
|
GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
|
|
4917
|
+
// TODO: implement antialias for modes other than bilinear
|
|
4918
|
+
GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
|
|
4751
4919
|
|
|
4752
4920
|
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
|
4753
4921
|
|
|
@@ -4802,6 +4970,18 @@ struct ggml_tensor * ggml_pad(
|
|
|
4802
4970
|
return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
|
|
4803
4971
|
}
|
|
4804
4972
|
|
|
4973
|
+
// ggml_pad_circular
|
|
4974
|
+
|
|
4975
|
+
struct ggml_tensor * ggml_pad_circular(
|
|
4976
|
+
struct ggml_context * ctx,
|
|
4977
|
+
struct ggml_tensor * a,
|
|
4978
|
+
int p0,
|
|
4979
|
+
int p1,
|
|
4980
|
+
int p2,
|
|
4981
|
+
int p3) {
|
|
4982
|
+
return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
|
|
4983
|
+
}
|
|
4984
|
+
|
|
4805
4985
|
struct ggml_tensor * ggml_pad_ext(
|
|
4806
4986
|
struct ggml_context * ctx,
|
|
4807
4987
|
struct ggml_tensor * a,
|
|
@@ -4828,6 +5008,7 @@ struct ggml_tensor * ggml_pad_ext(
|
|
|
4828
5008
|
ggml_set_op_params_i32(result, 5, rp2);
|
|
4829
5009
|
ggml_set_op_params_i32(result, 6, lp3);
|
|
4830
5010
|
ggml_set_op_params_i32(result, 7, rp3);
|
|
5011
|
+
ggml_set_op_params_i32(result, 8, 0); // not circular by default
|
|
4831
5012
|
|
|
4832
5013
|
|
|
4833
5014
|
result->op = GGML_OP_PAD;
|
|
@@ -4836,6 +5017,25 @@ struct ggml_tensor * ggml_pad_ext(
|
|
|
4836
5017
|
return result;
|
|
4837
5018
|
}
|
|
4838
5019
|
|
|
5020
|
+
// ggml_pad_ext_circular
|
|
5021
|
+
|
|
5022
|
+
struct ggml_tensor * ggml_pad_ext_circular(
|
|
5023
|
+
struct ggml_context * ctx,
|
|
5024
|
+
struct ggml_tensor * a,
|
|
5025
|
+
int lp0,
|
|
5026
|
+
int rp0,
|
|
5027
|
+
int lp1,
|
|
5028
|
+
int rp1,
|
|
5029
|
+
int lp2,
|
|
5030
|
+
int rp2,
|
|
5031
|
+
int lp3,
|
|
5032
|
+
int rp3
|
|
5033
|
+
) {
|
|
5034
|
+
struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
|
|
5035
|
+
ggml_set_op_params_i32(result, 8, 1); // circular
|
|
5036
|
+
return result;
|
|
5037
|
+
}
|
|
5038
|
+
|
|
4839
5039
|
// ggml_pad_reflect_1d
|
|
4840
5040
|
|
|
4841
5041
|
struct ggml_tensor * ggml_pad_reflect_1d(
|
|
@@ -4895,28 +5095,6 @@ struct ggml_tensor * ggml_roll(
|
|
|
4895
5095
|
return result;
|
|
4896
5096
|
}
|
|
4897
5097
|
|
|
4898
|
-
// ggml_arange
|
|
4899
|
-
|
|
4900
|
-
struct ggml_tensor * ggml_arange(
|
|
4901
|
-
struct ggml_context * ctx,
|
|
4902
|
-
float start,
|
|
4903
|
-
float stop,
|
|
4904
|
-
float step) {
|
|
4905
|
-
GGML_ASSERT(stop > start);
|
|
4906
|
-
|
|
4907
|
-
const int64_t steps = (int64_t) ceilf((stop - start) / step);
|
|
4908
|
-
|
|
4909
|
-
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
|
|
4910
|
-
|
|
4911
|
-
ggml_set_op_params_f32(result, 0, start);
|
|
4912
|
-
ggml_set_op_params_f32(result, 1, stop);
|
|
4913
|
-
ggml_set_op_params_f32(result, 2, step);
|
|
4914
|
-
|
|
4915
|
-
result->op = GGML_OP_ARANGE;
|
|
4916
|
-
|
|
4917
|
-
return result;
|
|
4918
|
-
}
|
|
4919
|
-
|
|
4920
5098
|
// ggml_timestep_embedding
|
|
4921
5099
|
|
|
4922
5100
|
struct ggml_tensor * ggml_timestep_embedding(
|
|
@@ -4936,6 +5114,61 @@ struct ggml_tensor * ggml_timestep_embedding(
|
|
|
4936
5114
|
return result;
|
|
4937
5115
|
}
|
|
4938
5116
|
|
|
5117
|
+
// ggml_tri
|
|
5118
|
+
|
|
5119
|
+
struct ggml_tensor * ggml_tri(
|
|
5120
|
+
struct ggml_context * ctx,
|
|
5121
|
+
struct ggml_tensor * a,
|
|
5122
|
+
enum ggml_tri_type type) {
|
|
5123
|
+
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
|
5124
|
+
|
|
5125
|
+
GGML_ASSERT(ggml_is_contiguous(a));
|
|
5126
|
+
GGML_ASSERT(a->ne[0] == a->ne[1]);
|
|
5127
|
+
|
|
5128
|
+
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
|
5129
|
+
|
|
5130
|
+
ggml_set_op_params_i32(result, 0, type);
|
|
5131
|
+
|
|
5132
|
+
result->op = GGML_OP_TRI;
|
|
5133
|
+
result->src[0] = a;
|
|
5134
|
+
|
|
5135
|
+
return result;
|
|
5136
|
+
}
|
|
5137
|
+
|
|
5138
|
+
// ggml_fill
|
|
5139
|
+
|
|
5140
|
+
static struct ggml_tensor * ggml_fill_impl(
|
|
5141
|
+
struct ggml_context * ctx,
|
|
5142
|
+
struct ggml_tensor * a,
|
|
5143
|
+
float c,
|
|
5144
|
+
bool inplace) {
|
|
5145
|
+
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
|
5146
|
+
GGML_ASSERT(ggml_is_contiguous(a));
|
|
5147
|
+
|
|
5148
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
|
5149
|
+
|
|
5150
|
+
ggml_set_op_params_f32(result, 0, c);
|
|
5151
|
+
|
|
5152
|
+
result->op = GGML_OP_FILL;
|
|
5153
|
+
result->src[0] = a;
|
|
5154
|
+
|
|
5155
|
+
return result;
|
|
5156
|
+
}
|
|
5157
|
+
|
|
5158
|
+
struct ggml_tensor * ggml_fill(
|
|
5159
|
+
struct ggml_context * ctx,
|
|
5160
|
+
struct ggml_tensor * a,
|
|
5161
|
+
float c) {
|
|
5162
|
+
return ggml_fill_impl(ctx, a, c, false);
|
|
5163
|
+
}
|
|
5164
|
+
|
|
5165
|
+
struct ggml_tensor * ggml_fill_inplace(
|
|
5166
|
+
struct ggml_context * ctx,
|
|
5167
|
+
struct ggml_tensor * a,
|
|
5168
|
+
float c) {
|
|
5169
|
+
return ggml_fill_impl(ctx, a, c, true);
|
|
5170
|
+
}
|
|
5171
|
+
|
|
4939
5172
|
// ggml_argsort
|
|
4940
5173
|
|
|
4941
5174
|
struct ggml_tensor * ggml_argsort(
|
|
@@ -4943,6 +5176,7 @@ struct ggml_tensor * ggml_argsort(
|
|
|
4943
5176
|
struct ggml_tensor * a,
|
|
4944
5177
|
enum ggml_sort_order order) {
|
|
4945
5178
|
GGML_ASSERT(a->ne[0] <= INT32_MAX);
|
|
5179
|
+
|
|
4946
5180
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
|
|
4947
5181
|
|
|
4948
5182
|
ggml_set_op_params_i32(result, 0, (int32_t) order);
|
|
@@ -4953,9 +5187,9 @@ struct ggml_tensor * ggml_argsort(
|
|
|
4953
5187
|
return result;
|
|
4954
5188
|
}
|
|
4955
5189
|
|
|
4956
|
-
//
|
|
5190
|
+
// ggml_argsort_top_k
|
|
4957
5191
|
|
|
4958
|
-
struct ggml_tensor *
|
|
5192
|
+
struct ggml_tensor * ggml_argsort_top_k(
|
|
4959
5193
|
struct ggml_context * ctx,
|
|
4960
5194
|
struct ggml_tensor * a,
|
|
4961
5195
|
int k) {
|
|
@@ -4971,6 +5205,44 @@ struct ggml_tensor * ggml_top_k(
|
|
|
4971
5205
|
return result;
|
|
4972
5206
|
}
|
|
4973
5207
|
|
|
5208
|
+
// ggml_top_k
|
|
5209
|
+
|
|
5210
|
+
struct ggml_tensor * ggml_top_k(
|
|
5211
|
+
struct ggml_context * ctx,
|
|
5212
|
+
struct ggml_tensor * a,
|
|
5213
|
+
int k) {
|
|
5214
|
+
GGML_ASSERT(a->ne[0] >= k);
|
|
5215
|
+
|
|
5216
|
+
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
|
|
5217
|
+
|
|
5218
|
+
result->op = GGML_OP_TOP_K;
|
|
5219
|
+
result->src[0] = a;
|
|
5220
|
+
|
|
5221
|
+
return result;
|
|
5222
|
+
}
|
|
5223
|
+
|
|
5224
|
+
// ggml_arange
|
|
5225
|
+
|
|
5226
|
+
struct ggml_tensor * ggml_arange(
|
|
5227
|
+
struct ggml_context * ctx,
|
|
5228
|
+
float start,
|
|
5229
|
+
float stop,
|
|
5230
|
+
float step) {
|
|
5231
|
+
GGML_ASSERT(stop > start);
|
|
5232
|
+
|
|
5233
|
+
const int64_t steps = (int64_t) ceilf((stop - start) / step);
|
|
5234
|
+
|
|
5235
|
+
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
|
|
5236
|
+
|
|
5237
|
+
ggml_set_op_params_f32(result, 0, start);
|
|
5238
|
+
ggml_set_op_params_f32(result, 1, stop);
|
|
5239
|
+
ggml_set_op_params_f32(result, 2, step);
|
|
5240
|
+
|
|
5241
|
+
result->op = GGML_OP_ARANGE;
|
|
5242
|
+
|
|
5243
|
+
return result;
|
|
5244
|
+
}
|
|
5245
|
+
|
|
4974
5246
|
// ggml_flash_attn_ext
|
|
4975
5247
|
|
|
4976
5248
|
struct ggml_tensor * ggml_flash_attn_ext(
|
|
@@ -4990,8 +5262,6 @@ struct ggml_tensor * ggml_flash_attn_ext(
|
|
|
4990
5262
|
|
|
4991
5263
|
if (mask) {
|
|
4992
5264
|
GGML_ASSERT(ggml_is_contiguous(mask));
|
|
4993
|
-
GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
|
|
4994
|
-
"the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
|
|
4995
5265
|
//GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
|
|
4996
5266
|
|
|
4997
5267
|
GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
|
|
@@ -5790,6 +6060,41 @@ struct ggml_tensor * ggml_opt_step_sgd(
|
|
|
5790
6060
|
return result;
|
|
5791
6061
|
}
|
|
5792
6062
|
|
|
6063
|
+
// solve_tri
|
|
6064
|
+
|
|
6065
|
+
struct ggml_tensor * ggml_solve_tri(
|
|
6066
|
+
struct ggml_context * ctx,
|
|
6067
|
+
struct ggml_tensor * a,
|
|
6068
|
+
struct ggml_tensor * b,
|
|
6069
|
+
bool left,
|
|
6070
|
+
bool lower,
|
|
6071
|
+
bool uni) {
|
|
6072
|
+
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
|
6073
|
+
GGML_ASSERT(b->type == GGML_TYPE_F32);
|
|
6074
|
+
|
|
6075
|
+
// A must be square and lower diagonal
|
|
6076
|
+
GGML_ASSERT(a->ne[0] == a->ne[1]);
|
|
6077
|
+
// B must have same outer dimension as A
|
|
6078
|
+
GGML_ASSERT(a->ne[1] == b->ne[1]);
|
|
6079
|
+
|
|
6080
|
+
// batch dimensions must be equal
|
|
6081
|
+
GGML_ASSERT(a->ne[2] == b->ne[2]);
|
|
6082
|
+
GGML_ASSERT(a->ne[3] == b->ne[3]);
|
|
6083
|
+
|
|
6084
|
+
GGML_ASSERT(ggml_is_contiguous(a));
|
|
6085
|
+
GGML_ASSERT(ggml_is_contiguous(b));
|
|
6086
|
+
|
|
6087
|
+
GGML_ASSERT(lower && left && !uni); // TODO: support other variants
|
|
6088
|
+
|
|
6089
|
+
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
|
|
6090
|
+
|
|
6091
|
+
result->op = GGML_OP_SOLVE_TRI;
|
|
6092
|
+
result->src[0] = a;
|
|
6093
|
+
result->src[1] = b;
|
|
6094
|
+
|
|
6095
|
+
return result;
|
|
6096
|
+
}
|
|
6097
|
+
|
|
5793
6098
|
////////////////////////////////////////////////////////////////////////////////
|
|
5794
6099
|
|
|
5795
6100
|
struct ggml_hash_set ggml_hash_set_new(size_t size) {
|
|
@@ -6362,6 +6667,16 @@ static void ggml_compute_backward(
|
|
|
6362
6667
|
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
|
|
6363
6668
|
}
|
|
6364
6669
|
} break;
|
|
6670
|
+
case GGML_UNARY_OP_EXPM1: {
|
|
6671
|
+
if (src0_needs_grads) {
|
|
6672
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
|
|
6673
|
+
}
|
|
6674
|
+
} break;
|
|
6675
|
+
case GGML_UNARY_OP_SOFTPLUS: {
|
|
6676
|
+
if (src0_needs_grads) {
|
|
6677
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
|
|
6678
|
+
}
|
|
6679
|
+
} break;
|
|
6365
6680
|
default: {
|
|
6366
6681
|
fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
|
|
6367
6682
|
__func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
|
|
@@ -6872,6 +7187,78 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
|
6872
7187
|
GGML_LOG_INFO("========================================\n");
|
|
6873
7188
|
}
|
|
6874
7189
|
|
|
7190
|
+
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
|
|
7191
|
+
const int * idxs,
|
|
7192
|
+
int count,
|
|
7193
|
+
const struct ggml_tensor * tensor) {
|
|
7194
|
+
GGML_ASSERT(cgraph && idxs);
|
|
7195
|
+
for (int i = 0; i < count; ++i) {
|
|
7196
|
+
const int node_idx = idxs[i];
|
|
7197
|
+
|
|
7198
|
+
if (node_idx >= cgraph->n_nodes) {
|
|
7199
|
+
return -1;
|
|
7200
|
+
}
|
|
7201
|
+
if (cgraph->nodes[node_idx] == tensor) {
|
|
7202
|
+
return i;
|
|
7203
|
+
}
|
|
7204
|
+
}
|
|
7205
|
+
return -1;
|
|
7206
|
+
}
|
|
7207
|
+
|
|
7208
|
+
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
|
|
7209
|
+
const int * node_idxs,
|
|
7210
|
+
int count,
|
|
7211
|
+
const enum ggml_op * ops,
|
|
7212
|
+
const int * outputs,
|
|
7213
|
+
int num_outputs) {
|
|
7214
|
+
GGML_ASSERT(outputs && num_outputs > 0);
|
|
7215
|
+
|
|
7216
|
+
for (int i = 0; i < count; ++i) {
|
|
7217
|
+
if (node_idxs[i] >= cgraph->n_nodes) {
|
|
7218
|
+
return false;
|
|
7219
|
+
}
|
|
7220
|
+
|
|
7221
|
+
const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
|
|
7222
|
+
|
|
7223
|
+
if (node->op != ops[i]) {
|
|
7224
|
+
return false;
|
|
7225
|
+
}
|
|
7226
|
+
|
|
7227
|
+
if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
|
|
7228
|
+
continue;
|
|
7229
|
+
}
|
|
7230
|
+
|
|
7231
|
+
if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
|
|
7232
|
+
return false;
|
|
7233
|
+
}
|
|
7234
|
+
|
|
7235
|
+
int subgraph_uses = 0;
|
|
7236
|
+
for (int j = i + 1; j < count; ++j) {
|
|
7237
|
+
const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
|
|
7238
|
+
for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
|
|
7239
|
+
if (other_node->src[src_idx] == node) {
|
|
7240
|
+
subgraph_uses++;
|
|
7241
|
+
}
|
|
7242
|
+
}
|
|
7243
|
+
}
|
|
7244
|
+
|
|
7245
|
+
if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
|
|
7246
|
+
return false;
|
|
7247
|
+
}
|
|
7248
|
+
|
|
7249
|
+
// if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
|
|
7250
|
+
struct ggml_tensor * view_src = node->view_src;
|
|
7251
|
+
while (view_src) {
|
|
7252
|
+
if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
|
|
7253
|
+
return false;
|
|
7254
|
+
}
|
|
7255
|
+
view_src = view_src->view_src;
|
|
7256
|
+
}
|
|
7257
|
+
}
|
|
7258
|
+
|
|
7259
|
+
return true;
|
|
7260
|
+
}
|
|
7261
|
+
|
|
6875
7262
|
// check if node is part of the graph
|
|
6876
7263
|
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
|
|
6877
7264
|
if (cgraph == NULL) {
|
|
@@ -7181,6 +7568,11 @@ size_t ggml_quantize_chunk(
|
|
|
7181
7568
|
|
|
7182
7569
|
////////////////////////////////////////////////////////////////////////////////
|
|
7183
7570
|
|
|
7571
|
+
// Retrieve the currently installed log callback and its user data from g_logger_state.
//
//   log_callback - out: receives the current callback; may be NULL if not needed
//   user_data    - out: receives the current callback user data; may be NULL if not needed
void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
    // each output is optional so a caller can query just one of the two values
    if (log_callback != NULL) {
        *log_callback = g_logger_state.log_callback;
    }
    if (user_data != NULL) {
        *user_data = g_logger_state.log_callback_user_data;
    }
}
|
|
7575
|
+
|
|
7184
7576
|
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
|
|
7185
7577
|
g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
|
|
7186
7578
|
g_logger_state.log_callback_user_data = user_data;
|
|
@@ -525,6 +525,7 @@ extern "C" {
|
|
|
525
525
|
// use whisper_tokenize() to convert text to tokens
|
|
526
526
|
// maximum of whisper_n_text_ctx()/2 tokens are used (typically 224)
|
|
527
527
|
const char * initial_prompt;
|
|
528
|
+
bool carry_initial_prompt; // if true, always prepend initial_prompt to every decode window (may reduce conditioning on previous text)
|
|
528
529
|
const whisper_token * prompt_tokens;
|
|
529
530
|
int prompt_n_tokens;
|
|
530
531
|
|
|
@@ -79,6 +79,7 @@ if (WHISPER_COREML)
|
|
|
79
79
|
)
|
|
80
80
|
|
|
81
81
|
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
|
|
82
|
+
install(TARGETS ${TARGET} LIBRARY)
|
|
82
83
|
endif()
|
|
83
84
|
|
|
84
85
|
if (WHISPER_OPENVINO)
|
|
@@ -125,7 +126,8 @@ if (WHISPER_EXTRA_FLAGS)
|
|
|
125
126
|
target_compile_options(whisper PRIVATE ${WHISPER_EXTRA_FLAGS})
|
|
126
127
|
endif()
|
|
127
128
|
|
|
128
|
-
|
|
129
|
+
find_package(Threads REQUIRED)
|
|
130
|
+
target_link_libraries(whisper PUBLIC ggml Threads::Threads)
|
|
129
131
|
|
|
130
132
|
if (WHISPER_COREML)
|
|
131
133
|
target_link_libraries(whisper PRIVATE whisper.coreml)
|