whispercpp 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +99 -2
- data/ext/extconf.rb +1 -0
- data/ext/ruby_whisper.c +20 -4
- data/ext/ruby_whisper.h +30 -2
- data/ext/ruby_whisper_context.c +216 -124
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +0 -1
- data/ext/ruby_whisper_params.c +0 -1
- data/ext/ruby_whisper_segment.c +0 -1
- data/ext/ruby_whisper_token.c +29 -9
- data/ext/ruby_whisper_transcribe.cpp +4 -1
- data/ext/ruby_whisper_vad_context.c +48 -1
- data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +0 -1
- data/ext/ruby_whisper_vad_segments.c +0 -1
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +8 -0
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/server/server.cpp +18 -4
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
- data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
- data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
- data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
- data/ext/sources/examples/talk-llama/llama-context.h +27 -28
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
- data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
- data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
- data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
- data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
- data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
- data/ext/sources/examples/talk-llama/llama-model.h +72 -19
- data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
- data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
- data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -22
- data/ext/sources/examples/talk-llama/llama.h +63 -30
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
- data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
- data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
- data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
- data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
- data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/models.h +181 -46
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
- data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
- data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
- data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
- data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
- data/ext/sources/ggml/CMakeLists.txt +9 -3
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +5 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +6 -1
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml.h +56 -9
- data/ext/sources/ggml/src/CMakeLists.txt +3 -0
- data/ext/sources/ggml/src/ggml-alloc.c +4 -9
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
- data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
- data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
- data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
- data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
- data/ext/sources/ggml/src/ggml-impl.h +62 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
- data/ext/sources/ggml/src/ggml.c +167 -33
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/src/whisper.cpp +6 -28
- data/sig/whisper.rbs +43 -2
- data/test/test_context_params.rb +82 -0
- data/test/test_token.rb +11 -0
- data/test/test_vad_context.rb +58 -8
- data/test/test_whisper.rb +20 -0
- data/whispercpp.gemspec +1 -1
- metadata +240 -28
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
|
@@ -46,9 +46,6 @@ size_t ggml_metal_op_flash_attn_ext_extra_tmp(const struct ggml_tensor * op);
|
|
|
46
46
|
int ggml_metal_op_concat (ggml_metal_op_t ctx, int idx);
|
|
47
47
|
int ggml_metal_op_repeat (ggml_metal_op_t ctx, int idx);
|
|
48
48
|
int ggml_metal_op_acc (ggml_metal_op_t ctx, int idx);
|
|
49
|
-
int ggml_metal_op_scale (ggml_metal_op_t ctx, int idx);
|
|
50
|
-
int ggml_metal_op_fill (ggml_metal_op_t ctx, int idx);
|
|
51
|
-
int ggml_metal_op_clamp (ggml_metal_op_t ctx, int idx);
|
|
52
49
|
int ggml_metal_op_unary (ggml_metal_op_t ctx, int idx);
|
|
53
50
|
int ggml_metal_op_glu (ggml_metal_op_t ctx, int idx);
|
|
54
51
|
int ggml_metal_op_sum (ggml_metal_op_t ctx, int idx);
|
|
@@ -56,11 +53,16 @@ int ggml_metal_op_sum_rows (ggml_metal_op_t ctx, int idx);
|
|
|
56
53
|
int ggml_metal_op_cumsum (ggml_metal_op_t ctx, int idx);
|
|
57
54
|
int ggml_metal_op_get_rows (ggml_metal_op_t ctx, int idx);
|
|
58
55
|
int ggml_metal_op_set_rows (ggml_metal_op_t ctx, int idx);
|
|
56
|
+
int ggml_metal_op_diag (ggml_metal_op_t ctx, int idx);
|
|
59
57
|
int ggml_metal_op_soft_max (ggml_metal_op_t ctx, int idx);
|
|
60
58
|
int ggml_metal_op_ssm_conv (ggml_metal_op_t ctx, int idx);
|
|
61
59
|
int ggml_metal_op_ssm_scan (ggml_metal_op_t ctx, int idx);
|
|
62
60
|
int ggml_metal_op_rwkv (ggml_metal_op_t ctx, int idx);
|
|
61
|
+
int ggml_metal_op_gated_delta_net (ggml_metal_op_t ctx, int idx);
|
|
62
|
+
int ggml_metal_op_solve_tri (ggml_metal_op_t ctx, int idx);
|
|
63
|
+
int ggml_metal_op_set (ggml_metal_op_t ctx, int idx);
|
|
63
64
|
int ggml_metal_op_cpy (ggml_metal_op_t ctx, int idx);
|
|
65
|
+
int ggml_metal_op_pool_1d (ggml_metal_op_t ctx, int idx);
|
|
64
66
|
int ggml_metal_op_pool_2d (ggml_metal_op_t ctx, int idx);
|
|
65
67
|
int ggml_metal_op_mul_mat (ggml_metal_op_t ctx, int idx);
|
|
66
68
|
int ggml_metal_op_mul_mat_id (ggml_metal_op_t ctx, int idx);
|
|
@@ -83,7 +85,6 @@ int ggml_metal_op_timestep_embedding(ggml_metal_op_t ctx, int idx);
|
|
|
83
85
|
int ggml_metal_op_argmax (ggml_metal_op_t ctx, int idx);
|
|
84
86
|
int ggml_metal_op_argsort (ggml_metal_op_t ctx, int idx);
|
|
85
87
|
int ggml_metal_op_top_k (ggml_metal_op_t ctx, int idx);
|
|
86
|
-
int ggml_metal_op_leaky_relu (ggml_metal_op_t ctx, int idx);
|
|
87
88
|
int ggml_metal_op_tri (ggml_metal_op_t ctx, int idx);
|
|
88
89
|
int ggml_metal_op_opt_step_adamw (ggml_metal_op_t ctx, int idx);
|
|
89
90
|
int ggml_metal_op_opt_step_sgd (ggml_metal_op_t ctx, int idx);
|
|
@@ -7,11 +7,15 @@
|
|
|
7
7
|
#include "ggml-metal-context.h"
|
|
8
8
|
#include "ggml-metal-ops.h"
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
#include <mutex>
|
|
11
|
+
#include <string>
|
|
11
12
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
#define GGML_METAL_NAME "MTL"
|
|
14
|
+
#define GGML_METAL_MAX_DEVICES 16
|
|
15
|
+
|
|
16
|
+
// number of Metal devices
|
|
17
|
+
// note: can be overridden with GGML_METAL_DEVICES env to simulate virtual devices
|
|
18
|
+
static int g_devices = 1;
|
|
15
19
|
|
|
16
20
|
////////////////////////////////////////////////////////////////////////////////
|
|
17
21
|
// backend interface
|
|
@@ -165,10 +169,28 @@ static ggml_backend_buffer_i ggml_backend_metal_buffer_private_i = {
|
|
|
165
169
|
/* .reset = */ NULL,
|
|
166
170
|
};
|
|
167
171
|
|
|
172
|
+
static bool ggml_backend_buffer_is_metal(ggml_backend_buffer_t buffer) {
|
|
173
|
+
return buffer->iface.free_buffer == ggml_backend_metal_buffer_shared_free_buffer ||
|
|
174
|
+
buffer->iface.free_buffer == ggml_backend_metal_buffer_private_free_buffer;
|
|
175
|
+
}
|
|
176
|
+
|
|
168
177
|
//
|
|
169
178
|
// buffer types
|
|
170
179
|
//
|
|
171
180
|
|
|
181
|
+
struct ggml_backend_metal_buffer_type {
|
|
182
|
+
int device;
|
|
183
|
+
std::string name;
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
struct ggml_backend_metal_buffer_type_deleter {
|
|
187
|
+
void operator()(ggml_backend_metal_buffer_type * ctx) const {
|
|
188
|
+
delete ctx;
|
|
189
|
+
}
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
typedef std::unique_ptr<ggml_backend_metal_buffer_type, ggml_backend_metal_buffer_type_deleter> ggml_backend_metal_buffer_type_ptr;
|
|
193
|
+
|
|
172
194
|
// common method for allocating shread or private Metal buffers
|
|
173
195
|
static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size, bool shared) {
|
|
174
196
|
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)buft->device->context;
|
|
@@ -218,9 +240,9 @@ static size_t ggml_backend_metal_buffer_type_get_alloc_size(ggml_backend_buffer_
|
|
|
218
240
|
// default (shared) buffer type
|
|
219
241
|
|
|
220
242
|
static const char * ggml_backend_metal_buffer_type_shared_get_name(ggml_backend_buffer_type_t buft) {
|
|
221
|
-
|
|
243
|
+
ggml_backend_metal_buffer_type * ctx = (ggml_backend_metal_buffer_type *)buft->context;
|
|
222
244
|
|
|
223
|
-
|
|
245
|
+
return ctx->name.c_str();
|
|
224
246
|
}
|
|
225
247
|
|
|
226
248
|
static ggml_backend_buffer_t ggml_backend_metal_buffer_type_shared_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
@@ -249,29 +271,54 @@ static bool ggml_backend_metal_buffer_type_shared_is_host(ggml_backend_buffer_ty
|
|
|
249
271
|
GGML_UNUSED(buft);
|
|
250
272
|
}
|
|
251
273
|
|
|
252
|
-
static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_shared(
|
|
253
|
-
static
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
274
|
+
static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_shared(int device) {
|
|
275
|
+
static std::mutex mutex;
|
|
276
|
+
std::lock_guard<std::mutex> lock(mutex);
|
|
277
|
+
|
|
278
|
+
static std::vector<ggml_backend_buffer_type> bufts;
|
|
279
|
+
static std::vector<ggml_backend_metal_buffer_type_ptr> ctxs;
|
|
280
|
+
|
|
281
|
+
static bool initialized = false;
|
|
282
|
+
if (!initialized) {
|
|
283
|
+
bufts.reserve(g_devices);
|
|
284
|
+
ctxs.reserve(g_devices);
|
|
285
|
+
|
|
286
|
+
for (int i = 0; i < g_devices; ++i) {
|
|
287
|
+
ggml_backend_metal_buffer_type * raw_ctx =
|
|
288
|
+
new ggml_backend_metal_buffer_type {
|
|
289
|
+
/* .device = */ i,
|
|
290
|
+
/* .name = */ GGML_METAL_NAME + std::to_string(i),
|
|
291
|
+
};
|
|
292
|
+
ctxs.emplace_back(raw_ctx);
|
|
293
|
+
|
|
294
|
+
ggml_backend_buffer_type buft = {
|
|
295
|
+
/* .iface = */ {
|
|
296
|
+
/* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name,
|
|
297
|
+
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
|
|
298
|
+
/* .get_alignment = */ ggml_backend_metal_buffer_type_shared_get_alignment,
|
|
299
|
+
/* .get_max_size = */ ggml_backend_metal_buffer_type_shared_get_max_size,
|
|
300
|
+
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_shared_get_alloc_size,
|
|
301
|
+
/* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host,
|
|
302
|
+
},
|
|
303
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
|
304
|
+
/* .context = */ raw_ctx,
|
|
305
|
+
};
|
|
306
|
+
|
|
307
|
+
bufts.emplace_back(buft);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
initialized = true;
|
|
311
|
+
}
|
|
265
312
|
|
|
266
|
-
return &
|
|
313
|
+
return &bufts[device];
|
|
267
314
|
}
|
|
268
315
|
|
|
269
316
|
// default (private) buffer type
|
|
270
317
|
|
|
271
318
|
static const char * ggml_backend_metal_buffer_type_private_get_name(ggml_backend_buffer_type_t buft) {
|
|
272
|
-
|
|
319
|
+
ggml_backend_metal_buffer_type * ctx = (ggml_backend_metal_buffer_type *)buft->context;
|
|
273
320
|
|
|
274
|
-
|
|
321
|
+
return ctx->name.c_str();
|
|
275
322
|
}
|
|
276
323
|
|
|
277
324
|
static ggml_backend_buffer_t ggml_backend_metal_buffer_type_private_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
@@ -300,29 +347,53 @@ static bool ggml_backend_metal_buffer_type_private_is_host(ggml_backend_buffer_t
|
|
|
300
347
|
GGML_UNUSED(buft);
|
|
301
348
|
}
|
|
302
349
|
|
|
303
|
-
static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_private(
|
|
304
|
-
static
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
350
|
+
static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_private(int device) {
|
|
351
|
+
static std::mutex mutex;
|
|
352
|
+
std::lock_guard<std::mutex> lock(mutex);
|
|
353
|
+
|
|
354
|
+
static std::vector<ggml_backend_buffer_type> bufts;
|
|
355
|
+
static std::vector<ggml_backend_metal_buffer_type_ptr> ctxs;
|
|
356
|
+
|
|
357
|
+
static bool initialized = false;
|
|
358
|
+
if (!initialized) {
|
|
359
|
+
bufts.reserve(g_devices);
|
|
360
|
+
ctxs.reserve(g_devices);
|
|
361
|
+
|
|
362
|
+
for (int i = 0; i < g_devices; ++i) {
|
|
363
|
+
ggml_backend_metal_buffer_type * raw_ctx = new ggml_backend_metal_buffer_type{
|
|
364
|
+
/* .device = */ i,
|
|
365
|
+
/* .name = */ GGML_METAL_NAME + std::to_string(i) + "_Private"
|
|
366
|
+
};
|
|
367
|
+
ctxs.emplace_back(raw_ctx);
|
|
368
|
+
|
|
369
|
+
ggml_backend_buffer_type buft = {
|
|
370
|
+
/* .iface = */ {
|
|
371
|
+
/* .get_name = */ ggml_backend_metal_buffer_type_private_get_name,
|
|
372
|
+
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
|
|
373
|
+
/* .get_alignment = */ ggml_backend_metal_buffer_type_private_get_alignment,
|
|
374
|
+
/* .get_max_size = */ ggml_backend_metal_buffer_type_private_get_max_size,
|
|
375
|
+
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_private_get_alloc_size,
|
|
376
|
+
/* .is_host = */ ggml_backend_metal_buffer_type_private_is_host,
|
|
377
|
+
},
|
|
378
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
|
379
|
+
/* .context = */ raw_ctx,
|
|
380
|
+
};
|
|
381
|
+
|
|
382
|
+
bufts.emplace_back(buft);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
initialized = true;
|
|
386
|
+
}
|
|
316
387
|
|
|
317
|
-
return &
|
|
388
|
+
return &bufts[device];
|
|
318
389
|
}
|
|
319
390
|
|
|
320
391
|
// mapped buffer type
|
|
321
392
|
|
|
322
393
|
static const char * ggml_backend_metal_buffer_type_mapped_get_name(ggml_backend_buffer_type_t buft) {
|
|
323
|
-
|
|
394
|
+
ggml_backend_metal_buffer_type * ctx = (ggml_backend_metal_buffer_type *)buft->context;
|
|
324
395
|
|
|
325
|
-
|
|
396
|
+
return ctx->name.c_str();
|
|
326
397
|
}
|
|
327
398
|
|
|
328
399
|
static ggml_backend_buffer_t ggml_backend_metal_buffer_type_mapped_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
@@ -352,31 +423,55 @@ static bool ggml_backend_metal_buffer_type_mapped_is_host(ggml_backend_buffer_ty
|
|
|
352
423
|
GGML_UNUSED(buft);
|
|
353
424
|
}
|
|
354
425
|
|
|
355
|
-
static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
426
|
+
static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(int device) {
|
|
427
|
+
static std::mutex mutex;
|
|
428
|
+
std::lock_guard<std::mutex> lock(mutex);
|
|
429
|
+
|
|
430
|
+
static std::vector<ggml_backend_buffer_type> bufts;
|
|
431
|
+
static std::vector<ggml_backend_metal_buffer_type_ptr> ctxs;
|
|
432
|
+
|
|
433
|
+
static bool initialized = false;
|
|
434
|
+
if (!initialized) {
|
|
435
|
+
bufts.reserve(g_devices);
|
|
436
|
+
ctxs.reserve(g_devices);
|
|
437
|
+
|
|
438
|
+
for (int i = 0; i < g_devices; ++i) {
|
|
439
|
+
ggml_backend_metal_buffer_type * raw_ctx = new ggml_backend_metal_buffer_type{
|
|
440
|
+
/* .device = */ i,
|
|
441
|
+
/* .name = */ GGML_METAL_NAME + std::to_string(i) + "_Mapped"
|
|
442
|
+
};
|
|
443
|
+
ctxs.emplace_back(raw_ctx);
|
|
444
|
+
|
|
445
|
+
// note: not obvious, but this buffer type still needs to implement .alloc_buffer:
|
|
446
|
+
// https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2333177099
|
|
447
|
+
ggml_backend_buffer_type buft = {
|
|
448
|
+
/* .iface = */ {
|
|
449
|
+
/* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name,
|
|
450
|
+
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
|
|
451
|
+
/* .get_alignment = */ ggml_backend_metal_buffer_type_mapped_get_alignment,
|
|
452
|
+
/* .get_max_size = */ ggml_backend_metal_buffer_type_mapped_get_max_size,
|
|
453
|
+
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size,
|
|
454
|
+
/* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host,
|
|
455
|
+
},
|
|
456
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
|
457
|
+
/* .context = */ raw_ctx,
|
|
458
|
+
};
|
|
459
|
+
|
|
460
|
+
bufts.emplace_back(buft);
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
initialized = true;
|
|
464
|
+
}
|
|
370
465
|
|
|
371
|
-
return &
|
|
466
|
+
return &bufts[device];
|
|
372
467
|
}
|
|
373
468
|
|
|
374
469
|
// backend
|
|
375
470
|
|
|
376
471
|
static const char * ggml_backend_metal_name(ggml_backend_t backend) {
|
|
377
|
-
|
|
472
|
+
ggml_metal_t ctx = (ggml_metal_t)backend->context;
|
|
378
473
|
|
|
379
|
-
|
|
474
|
+
return ggml_metal_get_name(ctx);
|
|
380
475
|
}
|
|
381
476
|
|
|
382
477
|
static void ggml_backend_metal_free(ggml_backend_t backend) {
|
|
@@ -409,12 +504,24 @@ static void ggml_backend_metal_get_tensor_async(ggml_backend_t backend, const gg
|
|
|
409
504
|
}
|
|
410
505
|
|
|
411
506
|
static bool ggml_backend_metal_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) {
|
|
412
|
-
|
|
507
|
+
if (!ggml_backend_is_metal(backend_src) || !ggml_backend_is_metal(backend_dst)) {
|
|
508
|
+
return false;
|
|
509
|
+
}
|
|
413
510
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
511
|
+
if (!ggml_backend_buffer_is_metal(src->buffer) || !ggml_backend_buffer_is_metal(dst->buffer)) {
|
|
512
|
+
return false;
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
ggml_metal_t ctx_src = (ggml_metal_t)backend_src->context;
|
|
516
|
+
ggml_metal_t ctx_dst = (ggml_metal_t)backend_dst->context;
|
|
517
|
+
|
|
518
|
+
//ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer;
|
|
519
|
+
//ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer;
|
|
520
|
+
|
|
521
|
+
//ggml_metal_buffer_t buf_ctx_src = (ggml_metal_buffer_t)buf_src->context;
|
|
522
|
+
//ggml_metal_buffer_t buf_ctx_dst = (ggml_metal_buffer_t)buf_dst->context;
|
|
523
|
+
|
|
524
|
+
return ggml_metal_cpy_tensor_async(ctx_src, ctx_dst, src, dst);
|
|
418
525
|
}
|
|
419
526
|
|
|
420
527
|
static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
|
|
@@ -423,6 +530,20 @@ static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend,
|
|
|
423
530
|
return ggml_metal_graph_compute(ctx, cgraph);
|
|
424
531
|
}
|
|
425
532
|
|
|
533
|
+
static void ggml_backend_metal_event_record(ggml_backend_t backend, ggml_backend_event_t event) {
|
|
534
|
+
ggml_metal_t ctx = (ggml_metal_t)backend->context;
|
|
535
|
+
ggml_metal_event_t ev = (ggml_metal_event_t)event->context;
|
|
536
|
+
|
|
537
|
+
ggml_metal_event_record(ctx, ev);
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
static void ggml_backend_metal_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
|
|
541
|
+
ggml_metal_t ctx = (ggml_metal_t)backend->context;
|
|
542
|
+
ggml_metal_event_t ev = (ggml_metal_event_t)event->context;
|
|
543
|
+
|
|
544
|
+
ggml_metal_event_wait(ctx, ev);
|
|
545
|
+
}
|
|
546
|
+
|
|
426
547
|
static void ggml_backend_metal_graph_optimize(ggml_backend_t backend, ggml_cgraph * cgraph) {
|
|
427
548
|
ggml_metal_t ctx = (ggml_metal_t)backend->context;
|
|
428
549
|
|
|
@@ -435,7 +556,6 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
|
|
|
435
556
|
ggml_metal_t ctx = (ggml_metal_t)backend->context;
|
|
436
557
|
|
|
437
558
|
ggml_metal_set_n_cb(ctx, n_cb);
|
|
438
|
-
|
|
439
559
|
}
|
|
440
560
|
|
|
441
561
|
static ggml_backend_i ggml_backend_metal_i = {
|
|
@@ -450,12 +570,8 @@ static ggml_backend_i ggml_backend_metal_i = {
|
|
|
450
570
|
/* .graph_plan_update = */ NULL,
|
|
451
571
|
/* .graph_plan_compute = */ NULL,
|
|
452
572
|
/* .graph_compute = */ ggml_backend_metal_graph_compute,
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
// in any case, these docs seem relevant if we ever decide to implement it:
|
|
456
|
-
// https://developer.apple.com/documentation/metal/mtlcommandbuffer#Synchronizing-Passes-with-Events
|
|
457
|
-
/* .event_record = */ NULL,
|
|
458
|
-
/* .event_wait = */ NULL,
|
|
573
|
+
/* .event_record = */ ggml_backend_metal_event_record,
|
|
574
|
+
/* .event_wait = */ ggml_backend_metal_event_wait,
|
|
459
575
|
/* .graph_optimize = */ ggml_backend_metal_graph_optimize,
|
|
460
576
|
};
|
|
461
577
|
|
|
@@ -519,15 +635,17 @@ void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) {
|
|
|
519
635
|
// backend device
|
|
520
636
|
|
|
521
637
|
static const char * ggml_backend_metal_device_get_name(ggml_backend_dev_t dev) {
|
|
522
|
-
|
|
638
|
+
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)dev->context;
|
|
523
639
|
|
|
524
|
-
|
|
640
|
+
const ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx_dev);
|
|
641
|
+
|
|
642
|
+
return props_dev->name;
|
|
525
643
|
}
|
|
526
644
|
|
|
527
645
|
static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t dev) {
|
|
528
646
|
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)dev->context;
|
|
529
647
|
|
|
530
|
-
return ggml_metal_device_get_props(ctx_dev)->
|
|
648
|
+
return ggml_metal_device_get_props(ctx_dev)->desc;
|
|
531
649
|
}
|
|
532
650
|
|
|
533
651
|
static void ggml_backend_metal_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
|
@@ -550,14 +668,14 @@ static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_bac
|
|
|
550
668
|
ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
|
551
669
|
|
|
552
670
|
props->caps = {
|
|
553
|
-
/* .async
|
|
554
|
-
/* .host_buffer
|
|
555
|
-
/* .buffer_from_host_ptr
|
|
556
|
-
/* .events
|
|
671
|
+
/* .async = */ true,
|
|
672
|
+
/* .host_buffer = */ false,
|
|
673
|
+
/* .buffer_from_host_ptr = */ true,
|
|
674
|
+
/* .events = */ true,
|
|
557
675
|
};
|
|
558
676
|
}
|
|
559
677
|
|
|
560
|
-
static ggml_backend_t
|
|
678
|
+
static ggml_backend_t ggml_backend_metal_device_init_backend(ggml_backend_dev_t dev, const char * params) {
|
|
561
679
|
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)dev->context;
|
|
562
680
|
|
|
563
681
|
ggml_metal_t ctx = ggml_metal_init(ctx_dev);
|
|
@@ -587,7 +705,7 @@ static ggml_backend_buffer_type_t ggml_backend_metal_device_get_buffer_type(ggml
|
|
|
587
705
|
|
|
588
706
|
const ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx_dev);
|
|
589
707
|
|
|
590
|
-
return props_dev->use_shared_buffers ? ggml_backend_metal_buffer_type_shared() : ggml_backend_metal_buffer_type_private();
|
|
708
|
+
return props_dev->use_shared_buffers ? ggml_backend_metal_buffer_type_shared(props_dev->device) : ggml_backend_metal_buffer_type_private(props_dev->device);
|
|
591
709
|
}
|
|
592
710
|
|
|
593
711
|
static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
|
|
@@ -595,7 +713,9 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
|
|
|
595
713
|
|
|
596
714
|
ggml_metal_buffer_t res = ggml_metal_buffer_map(ctx_dev, ptr, size, max_tensor_size);
|
|
597
715
|
|
|
598
|
-
|
|
716
|
+
const ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx_dev);
|
|
717
|
+
|
|
718
|
+
return ggml_backend_buffer_init(ggml_backend_metal_buffer_type_mapped(props_dev->device), ggml_backend_metal_buffer_shared_i, res, size);
|
|
599
719
|
}
|
|
600
720
|
|
|
601
721
|
static bool ggml_backend_metal_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
|
@@ -606,9 +726,10 @@ static bool ggml_backend_metal_device_supports_op(ggml_backend_dev_t dev, const
|
|
|
606
726
|
|
|
607
727
|
static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
|
608
728
|
return
|
|
729
|
+
buft->device == dev && (
|
|
609
730
|
buft->iface.get_name == ggml_backend_metal_buffer_type_shared_get_name ||
|
|
610
731
|
buft->iface.get_name == ggml_backend_metal_buffer_type_private_get_name ||
|
|
611
|
-
buft->iface.get_name == ggml_backend_metal_buffer_type_mapped_get_name;
|
|
732
|
+
buft->iface.get_name == ggml_backend_metal_buffer_type_mapped_get_name);
|
|
612
733
|
|
|
613
734
|
GGML_UNUSED(dev);
|
|
614
735
|
}
|
|
@@ -632,45 +753,97 @@ static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const g
|
|
|
632
753
|
get_op_batch_size(op) >= ggml_metal_device_get_props(ctx_dev)->op_offload_min_batch_size;
|
|
633
754
|
}
|
|
634
755
|
|
|
756
|
+
static ggml_backend_event_t ggml_backend_metal_device_event_new(ggml_backend_dev_t dev) {
|
|
757
|
+
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)dev->context;
|
|
758
|
+
|
|
759
|
+
ggml_metal_event_t event = ggml_metal_device_event_init(ctx_dev);
|
|
760
|
+
GGML_ASSERT(event);
|
|
761
|
+
|
|
762
|
+
ggml_backend_event_t ev = new ggml_backend_event {
|
|
763
|
+
/* .device = */ dev,
|
|
764
|
+
/* .context = */ event,
|
|
765
|
+
};
|
|
766
|
+
|
|
767
|
+
return ev;
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
static void ggml_backend_metal_device_event_free(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
|
771
|
+
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)dev->context;
|
|
772
|
+
|
|
773
|
+
ggml_metal_event_t ev = (ggml_metal_event_t)event->context;
|
|
774
|
+
|
|
775
|
+
ggml_metal_device_event_free(ctx_dev, ev);
|
|
776
|
+
|
|
777
|
+
delete event;
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
static void ggml_backend_metal_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
|
781
|
+
ggml_metal_device_t ctx_dev = (ggml_metal_device_t)dev->context;
|
|
782
|
+
|
|
783
|
+
ggml_metal_event_t evt = (ggml_metal_event_t)event->context;
|
|
784
|
+
|
|
785
|
+
ggml_metal_device_event_synchronize(ctx_dev, evt);
|
|
786
|
+
}
|
|
787
|
+
|
|
635
788
|
static ggml_backend_device_i ggml_backend_metal_device_i = {
|
|
636
789
|
/* .get_name = */ ggml_backend_metal_device_get_name,
|
|
637
790
|
/* .get_description = */ ggml_backend_metal_device_get_description,
|
|
638
791
|
/* .get_memory = */ ggml_backend_metal_device_get_memory,
|
|
639
792
|
/* .get_type = */ ggml_backend_metal_device_get_type,
|
|
640
793
|
/* .get_props = */ ggml_backend_metal_device_get_props,
|
|
641
|
-
/* .init_backend = */
|
|
794
|
+
/* .init_backend = */ ggml_backend_metal_device_init_backend,
|
|
642
795
|
/* .get_buffer_type = */ ggml_backend_metal_device_get_buffer_type,
|
|
643
796
|
/* .get_host_buffer_type = */ NULL,
|
|
644
797
|
/* .buffer_from_host_ptr = */ ggml_backend_metal_device_buffer_mapped,
|
|
645
798
|
/* .supports_op = */ ggml_backend_metal_device_supports_op,
|
|
646
799
|
/* .supports_buft = */ ggml_backend_metal_device_supports_buft,
|
|
647
800
|
/* .offload_op = */ ggml_backend_metal_device_offload_op,
|
|
648
|
-
/* .event_new = */
|
|
649
|
-
/* .event_free = */
|
|
650
|
-
/* .event_synchronize = */
|
|
801
|
+
/* .event_new = */ ggml_backend_metal_device_event_new,
|
|
802
|
+
/* .event_free = */ ggml_backend_metal_device_event_free,
|
|
803
|
+
/* .event_synchronize = */ ggml_backend_metal_device_event_synchronize,
|
|
651
804
|
};
|
|
652
805
|
|
|
653
806
|
// backend registry
|
|
654
807
|
|
|
808
|
+
struct ggml_backend_metal_reg {
|
|
809
|
+
std::vector<ggml_backend_dev_t> devices;
|
|
810
|
+
};
|
|
811
|
+
|
|
812
|
+
typedef struct ggml_backend_metal_reg * ggml_backend_metal_reg_t;
|
|
813
|
+
|
|
814
|
+
static ggml_backend_metal_reg_t ggml_backend_metal_reg_init(void) {
|
|
815
|
+
ggml_backend_metal_reg_t ctx = new struct ggml_backend_metal_reg;
|
|
816
|
+
|
|
817
|
+
return ctx;
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
static void ggml_backend_metal_reg_free(ggml_backend_metal_reg_t ctx) {
|
|
821
|
+
delete ctx;
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
struct ggml_backend_metal_reg_deleter {
|
|
825
|
+
void operator()(ggml_backend_metal_reg_t ctx) {
|
|
826
|
+
ggml_backend_metal_reg_free(ctx);
|
|
827
|
+
}
|
|
828
|
+
};
|
|
829
|
+
|
|
830
|
+
typedef std::unique_ptr<struct ggml_backend_metal_reg, ggml_backend_metal_reg_deleter> ggml_backend_metal_reg_ptr;
|
|
831
|
+
|
|
655
832
|
static const char * ggml_backend_metal_reg_get_name(ggml_backend_reg_t reg) {
|
|
656
|
-
return
|
|
833
|
+
return GGML_METAL_NAME;
|
|
657
834
|
|
|
658
835
|
GGML_UNUSED(reg);
|
|
659
836
|
}
|
|
660
837
|
|
|
661
838
|
static size_t ggml_backend_metal_reg_device_count(ggml_backend_reg_t reg) {
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
GGML_UNUSED(reg);
|
|
839
|
+
ggml_backend_metal_reg_t ctx = (ggml_backend_metal_reg_t)reg->context;
|
|
840
|
+
return ctx->devices.size();
|
|
665
841
|
}
|
|
666
842
|
|
|
667
843
|
static ggml_backend_dev_t ggml_backend_metal_reg_device_get(ggml_backend_reg_t reg, size_t index) {
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
return
|
|
671
|
-
|
|
672
|
-
GGML_UNUSED(reg);
|
|
673
|
-
GGML_UNUSED(index);
|
|
844
|
+
ggml_backend_metal_reg_t ctx = (ggml_backend_metal_reg_t)reg->context;
|
|
845
|
+
GGML_ASSERT(index < ctx->devices.size());
|
|
846
|
+
return ctx->devices[index];
|
|
674
847
|
}
|
|
675
848
|
|
|
676
849
|
static ggml_backend_feature g_ggml_backend_metal_features[] = {
|
|
@@ -698,27 +871,67 @@ static void * ggml_backend_metal_get_proc_address(ggml_backend_reg_t reg, const
|
|
|
698
871
|
|
|
699
872
|
static ggml_backend_reg_i ggml_backend_metal_reg_i = {
|
|
700
873
|
/* .get_name = */ ggml_backend_metal_reg_get_name,
|
|
701
|
-
/* .
|
|
702
|
-
/* .
|
|
874
|
+
/* .get_device_count = */ ggml_backend_metal_reg_device_count,
|
|
875
|
+
/* .get_device = */ ggml_backend_metal_reg_device_get,
|
|
703
876
|
/* .get_proc_address = */ ggml_backend_metal_get_proc_address,
|
|
704
877
|
};
|
|
705
878
|
|
|
879
|
+
static ggml_backend_dev_t ggml_backend_metal_device_init(ggml_backend_reg_t reg, int device) {
|
|
880
|
+
return new ggml_backend_device {
|
|
881
|
+
/* .iface = */ ggml_backend_metal_device_i,
|
|
882
|
+
/* .reg = */ reg,
|
|
883
|
+
/* .context = */ ggml_metal_device_get(device),
|
|
884
|
+
};
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
static void ggml_backend_metal_device_free(ggml_backend_dev_t dev) {
|
|
888
|
+
delete dev;
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
struct ggml_backend_device_deleter {
|
|
892
|
+
void operator()(ggml_backend_dev_t ctx) {
|
|
893
|
+
ggml_backend_metal_device_free(ctx);
|
|
894
|
+
}
|
|
895
|
+
};
|
|
896
|
+
|
|
897
|
+
typedef std::unique_ptr<ggml_backend_device, ggml_backend_device_deleter> ggml_backend_device_ptr;
|
|
898
|
+
|
|
706
899
|
ggml_backend_reg_t ggml_backend_metal_reg(void) {
|
|
900
|
+
static ggml_backend_reg reg;
|
|
901
|
+
static bool initialized = false;
|
|
902
|
+
|
|
707
903
|
{
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
904
|
+
static std::mutex mutex;
|
|
905
|
+
std::lock_guard<std::mutex> lock(mutex);
|
|
906
|
+
|
|
907
|
+
const char * env = getenv("GGML_METAL_DEVICES");
|
|
908
|
+
if (env) {
|
|
909
|
+
g_devices = atoi(env);
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
static std::vector<ggml_backend_device_ptr> devs;
|
|
913
|
+
|
|
914
|
+
if (!initialized) {
|
|
915
|
+
static ggml_backend_metal_reg_ptr reg_ctx(ggml_backend_metal_reg_init());
|
|
916
|
+
|
|
917
|
+
for (int i = 0; i < g_devices; ++i) {
|
|
918
|
+
auto * dev = ggml_backend_metal_device_init(&reg, i);
|
|
919
|
+
devs.emplace_back(dev);
|
|
920
|
+
|
|
921
|
+
reg_ctx->devices.push_back(dev);
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
reg = {
|
|
925
|
+
/* .api_version = */ GGML_BACKEND_API_VERSION,
|
|
926
|
+
/* .iface = */ ggml_backend_metal_reg_i,
|
|
927
|
+
/* .context = */ reg_ctx.get(),
|
|
928
|
+
};
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
initialized = true;
|
|
719
932
|
}
|
|
720
933
|
|
|
721
|
-
return &
|
|
934
|
+
return &reg;
|
|
722
935
|
}
|
|
723
936
|
|
|
724
937
|
GGML_BACKEND_DL_IMPL(ggml_backend_metal_reg)
|