whispercpp 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +99 -2
- data/ext/extconf.rb +1 -0
- data/ext/ruby_whisper.c +20 -4
- data/ext/ruby_whisper.h +30 -2
- data/ext/ruby_whisper_context.c +216 -124
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +0 -1
- data/ext/ruby_whisper_params.c +0 -1
- data/ext/ruby_whisper_segment.c +0 -1
- data/ext/ruby_whisper_token.c +29 -9
- data/ext/ruby_whisper_transcribe.cpp +4 -1
- data/ext/ruby_whisper_vad_context.c +48 -1
- data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +0 -1
- data/ext/ruby_whisper_vad_segments.c +0 -1
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +8 -0
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/server/server.cpp +18 -4
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
- data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
- data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
- data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
- data/ext/sources/examples/talk-llama/llama-context.h +27 -28
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
- data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
- data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
- data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
- data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
- data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
- data/ext/sources/examples/talk-llama/llama-model.h +72 -19
- data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
- data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
- data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -22
- data/ext/sources/examples/talk-llama/llama.h +63 -30
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
- data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
- data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
- data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
- data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
- data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/models.h +181 -46
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
- data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
- data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
- data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
- data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
- data/ext/sources/ggml/CMakeLists.txt +9 -3
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +5 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +6 -1
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml.h +56 -9
- data/ext/sources/ggml/src/CMakeLists.txt +3 -0
- data/ext/sources/ggml/src/ggml-alloc.c +4 -9
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
- data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
- data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
- data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
- data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
- data/ext/sources/ggml/src/ggml-impl.h +62 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
- data/ext/sources/ggml/src/ggml.c +167 -33
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/src/whisper.cpp +6 -28
- data/sig/whisper.rbs +43 -2
- data/test/test_context_params.rb +82 -0
- data/test/test_token.rb +11 -0
- data/test/test_vad_context.rb +58 -8
- data/test/test_whisper.rb +20 -0
- data/whispercpp.gemspec +1 -1
- metadata +240 -28
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "apir_backend.gen.h"
|
|
4
|
+
|
|
5
|
+
#include <stdint.h> // for uintptr_t
|
|
6
|
+
#include <time.h> // for timespec, clock_gettime
|
|
7
|
+
|
|
8
|
+
#define APIR_BACKEND_INITIALIZE_SUCCESS 0
|
|
9
|
+
#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY 1
|
|
10
|
+
#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY 2
|
|
11
|
+
#define APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS 3
|
|
12
|
+
#define APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS 4
|
|
13
|
+
#define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5
|
|
14
|
+
#define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6
|
|
15
|
+
#define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7
|
|
16
|
+
#define APIR_BACKEND_INITIALIZE_NO_DEVICE 8
|
|
17
|
+
#define APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED 9
|
|
18
|
+
|
|
19
|
+
// new entries here need to be added to the apir_backend_initialize_error function below
|
|
20
|
+
|
|
21
|
+
#define APIR_BACKEND_FORWARD_INDEX_INVALID 6
|
|
22
|
+
|
|
23
|
+
// 0 is fast; 1 prevents the backend from crashing if an unsupported tensor is received
|
|
24
|
+
#define APIR_BACKEND_CHECK_SUPPORTS_OP 0
|
|
25
|
+
|
|
26
|
+
typedef uintptr_t apir_buffer_type_host_handle_t;
|
|
27
|
+
typedef uintptr_t apir_buffer_host_handle_t;
|
|
28
|
+
|
|
29
|
+
static const char * apir_backend_initialize_error(int code) {
|
|
30
|
+
#define APIR_BACKEND_INITIALIZE_ERROR(code_name) \
|
|
31
|
+
do { \
|
|
32
|
+
if (code == code_name) \
|
|
33
|
+
return #code_name; \
|
|
34
|
+
} while (0)
|
|
35
|
+
|
|
36
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_SUCCESS);
|
|
37
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY);
|
|
38
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY);
|
|
39
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS);
|
|
40
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS);
|
|
41
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED);
|
|
42
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED);
|
|
43
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_ALREADY_INITED);
|
|
44
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_NO_DEVICE);
|
|
45
|
+
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED);
|
|
46
|
+
|
|
47
|
+
return "Unknown APIR_BACKEND_INITIALIZE error:/";
|
|
48
|
+
|
|
49
|
+
#undef APIR_BACKEND_INITIALIZE_ERROR
|
|
50
|
+
}
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "ggml-impl.h"
|
|
4
|
+
|
|
5
|
+
#include <cassert>
|
|
6
|
+
#include <cstring>
|
|
7
|
+
|
|
8
|
+
#define likely(x) __builtin_expect(!!(x), 1)
|
|
9
|
+
#define unlikely(x) __builtin_expect(!!(x), 0)
|
|
10
|
+
|
|
11
|
+
/* Write cursor over a caller-provided byte buffer (see apir_new_encoder). */
struct apir_encoder {
    char *       cur;    /* position of the next write */
    const char * start;  /* first byte of the buffer */
    const char * end;    /* one past the last byte of the buffer */
    bool         fatal;  /* error flag, driven by apir_encoder_{set,reset,get}_fatal */
};
|
|
17
|
+
|
|
18
|
+
/* Read cursor over a caller-provided byte buffer (see apir_new_decoder). */
struct apir_decoder {
    const char * cur;    /* position of the next read */
    const char * end;    /* one past the last readable byte */
    bool         fatal;  /* error flag, driven by apir_decoder_{set,reset,get}_fatal */
};
|
|
23
|
+
|
|
24
|
+
/*
|
|
25
|
+
* new encoder and decoder
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
/*
 * Build a decoder that reads `size` bytes starting at `ptr`.
 * The fatal flag starts cleared.
 */
static apir_decoder apir_new_decoder(const char * ptr, size_t size) {
    apir_decoder dec;

    dec.cur   = ptr;
    dec.end   = ptr + size;
    dec.fatal = false;

    return dec;
}
|
|
37
|
+
|
|
38
|
+
/*
 * Build an encoder that writes into the `size` bytes starting at `ptr`.
 * The write cursor starts at `ptr` and the fatal flag starts cleared.
 */
static apir_encoder apir_new_encoder(char * ptr, size_t size) {
    apir_encoder enc;

    enc.cur   = ptr;
    enc.start = ptr;
    enc.end   = ptr + size;
    enc.fatal = false;

    return enc;
}
|
|
48
|
+
|
|
49
|
+
/*
|
|
50
|
+
* fatal flag handling
|
|
51
|
+
*/
|
|
52
|
+
|
|
53
|
+
/* Clear the encoder's fatal flag. */
static inline void apir_encoder_reset_fatal(apir_encoder * enc) {
    enc->fatal = false;
}
|
|
56
|
+
|
|
57
|
+
/* Raise the encoder's fatal flag. */
static inline void apir_encoder_set_fatal(apir_encoder * enc) {
    enc->fatal = true;
}
|
|
60
|
+
|
|
61
|
+
static inline bool apir_encoder_get_fatal(const apir_encoder * enc) {
|
|
62
|
+
return enc->fatal;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/* Clear the decoder's fatal flag. */
static inline void apir_decoder_reset_fatal(apir_decoder * dec) {
    dec->fatal = false;
}
|
|
68
|
+
|
|
69
|
+
/* Raise the decoder's fatal flag. */
static inline void apir_decoder_set_fatal(apir_decoder * dec) {
    dec->fatal = true;
}
|
|
72
|
+
|
|
73
|
+
static inline bool apir_decoder_get_fatal(const apir_decoder * dec) {
|
|
74
|
+
return dec->fatal;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/*
|
|
78
|
+
 * decoder peek
|
|
79
|
+
*/
|
|
80
|
+
|
|
81
|
+
/*
 * Copy `val_size` bytes from the decoder's current position into `val`
 * without moving the cursor. `size` is the number of bytes that must be
 * available in the stream and must be at least `val_size`.
 *
 * Returns true on success. If fewer than `size` bytes remain, an error is
 * logged, the decoder is flagged fatal, `val` is zero-filled and false is
 * returned.
 */
static inline bool apir_decoder_peek_internal(apir_decoder * dec, size_t size, void * val, size_t val_size) {
    assert(val_size <= size);

    const size_t remaining = (size_t) (dec->end - dec->cur);

    if (likely(size <= remaining)) {
        /* we should not rely on the compiler to optimize away memcpy... */
        memcpy(val, dec->cur, val_size);
        return true;
    }

    GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__);
    apir_decoder_set_fatal(dec);
    memset(val, 0, val_size);
    return false;
}
|
|
95
|
+
|
|
96
|
+
/*
 * Same as apir_decoder_peek_internal, with the success/failure result
 * discarded; a failure is still recorded in the decoder's fatal flag.
 */
static inline void apir_decoder_peek(apir_decoder * dec, size_t size, void * val, size_t val_size) {
    (void) apir_decoder_peek_internal(dec, size, val, val_size);
}
|
|
99
|
+
|
|
100
|
+
/*
 * Hand out a pointer to the next `size` bytes of the stream and advance the
 * cursor past them; nothing is copied.
 *
 * Returns NULL, after logging and flagging the decoder fatal, when fewer
 * than `size` bytes remain.
 */
static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) {
    const size_t remaining = (size_t) (dec->end - dec->cur);

    if (likely(size <= remaining)) {
        const void * addr = dec->cur;
        dec->cur += size;
        return addr;
    }

    GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__);
    apir_decoder_set_fatal(dec);
    return NULL;
}
|
|
111
|
+
|
|
112
|
+
/*
|
|
113
|
+
* read/write
|
|
114
|
+
*/
|
|
115
|
+
|
|
116
|
+
/*
 * Copy `val_size` bytes into `val` and, only if the peek succeeded, advance
 * the cursor by `size` bytes.
 */
static inline void apir_decoder_read(apir_decoder * dec, size_t size, void * val, size_t val_size) {
    if (!apir_decoder_peek_internal(dec, size, val, val_size)) {
        return;  /* cursor stays put; the peek already flagged the decoder */
    }

    dec->cur += size;
}
|
|
121
|
+
|
|
122
|
+
/*
 * Copy `val_size` bytes from `val` to the encoder's current position and
 * advance the cursor by `size` bytes (`size` >= `val_size`; the difference
 * is left untouched as padding).
 *
 * Returns the address the value was written to. If fewer than `size` bytes
 * remain in the buffer, nothing is written: an error is logged, the encoder
 * is flagged fatal and NULL is returned. The capacity check is done at
 * runtime rather than with assert() so that builds with NDEBUG cannot write
 * past the end of the buffer.
 */
static inline char * apir_encoder_write(apir_encoder * enc, size_t size, const void * val, size_t val_size) {
    assert(val_size <= size);

    if (unlikely(size > (size_t) (enc->end - enc->cur))) {
        GGML_LOG_ERROR("%s: writing too much to the encoder ...\n", __func__);
        apir_encoder_set_fatal(enc);
        return NULL;
    }

    char * write_addr = enc->cur;
    /* we should not rely on the compiler to optimize away memcpy... */
    memcpy(write_addr, val, val_size);
    enc->cur += size;

    return write_addr;
}
|
|
133
|
+
|
|
134
|
+
/*
|
|
135
|
+
* encode/decode
|
|
136
|
+
*/
|
|
137
|
+
|
|
138
|
+
/*
 * Read `data_size` bytes into `data`, consuming `size` bytes of the stream.
 * `size` must be a multiple of 4 (checked with assert).
 */
static inline void apir_decode(apir_decoder * dec, size_t size, void * data, size_t data_size) {
    assert((size & 3) == 0);
    apir_decoder_read(dec, size, data, data_size);
}
|
|
142
|
+
|
|
143
|
+
/*
 * Write `data_size` bytes from `data`, occupying `size` bytes of the stream.
 * `size` must be a multiple of 4 (checked with assert). The address returned
 * by apir_encoder_write is not used.
 */
static inline void apir_encode(apir_encoder * enc, size_t size, const void * data, size_t data_size) {
    assert((size & 3) == 0);
    (void) apir_encoder_write(enc, size, data, data_size);
}
|
|
147
|
+
|
|
148
|
+
/*
|
|
149
|
+
* typed encode/decode
|
|
150
|
+
*/
|
|
151
|
+
|
|
152
|
+
/* uint8_t */
|
|
153
|
+
|
|
154
|
+
/* Encode one uint8_t; it occupies sizeof(int) bytes in the stream. */
static inline void apir_encode_uint8_t(apir_encoder * enc, const uint8_t * val) {
    const size_t slot_size = sizeof(int);

    apir_encode(enc, slot_size, val, sizeof(uint8_t));
}
|
|
157
|
+
|
|
158
|
+
/* Decode one uint8_t from a sizeof(int)-byte slot of the stream. */
static inline void apir_decode_uint8_t(apir_decoder * dec, uint8_t * val) {
    const size_t slot_size = sizeof(int);

    apir_decode(dec, slot_size, val, sizeof(uint8_t));
}
|
|
161
|
+
|
|
162
|
+
/* uint64_t */
|
|
163
|
+
|
|
164
|
+
/* Encode one uint64_t into an 8-byte slot. */
static inline void apir_encode_uint64_t(apir_encoder * enc, const uint64_t * val) {
    const size_t slot_size = 8;

    apir_encode(enc, slot_size, val, sizeof(*val));
}
|
|
167
|
+
|
|
168
|
+
/* Decode one uint64_t from an 8-byte slot. */
static inline void apir_decode_uint64_t(apir_decoder * dec, uint64_t * val) {
    const size_t slot_size = 8;

    apir_decode(dec, slot_size, val, sizeof(*val));
}
|
|
171
|
+
|
|
172
|
+
/* Encode `count` consecutive uint64_t values starting at `val`. */
static inline void apir_encode_uint64_t_array(apir_encoder * enc, const uint64_t * val, uint32_t count) {
    const size_t n_bytes = count * sizeof(*val);

    /* sanity check on the multiplication; only active when asserts are enabled */
    assert(n_bytes >= count);
    apir_encode(enc, n_bytes, val, n_bytes);
}
|
|
177
|
+
|
|
178
|
+
/* Decode `count` consecutive uint64_t values into `val`. */
static inline void apir_decode_uint64_t_array(apir_decoder * dec, uint64_t * val, uint32_t count) {
    const size_t n_bytes = count * sizeof(*val);

    /* sanity check on the multiplication; only active when asserts are enabled */
    assert(n_bytes >= count);
    apir_decode(dec, n_bytes, val, n_bytes);
}
|
|
183
|
+
|
|
184
|
+
/*
 * Return a pointer to `count` uint64_t values stored in the stream and skip
 * over them, without copying.
 *
 * Returns NULL with the decoder flagged fatal when the stream does not hold
 * that many bytes, or when `count * sizeof(uint64_t)` does not fit in size_t
 * (possible where size_t is 32 bits wide); a wrapped byte count would
 * otherwise pass the bounds check in apir_decoder_use_inplace.
 */
static inline const uint64_t * apir_decode_uint64_t_array_inplace(apir_decoder * dec, uint32_t count) {
    size_t n_bytes;

    if (unlikely(__builtin_mul_overflow((size_t) count, sizeof(uint64_t), &n_bytes))) {
        GGML_LOG_ERROR("%s: overflow in array size of %u elements\n", __func__, (unsigned) count);
        apir_decoder_set_fatal(dec);
        return NULL;
    }

    return (uint64_t *) (uintptr_t) apir_decoder_use_inplace(dec, n_bytes);
}
|
|
187
|
+
|
|
188
|
+
/* int32_t */
|
|
189
|
+
|
|
190
|
+
/* Encode one int32_t into a 4-byte slot. */
static inline void apir_encode_int32_t(apir_encoder * enc, const int32_t * val) {
    const size_t slot_size = 4;

    apir_encode(enc, slot_size, val, sizeof(*val));
}
|
|
193
|
+
|
|
194
|
+
/* Decode one int32_t from a 4-byte slot. */
static inline void apir_decode_int32_t(apir_decoder * dec, int32_t * val) {
    const size_t slot_size = 4;

    apir_decode(dec, slot_size, val, sizeof(*val));
}
|
|
197
|
+
|
|
198
|
+
/* Encode `count` consecutive int32_t values starting at `val`. */
static inline void apir_encode_int32_t_array(apir_encoder * enc, const int32_t * val, uint32_t count) {
    const size_t n_bytes = count * sizeof(*val);

    /* sanity check on the multiplication; only active when asserts are enabled */
    assert(n_bytes >= count);
    apir_encode(enc, n_bytes, val, n_bytes);
}
|
|
203
|
+
|
|
204
|
+
/* Decode `count` consecutive int32_t values into `val`. */
static inline void apir_decode_int32_t_array(apir_decoder * dec, int32_t * val, uint32_t count) {
    const size_t n_bytes = count * sizeof(*val);

    /* sanity check on the multiplication; only active when asserts are enabled */
    assert(n_bytes >= count);
    apir_decode(dec, n_bytes, val, n_bytes);
}
|
|
209
|
+
|
|
210
|
+
/* array size (uint64_t) */
|
|
211
|
+
|
|
212
|
+
/* Encode an array length as a uint64_t. */
static inline void apir_encode_array_size(apir_encoder * enc, uint64_t size) {
    const uint64_t * length = &size;

    apir_encode_uint64_t(enc, length);
}
|
|
215
|
+
|
|
216
|
+
/*
 * Decode an array length and require it to equal `expected_size`.
 *
 * Returns the decoded length on a match. On a mismatch an error is logged,
 * the decoder is flagged fatal and 0 is returned.
 */
static inline uint64_t apir_decode_array_size(apir_decoder * dec, uint64_t expected_size) {
    uint64_t decoded;
    apir_decode_uint64_t(dec, &decoded);

    if (decoded == expected_size) {
        return decoded;
    }

    GGML_LOG_ERROR("%s: Couldn't decode array from the decoder\n", __func__);
    apir_decoder_set_fatal(dec);
    return 0;
}
|
|
226
|
+
|
|
227
|
+
/* Decode an array length and return it as-is, with no validation. */
static inline uint64_t apir_decode_array_size_unchecked(apir_decoder * dec) {
    uint64_t decoded;

    apir_decode_uint64_t(dec, &decoded);

    return decoded;
}
|
|
232
|
+
|
|
233
|
+
/* non-array pointer */
|
|
234
|
+
|
|
235
|
+
/*
 * Encode whether `val` is non-NULL, as an array size of 1 or 0.
 * Returns true when `val` is non-NULL.
 */
static inline bool apir_encode_simple_pointer(apir_encoder * enc, const void * val) {
    const bool present = (val != NULL);

    apir_encode_array_size(enc, present ? 1 : 0);

    return present;
}
|
|
239
|
+
|
|
240
|
+
/* Decode a presence marker: true when the stored array size is non-zero. */
static inline bool apir_decode_simple_pointer(apir_decoder * dec) {
    const uint64_t marker = apir_decode_array_size_unchecked(dec);

    return marker != 0;
}
|
|
243
|
+
|
|
244
|
+
/* uint32_t */
|
|
245
|
+
|
|
246
|
+
/* Encode one uint32_t into a 4-byte slot. */
static inline void apir_encode_uint32_t(apir_encoder * enc, const uint32_t * val) {
    const size_t slot_size = 4;

    apir_encode(enc, slot_size, val, sizeof(*val));
}
|
|
249
|
+
|
|
250
|
+
/* Decode one uint32_t from a 4-byte slot. */
static inline void apir_decode_uint32_t(apir_decoder * dec, uint32_t * val) {
    const size_t slot_size = 4;

    apir_decode(dec, slot_size, val, sizeof(*val));
}
|
|
253
|
+
|
|
254
|
+
/* Encode `count` consecutive uint32_t values starting at `val`. */
static inline void apir_encode_uint32_t_array(apir_encoder * enc, const uint32_t * val, uint32_t count) {
    const size_t n_bytes = count * sizeof(*val);

    /* sanity check on the multiplication; only active when asserts are enabled */
    assert(n_bytes >= count);
    apir_encode(enc, n_bytes, val, n_bytes);
}
|
|
259
|
+
|
|
260
|
+
/* Decode `count` consecutive uint32_t values into `val`. */
static inline void apir_decode_uint32_t_array(apir_decoder * dec, uint32_t * val, uint32_t count) {
    const size_t n_bytes = count * sizeof(*val);

    /* sanity check on the multiplication; only active when asserts are enabled */
    assert(n_bytes >= count);
    apir_decode(dec, n_bytes, val, n_bytes);
}
|
|
265
|
+
|
|
266
|
+
/* size_t */
|
|
267
|
+
|
|
268
|
+
/* Encode a size_t by widening it to uint64_t first. */
static inline void apir_encode_size_t(apir_encoder * enc, const size_t * val) {
    const uint64_t widened = (uint64_t) *val;

    apir_encode_uint64_t(enc, &widened);
}
|
|
272
|
+
|
|
273
|
+
/* Decode a uint64_t from the stream and store it, converted to size_t, in `*val`. */
static inline void apir_decode_size_t(apir_decoder * dec, size_t * val) {
    uint64_t wide;

    apir_decode_uint64_t(dec, &wide);
    *val = (size_t) wide;
}
|
|
278
|
+
|
|
279
|
+
/*
 * Encode `count` size_t values as uint64_t values. When size_t and uint64_t
 * have the same size the array is encoded in one go; otherwise each element
 * is widened and encoded individually.
 */
static inline void apir_encode_size_t_array(apir_encoder * enc, const size_t * val, uint32_t count) {
    if (sizeof(size_t) != sizeof(uint64_t)) {
        for (uint32_t idx = 0; idx < count; ++idx) {
            apir_encode_size_t(enc, val + idx);
        }
        return;
    }

    apir_encode_uint64_t_array(enc, (const uint64_t *) val, count);
}
|
|
288
|
+
|
|
289
|
+
/*
 * Decode `count` uint64_t values into an array of size_t. When size_t and
 * uint64_t have the same size the array is decoded in one go; otherwise each
 * element is decoded and converted individually.
 */
static inline void apir_decode_size_t_array(apir_decoder * dec, size_t * val, uint32_t count) {
    if (sizeof(size_t) != sizeof(uint64_t)) {
        for (uint32_t idx = 0; idx < count; ++idx) {
            apir_decode_size_t(dec, val + idx);
        }
        return;
    }

    apir_decode_uint64_t_array(dec, (uint64_t *) val, count);
}
|
|
298
|
+
|
|
299
|
+
/* opaque blob */
|
|
300
|
+
|
|
301
|
+
/* Encode `size` opaque bytes; the slot is `size` rounded up to a multiple of 4. */
static inline void apir_encode_blob_array(apir_encoder * enc, const void * val, size_t size) {
    const size_t padded_size = (size + 3) & ~(size_t) 3;

    apir_encode(enc, padded_size, val, size);
}
|
|
304
|
+
|
|
305
|
+
/* Decode `size` opaque bytes from a slot of `size` rounded up to a multiple of 4. */
static inline void apir_decode_blob_array(apir_decoder * dec, void * val, size_t size) {
    const size_t padded_size = (size + 3) & ~(size_t) 3;

    apir_decode(dec, padded_size, val, size);
}
|
|
308
|
+
|
|
309
|
+
/* string */
|
|
310
|
+
|
|
311
|
+
/*
 * Encode a NUL-terminated string as a blob of `size` bytes. `size` must be
 * non-zero and large enough to hold the string plus its terminator (both
 * checked with assert).
 */
static inline void apir_encode_char_array(apir_encoder * enc, const char * val, size_t size) {
    assert(size);
    assert(strlen(val) < size);

    apir_encode_blob_array(enc, val, size);
}
|
|
315
|
+
|
|
316
|
+
/*
 * Decode a string of `size` bytes into `val` and force NUL-termination by
 * overwriting the last byte.
 *
 * A `size` of 0 is rejected up front: an error is logged and the decoder is
 * flagged fatal without touching `val` or the stream (nothing would be
 * consumed for a zero-sized blob anyway).
 */
static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t size) {
    if (!size) {
        GGML_LOG_ERROR("%s: Couldn't decode the blob array\n", __func__);
        apir_decoder_set_fatal(dec);
        return;
    }

    apir_decode_blob_array(dec, val, size);
    val[size - 1] = '\0';
}
|
|
325
|
+
|
|
326
|
+
/* (temp) buffer allocation */
|
|
327
|
+
|
|
328
|
+
static inline void * apir_decoder_alloc_array(size_t size, size_t count) {
|
|
329
|
+
size_t alloc_size;
|
|
330
|
+
if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) {
|
|
331
|
+
GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n", __func__, size, count);
|
|
332
|
+
return NULL;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
return malloc(alloc_size);
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
/* bool */
|
|
339
|
+
|
|
340
|
+
/* Encode one bool; it occupies sizeof(int) bytes in the stream. */
static inline void apir_encode_bool_t(apir_encoder * enc, const bool * val) {
    const size_t slot_size = sizeof(int);

    apir_encode(enc, slot_size, val, sizeof(*val));
}
|
|
343
|
+
|
|
344
|
+
/* Decode one bool from a sizeof(int)-byte slot of the stream. */
static inline void apir_decode_bool_t(apir_decoder * dec, bool * val) {
    const size_t slot_size = sizeof(int);

    apir_decode(dec, slot_size, val, sizeof(*val));
}
|
|
347
|
+
|
|
348
|
+
/* apir_buffer_type_host_handle_t */
|
|
349
|
+
|
|
350
|
+
/* Encode a buffer-type host handle at its native size. */
static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder *                         enc,
                                                              const apir_buffer_type_host_handle_t * val) {
    const size_t handle_size = sizeof(*val);

    apir_encode(enc, handle_size, val, handle_size);
}
|
|
354
|
+
|
|
355
|
+
/* Decode a buffer-type host handle at its native size. */
static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder *                   dec,
                                                              apir_buffer_type_host_handle_t * val) {
    const size_t handle_size = sizeof(*val);

    apir_decode(dec, handle_size, val, handle_size);
}
|
|
359
|
+
|
|
360
|
+
/* apir_buffer_host_handle_t */
|
|
361
|
+
|
|
362
|
+
/* Encode a buffer host handle at its native size. */
static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc, const apir_buffer_host_handle_t * val) {
    const size_t handle_size = sizeof(*val);

    apir_encode(enc, handle_size, val, handle_size);
}
|
|
365
|
+
|
|
366
|
+
/* Decode a buffer host handle at its native size. */
static inline void apir_decode_apir_buffer_host_handle_t(apir_decoder * dec, apir_buffer_host_handle_t * val) {
    const size_t handle_size = sizeof(*val);

    apir_decode(dec, handle_size, val, handle_size);
}
|
|
369
|
+
|
|
370
|
+
/* uintptr_t */
|
|
371
|
+
|
|
372
|
+
/* Encode one uintptr_t at its native size. */
static inline void apir_encode_uintptr_t(apir_encoder * enc, const uintptr_t * val) {
    const size_t value_size = sizeof(uintptr_t);

    apir_encode(enc, value_size, val, value_size);
}
|
|
375
|
+
|
|
376
|
+
/* Decode one uintptr_t at its native size. */
static inline void apir_decode_uintptr_t(apir_decoder * dec, uintptr_t * val) {
    const size_t value_size = sizeof(uintptr_t);

    apir_decode(dec, value_size, val, value_size);
}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
#include "apir_cs.h"
|
|
2
|
+
#include "apir_cs_rpc.h"
|
|
3
|
+
#include "ggml-impl.h"
|
|
4
|
+
|
|
5
|
+
// ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer);
|
|
6
|
+
|
|
7
|
+
static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle);
|
|
8
|
+
|
|
9
|
+
static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec);
|
|
10
|
+
|
|
11
|
+
/* apir_rpc_tensor */
|
|
12
|
+
|
|
13
|
+
/* Encode one apir_rpc_tensor as a raw copy of the whole struct. */
static inline void apir_encode_rcp_tensor(apir_encoder * enc, const apir_rpc_tensor * apir_rpc_tensor) {
    const size_t n_bytes = sizeof(*apir_rpc_tensor);

    apir_encode(enc, n_bytes, apir_rpc_tensor, n_bytes);
}
|
|
17
|
+
|
|
18
|
+
/*
 * Return a pointer to one apir_rpc_tensor stored in the stream and skip over
 * it, without copying. Yields NULL when apir_decoder_use_inplace fails.
 */
static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(apir_decoder * dec) {
    const void * raw = apir_decoder_use_inplace(dec, sizeof(apir_rpc_tensor));

    return (apir_rpc_tensor *) (uintptr_t) raw;
}
|
|
23
|
+
|
|
24
|
+
/*
 * Return a pointer to `n_tensors` consecutive apir_rpc_tensor entries stored
 * in the stream and skip over them, without copying.
 *
 * Returns NULL with the decoder flagged fatal when the stream does not hold
 * that many bytes, or when `sizeof(apir_rpc_tensor) * n_tensors` does not fit
 * in size_t; a wrapped byte count would otherwise pass the bounds check in
 * apir_decoder_use_inplace.
 */
static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec, uint32_t n_tensors) {
    size_t n_bytes;

    if (unlikely(__builtin_mul_overflow(sizeof(apir_rpc_tensor), (size_t) n_tensors, &n_bytes))) {
        GGML_LOG_ERROR("%s: overflow in array size of %u tensors\n", __func__, (unsigned) n_tensors);
        apir_decoder_set_fatal(dec);
        return NULL;
    }

    return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, n_bytes);
}
|
|
29
|
+
|
|
30
|
+
/* ggml_tensor */
|
|
31
|
+
|
|
32
|
+
/* Convert `tensor` with apir_serialize_tensor and encode the result. */
static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor * tensor) {
    const apir_rpc_tensor wire_tensor = apir_serialize_tensor(tensor);

    apir_encode_rcp_tensor(enc, &wire_tensor);
}
|
|
37
|
+
|
|
38
|
+
/*
 * Decode one serialized tensor from the stream and rebuild it with
 * apir_deserialize_tensor inside a freshly created ggml context sized for a
 * single tensor's overhead (no_alloc = true).
 *
 * Returns NULL when the serialized tensor cannot be read from the stream.
 *
 * NOTE(review): the ggml context created here is not released in this
 * function; presumably the returned tensor lives in it and something else
 * owns its lifetime. Confirm against the callers.
 */
static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) {
    const apir_rpc_tensor * wire_tensor = apir_decode_apir_rpc_tensor_inplace(dec);
    if (wire_tensor == NULL) {
        return NULL;
    }

    ggml_init_params params{
        /*.mem_size   =*/ ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    ggml_context * ctx = ggml_init(params);

    return apir_deserialize_tensor(ctx, wire_tensor);
}
|
|
57
|
+
|
|
58
|
+
/* *** ggml_backend_buffer_type_t *** */
|
|
59
|
+
|
|
60
|
+
// ggml_backend_buffer_type_t is a POINTER (to a struct).
|
|
61
|
+
// Only the host pointer is shared between the host and guest.
|
|
62
|
+
// The guest stores it in `buft->context`.
|
|
63
|
+
// The host simply writes the pointer address in the buffer variable.
|
|
64
|
+
|
|
65
|
+
/* Encode the host handle that ggml_buffer_type_to_apir_handle yields for `buft`. */
static inline void apir_encode_ggml_buffer_type(apir_encoder * enc, ggml_backend_buffer_type_t buft) {
    const apir_buffer_type_host_handle_t host_handle = ggml_buffer_type_to_apir_handle(buft);
    const size_t                         handle_size = sizeof(host_handle);

    apir_encoder_write(enc, handle_size, &host_handle, handle_size);
}
|
|
69
|
+
|
|
70
|
+
/*
 * Decode a buffer-type host handle and reinterpret it as a
 * ggml_backend_buffer_type_t pointer. The handle is not validated here.
 */
static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decoder * dec) {
    apir_buffer_type_host_handle_t host_handle;
    const size_t                   handle_size = sizeof(host_handle);

    apir_decoder_read(dec, handle_size, &host_handle, handle_size);

    return (ggml_backend_buffer_type_t) host_handle;
}
|
|
77
|
+
|
|
78
|
+
/* Encode a buffer-type host handle passed by value. */
static inline void apir_encode_apir_buffer_type_host_handle(apir_encoder * enc, apir_buffer_type_host_handle_t handle) {
    const size_t handle_size = sizeof(handle);

    apir_encoder_write(enc, handle_size, &handle, handle_size);
}
|
|
81
|
+
|
|
82
|
+
/* Decode a buffer-type host handle and return it by value. */
static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) {
    apir_buffer_type_host_handle_t host_handle;
    const size_t                   handle_size = sizeof(host_handle);

    apir_decoder_read(dec, handle_size, &host_handle, handle_size);

    return host_handle;
}
|
|
89
|
+
|
|
90
|
+
/* *** ggml_backend_buffer_t *** */
|
|
91
|
+
|
|
92
|
+
// ggml_backend_buffer_t is a POINTER.
|
|
93
|
+
// same logic as for ggml_backend_buffer_type_t
|
|
94
|
+
|
|
95
|
+
/* Encode the host handle that BUFFER_TO_HOST_HANDLE yields for `buffer`. */
static inline void apir_encode_ggml_buffer(apir_encoder * enc, const ggml_backend_buffer_t buffer) {
    const apir_buffer_host_handle_t host_handle = BUFFER_TO_HOST_HANDLE(buffer);
    const size_t                    handle_size = sizeof(host_handle);

    apir_encoder_write(enc, handle_size, &host_handle, handle_size);
}
|
|
99
|
+
|
|
100
|
+
/*
 * Decode a buffer pointer from the stream and validate it.
 *
 * A NULL pointer is returned unchanged. A non-NULL pointer is accepted only
 * if it is present in the externally defined `backend_buffers` set;
 * otherwise a warning is logged, the decoder is flagged fatal and NULL is
 * returned.
 */
static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec) {
    ggml_backend_buffer_t buffer;
    const size_t          buffer_ptr_size = sizeof(buffer);

    apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size);

    if (!buffer) {
        return buffer;
    }

    // SECURITY: Validate buffer handle against tracked buffers to prevent
    // guest VM from providing arbitrary host memory addresses
    extern std::unordered_set<ggml_backend_buffer_t> backend_buffers;
    if (backend_buffers.find(buffer) != backend_buffers.end()) {
        return buffer;
    }

    GGML_LOG_WARN("ggml-virtgpu-backend: %s: Invalid buffer handle from guest: %p\n", __func__,
                  (void *) buffer);
    // Set fatal flag to prevent further processing with invalid handle
    apir_decoder_set_fatal(dec);
    return NULL;
}
|
|
121
|
+
|
|
122
|
+
/* enum ggml_status */
|
|
123
|
+
|
|
124
|
+
// Encode a ggml_status value.
//
// Writes the sizeof(ggml_status) bytes that `status` points to into `enc`.
static inline void apir_encode_ggml_status(apir_encoder * enc, const ggml_status * status) {
    const size_t status_size = sizeof(ggml_status);

    apir_encoder_write(enc, status_size, status, status_size);
}
|
|
127
|
+
|
|
128
|
+
// Decode a ggml_status value.
//
// Reads sizeof(ggml_status) bytes from `dec` into the object `status` points to.
static inline void apir_decode_ggml_status(apir_decoder * dec, ggml_status * status) {
    const size_t status_size = sizeof(ggml_status);

    apir_decoder_read(dec, status_size, status, status_size);
}
|
|
131
|
+
|
|
132
|
+
/* virtgpu_shmem */
|
|
133
|
+
|
|
134
|
+
// Encode a virtgpu shared-memory resource id.
//
// The id is a plain uint32_t; encoding is delegated to apir_encode_uint32_t(),
// which receives the address of the by-value parameter.
static inline void apir_encode_virtgpu_shmem_res_id(apir_encoder * enc, uint32_t shmem_res_id) {
    apir_encode_uint32_t(enc, &shmem_res_id);
}
|
|
137
|
+
|
|
138
|
+
// Decode a virtgpu shared-memory resource id.
//
// The id is a plain uint32_t; decoding is delegated to apir_decode_uint32_t(),
// which stores the result through `shmem_res_id`.
static inline void apir_decode_virtgpu_shmem_res_id(apir_decoder * dec, uint32_t * shmem_res_id) {
    apir_decode_uint32_t(dec, shmem_res_id);
}
|
|
141
|
+
|
|
142
|
+
/* ggml_cgraph */
|
|
143
|
+
|
|
144
|
+
// Serialize `cgraph` into `cgraph_data` and report the resulting byte count.
//
// The serialization itself is done by apir_serialize_graph(); the return value
// is the size of `cgraph_data` after that call.
static inline size_t apir_serialize_ggml_cgraph(ggml_cgraph * cgraph, std::vector<uint8_t> & cgraph_data) {
    apir_serialize_graph(cgraph, cgraph_data);

    const size_t serialized_size = cgraph_data.size();
    return serialized_size;
}
|
|
149
|
+
|
|
150
|
+
// Encode an already-serialized compute graph.
//
// Passes the contents of `cgraph_data` (all cgraph_data.size() bytes) to
// apir_encode().
static inline void apir_encode_cgraph_data(apir_encoder * enc, std::vector<uint8_t> & cgraph_data) {
    size_t n_bytes = cgraph_data.size();

    apir_encode(enc, n_bytes, cgraph_data.data(), n_bytes);
}
|
|
155
|
+
|
|
156
|
+
// Decode a compute graph from `dec`.
//
// Reads, in this order: a uint32 node count, an in-place uint64 array sized by
// that count, a uint32 tensor count, and an in-place apir_rpc_tensor array
// sized by that count. The four values are then handed to
// apir_deserialize_graph(), whose result is returned.
//
// `cgraph_size` is accepted for interface compatibility but is not used.
static inline ggml_cgraph * apir_decode_ggml_cgraph(apir_decoder * dec, size_t cgraph_size) {
    GGML_UNUSED(cgraph_size);

    // Node section.
    uint32_t node_count;
    apir_decode_uint32_t(dec, &node_count);
    const uint64_t * node_ids = apir_decode_uint64_t_array_inplace(dec, node_count);

    // Tensor section.
    uint32_t tensor_count;
    apir_decode_uint32_t(dec, &tensor_count);
    const apir_rpc_tensor * tensor_table = apir_decode_apir_rpc_tensor_array_inplace(dec, tensor_count);

    ggml_cgraph * graph = apir_deserialize_graph(node_count, tensor_count, tensor_table, node_ids);
    return graph;
}
|
|
169
|
+
|
|
170
|
+
// Encode a buffer host handle.
//
// Writes the sizeof(*handle) bytes of the handle value that `handle` points to
// into `enc`. `handle` must not be NULL.
static inline void apir_encode_ggml_buffer_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle) {
    // Pass `handle`, not `&handle`: the payload is the pointed-to handle value.
    // Taking the address of the parameter would serialize the bytes of the
    // local pointer itself (a caller-side address) instead of the handle.
    apir_encoder_write(enc, sizeof(*handle), handle, sizeof(*handle));
}
|
|
173
|
+
|
|
174
|
+
// Encode a tensor, together with the objects it points to, inline into `enc`.
//
// Written in sequence:
//   1. the raw bytes of `*tensor`;
//   2. if tensor->buffer is set, the buffer's host handle;
//   3. if tensor->view_src is set, the raw bytes of `*tensor->view_src`;
//   4. the raw bytes of each source tensor, stopping at the first NULL entry
//      of tensor->src or at the end of the array, whichever comes first.
//
// Aborts if tensor->extra is set. Logs a warning, once per process, the first
// time a tensor with both src[0] and buffer set is seen; encoding still goes on.
static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml_tensor * tensor) {
    const size_t tensor_size = sizeof(*tensor);
    // Element count of the src array (assumes `src` is a fixed-size array
    // member, as declared in ggml.h).
    const size_t max_src     = sizeof(tensor->src) / sizeof(tensor->src[0]);

    if (tensor->extra) {
        GGML_ABORT("%s: Cannot pass tensors with extra", __func__);
    }

    if (tensor->src[0] && tensor->buffer) {
        static int first = 1;
        if (first) {
            GGML_LOG_WARN("%s: Cannot pass tensors with src and buffer\n", __func__);
            first = 0;
        }
    }

    apir_encoder_write(enc, tensor_size, tensor, tensor_size);

    // tensor->data is a pointer inside the device buffer. No need to touch it
    // tensor->buffer is a pointer to a buffer. Encoding the buffer handle in sequence.
    // (could also make a copy of the tensor, and update locally.)

    if (tensor->buffer) {
        apir_buffer_host_handle_t buffer_handle = ggml_buffer_to_apir_handle(tensor->buffer);
        apir_encode_ggml_buffer_handle(enc, &buffer_handle);
    }

    if (tensor->view_src) {
        apir_encoder_write(enc, tensor_size, tensor->view_src, tensor_size);
    }

    // Bound the walk by the array length: when every slot is populated there
    // is no NULL terminator, and an unbounded loop would read past src[].
    for (size_t i = 0; i < max_src && tensor->src[i]; i++) {
        const ggml_tensor * tensor_src = tensor->src[i];
        apir_encoder_write(enc, tensor_size, tensor_src, tensor_size);
    }
}
|
|
209
|
+
|
|
210
|
+
// Decode a tensor that was encoded inline, fixing up its pointers in place.
//
// The tensor is not copied: the returned pointer refers to storage obtained
// from apir_decoder_use_inplace(), and that storage is modified here so that
//   - tensor->buffer   is replaced by the result of apir_decode_ggml_buffer(),
//   - tensor->view_src points at the next in-place ggml_tensor record,
//   - each non-NULL tensor->src[i] points at its own in-place record.
// A field that is NULL in the decoded tensor has no trailing record and is
// left untouched.
//
// Returns NULL if the decoder yields no storage for the tensor itself.
static inline const ggml_tensor * apir_decode_ggml_tensor_inplace(apir_decoder * dec) {
    // It is safe to remove the `const` qualifier here, we *do* want to
    // modify the shared memory data to fix the `src` pointers.
    ggml_tensor * tensor = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));

    // NOTE(review): presumably apir_decoder_use_inplace() returns NULL when the
    // stream is exhausted — confirm. Guard so we never dereference NULL.
    if (!tensor) {
        return NULL;
    }

    // tensor->data is a pointer inside the device buffer. No need to touch it
    // tensor->buffer is a pointer to a buffer. Decode the buffer handle encoded in sequence.
    if (tensor->buffer) {
        tensor->buffer = apir_decode_ggml_buffer(dec);
    }

    if (tensor->view_src) {
        ggml_tensor * tensor_view_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
        tensor->view_src = tensor_view_src;
    }

    // The src entries come from the peer. Bound the walk by the array length
    // (assumes `src` is a fixed-size array member, as declared in ggml.h) so a
    // tensor with every slot populated cannot make us read or write past src[].
    const size_t max_src = sizeof(tensor->src) / sizeof(tensor->src[0]);
    for (size_t i = 0; i < max_src && tensor->src[i]; i++) {
        ggml_tensor * tensor_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
        tensor->src[i] = tensor_src;  // overwrite op->src[i] pointer with the actual location of the src tensor
    }

    return tensor;
}
|