whispercpp 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +99 -2
- data/ext/extconf.rb +1 -0
- data/ext/ruby_whisper.c +20 -4
- data/ext/ruby_whisper.h +30 -2
- data/ext/ruby_whisper_context.c +216 -124
- data/ext/ruby_whisper_context_params.c +163 -0
- data/ext/ruby_whisper_model.c +0 -1
- data/ext/ruby_whisper_params.c +0 -1
- data/ext/ruby_whisper_segment.c +0 -1
- data/ext/ruby_whisper_token.c +29 -9
- data/ext/ruby_whisper_transcribe.cpp +4 -1
- data/ext/ruby_whisper_vad_context.c +48 -1
- data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
- data/ext/ruby_whisper_vad_params.c +0 -1
- data/ext/ruby_whisper_vad_segment.c +0 -1
- data/ext/ruby_whisper_vad_segments.c +0 -1
- data/ext/sources/CMakeLists.txt +1 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/cmake/whisper-config.cmake.in +5 -40
- data/ext/sources/examples/bench/bench.cpp +23 -18
- data/ext/sources/examples/cli/cli.cpp +8 -0
- data/ext/sources/examples/common-ggml.cpp +2 -0
- data/ext/sources/examples/miniaudio.h +4507 -2131
- data/ext/sources/examples/server/server.cpp +18 -4
- data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
- data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
- data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
- data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
- data/ext/sources/examples/talk-llama/llama-context.h +27 -28
- data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
- data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
- data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
- data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
- data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
- data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
- data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
- data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
- data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
- data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
- data/ext/sources/examples/talk-llama/llama-model.h +72 -19
- data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
- data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
- data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
- data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
- data/ext/sources/examples/talk-llama/llama.cpp +76 -22
- data/ext/sources/examples/talk-llama/llama.h +63 -30
- data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
- data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
- data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
- data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
- data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
- data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
- data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
- data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
- data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
- data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
- data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
- data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
- data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
- data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
- data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
- data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
- data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
- data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
- data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
- data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
- data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
- data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
- data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/models.h +181 -46
- data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
- data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
- data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
- data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
- data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
- data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
- data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
- data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
- data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
- data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
- data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
- data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
- data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
- data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
- data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
- data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
- data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
- data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
- data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
- data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
- data/ext/sources/ggml/CMakeLists.txt +9 -3
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cann.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +5 -0
- data/ext/sources/ggml/include/ggml-openvino.h +37 -0
- data/ext/sources/ggml/include/ggml-opt.h +1 -1
- data/ext/sources/ggml/include/ggml-rpc.h +6 -1
- data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
- data/ext/sources/ggml/include/ggml.h +56 -9
- data/ext/sources/ggml/src/CMakeLists.txt +3 -0
- data/ext/sources/ggml/src/ggml-alloc.c +4 -9
- data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
- data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
- data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
- data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
- data/ext/sources/ggml/src/ggml-common.h +11 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
- data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
- data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
- data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
- data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
- data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
- data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
- data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
- data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
- data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
- data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
- data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
- data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
- data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
- data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
- data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
- data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
- data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
- data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
- data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
- data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
- data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
- data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
- data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
- data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
- data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
- data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
- data/ext/sources/ggml/src/ggml-impl.h +62 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
- data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
- data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
- data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
- data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
- data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
- data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
- data/ext/sources/ggml/src/ggml-quants.c +96 -5
- data/ext/sources/ggml/src/ggml-quants.h +3 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
- data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
- data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
- data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
- data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
- data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
- data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
- data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
- data/ext/sources/ggml/src/ggml.c +167 -33
- data/ext/sources/ggml/src/gguf.cpp +229 -44
- data/ext/sources/src/whisper.cpp +6 -28
- data/sig/whisper.rbs +43 -2
- data/test/test_context_params.rb +82 -0
- data/test/test_token.rb +11 -0
- data/test/test_vad_context.rb +58 -8
- data/test/test_whisper.rb +20 -0
- data/whispercpp.gemspec +1 -1
- metadata +240 -28
- data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
- data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
- data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
- data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
- data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
// sample drv interface
|
|
2
|
+
|
|
3
|
+
#pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
|
|
4
|
+
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
5
|
+
#pragma clang diagnostic ignored "-Wsign-compare"
|
|
6
|
+
|
|
7
|
+
#include <filesystem>
|
|
8
|
+
#include <set>
|
|
9
|
+
#include <sstream>
|
|
10
|
+
#include <string>
|
|
11
|
+
#ifdef _WIN32
|
|
12
|
+
# define WIN32_LEAN_AND_MEAN
|
|
13
|
+
# ifndef NOMINMAX
|
|
14
|
+
# define NOMINMAX
|
|
15
|
+
# endif
|
|
16
|
+
# include <windows.h>
|
|
17
|
+
# include <winevt.h>
|
|
18
|
+
#else
|
|
19
|
+
# include <dlfcn.h>
|
|
20
|
+
# include <unistd.h>
|
|
21
|
+
#endif
|
|
22
|
+
#include "ggml-impl.h"
|
|
23
|
+
#include "htp-drv.h"
|
|
24
|
+
#include "libdl.h"
|
|
25
|
+
|
|
26
|
+
#include <domain.h>
|
|
27
|
+
|
|
28
|
+
//
|
|
29
|
+
// Driver API types
|
|
30
|
+
//
|
|
31
|
+
|
|
32
|
+
typedef void * (*rpcmem_alloc_pfn_t)(int heapid, uint32_t flags, int size);
|
|
33
|
+
typedef void * (*rpcmem_alloc2_pfn_t)(int heapid, uint32_t flags, size_t size);
|
|
34
|
+
typedef void (*rpcmem_free_pfn_t)(void * po);
|
|
35
|
+
typedef int (*rpcmem_to_fd_pfn_t)(void * po);
|
|
36
|
+
|
|
37
|
+
typedef AEEResult (*dspqueue_create_pfn_t)(int domain,
|
|
38
|
+
uint32_t flags,
|
|
39
|
+
uint32_t req_queue_size,
|
|
40
|
+
uint32_t resp_queue_size,
|
|
41
|
+
dspqueue_callback_t packet_callback,
|
|
42
|
+
dspqueue_callback_t error_callback,
|
|
43
|
+
void * callback_context,
|
|
44
|
+
dspqueue_t * queue);
|
|
45
|
+
typedef AEEResult (*dspqueue_close_pfn_t)(dspqueue_t queue);
|
|
46
|
+
typedef AEEResult (*dspqueue_export_pfn_t)(dspqueue_t queue, uint64_t *queue_id);
|
|
47
|
+
typedef AEEResult (*dspqueue_write_pfn_t)(dspqueue_t queue, uint32_t flags,
|
|
48
|
+
uint32_t num_buffers,
|
|
49
|
+
struct dspqueue_buffer *buffers,
|
|
50
|
+
uint32_t message_length,
|
|
51
|
+
const uint8_t *message,
|
|
52
|
+
uint32_t timeout_us);
|
|
53
|
+
typedef AEEResult (*dspqueue_read_pfn_t)(dspqueue_t queue, uint32_t *flags,
|
|
54
|
+
uint32_t max_buffers, uint32_t *num_buffers,
|
|
55
|
+
struct dspqueue_buffer *buffers,
|
|
56
|
+
uint32_t max_message_length,
|
|
57
|
+
uint32_t *message_length, uint8_t *message,
|
|
58
|
+
uint32_t timeout_us);
|
|
59
|
+
|
|
60
|
+
typedef int (*fastrpc_mmap_pfn_t)(int domain, int fd, void *addr, int offset, size_t length, enum fastrpc_map_flags flags);
|
|
61
|
+
typedef int (*fastrpc_munmap_pfn_t)(int domain, int fd, void *addr, size_t length);
|
|
62
|
+
|
|
63
|
+
typedef int (*remote_handle64_open_pfn_t)(const char* name, remote_handle64 *ph);
|
|
64
|
+
typedef int (*remote_handle64_invoke_pfn_t)(remote_handle64 h, uint32_t dwScalars, remote_arg *pra);
|
|
65
|
+
typedef int (*remote_handle64_close_pfn_t)(remote_handle h);
|
|
66
|
+
typedef int (*remote_handle_control_pfn_t)(uint32_t req, void* data, uint32_t datalen);
|
|
67
|
+
typedef int (*remote_handle64_control_pfn_t)(remote_handle64 h, uint32_t req, void* data, uint32_t datalen);
|
|
68
|
+
typedef int (*remote_session_control_pfn_t)(uint32_t req, void *data, uint32_t datalen);
|
|
69
|
+
|
|
70
|
+
//
|
|
71
|
+
// Driver API pfns
|
|
72
|
+
//
|
|
73
|
+
|
|
74
|
+
rpcmem_alloc_pfn_t rpcmem_alloc_pfn = nullptr;
|
|
75
|
+
rpcmem_alloc2_pfn_t rpcmem_alloc2_pfn = nullptr;
|
|
76
|
+
rpcmem_free_pfn_t rpcmem_free_pfn = nullptr;
|
|
77
|
+
rpcmem_to_fd_pfn_t rpcmem_to_fd_pfn = nullptr;
|
|
78
|
+
|
|
79
|
+
fastrpc_mmap_pfn_t fastrpc_mmap_pfn = nullptr;
|
|
80
|
+
fastrpc_munmap_pfn_t fastrpc_munmap_pfn = nullptr;
|
|
81
|
+
|
|
82
|
+
dspqueue_create_pfn_t dspqueue_create_pfn = nullptr;
|
|
83
|
+
dspqueue_close_pfn_t dspqueue_close_pfn = nullptr;
|
|
84
|
+
dspqueue_export_pfn_t dspqueue_export_pfn = nullptr;
|
|
85
|
+
dspqueue_write_pfn_t dspqueue_write_pfn = nullptr;
|
|
86
|
+
dspqueue_read_pfn_t dspqueue_read_pfn = nullptr;
|
|
87
|
+
|
|
88
|
+
remote_handle64_open_pfn_t remote_handle64_open_pfn = nullptr;
|
|
89
|
+
remote_handle64_invoke_pfn_t remote_handle64_invoke_pfn = nullptr;
|
|
90
|
+
remote_handle64_close_pfn_t remote_handle64_close_pfn = nullptr;
|
|
91
|
+
remote_handle_control_pfn_t remote_handle_control_pfn = nullptr;
|
|
92
|
+
remote_handle64_control_pfn_t remote_handle64_control_pfn = nullptr;
|
|
93
|
+
remote_session_control_pfn_t remote_session_control_pfn = nullptr;
|
|
94
|
+
|
|
95
|
+
//
|
|
96
|
+
// Driver API
|
|
97
|
+
//
|
|
98
|
+
|
|
99
|
+
void * rpcmem_alloc(int heapid, uint32_t flags, int size) {
|
|
100
|
+
return rpcmem_alloc_pfn(heapid, flags, size);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
void * rpcmem_alloc2(int heapid, uint32_t flags, size_t size) {
|
|
104
|
+
if (rpcmem_alloc2_pfn) {
|
|
105
|
+
return rpcmem_alloc2_pfn(heapid, flags, size);
|
|
106
|
+
} else {
|
|
107
|
+
GGML_LOG_INFO("ggml-hex: rpcmem_alloc2 not found, falling back to rpcmem_alloc\n");
|
|
108
|
+
return rpcmem_alloc_pfn(heapid, flags, size);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
void rpcmem_free(void * po) {
|
|
113
|
+
return rpcmem_free_pfn(po);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
int rpcmem_to_fd(void * po) {
|
|
117
|
+
return rpcmem_to_fd_pfn(po);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Forwards to the driver's fastrpc_mmap through fastrpc_mmap_pfn.
// The pointer is not null-checked here.
HTPDRV_API int fastrpc_mmap(int domain, int fd, void * addr, int offset, size_t length, enum fastrpc_map_flags flags) {
    const int rc = fastrpc_mmap_pfn(domain, fd, addr, offset, length, flags);
    return rc;
}
|
|
123
|
+
|
|
124
|
+
// Forwards to the driver's fastrpc_munmap through fastrpc_munmap_pfn.
// The pointer is not null-checked here.
HTPDRV_API int fastrpc_munmap(int domain, int fd, void * addr, size_t length) {
    const int rc = fastrpc_munmap_pfn(domain, fd, addr, length);
    return rc;
}
|
|
127
|
+
|
|
128
|
+
// Forwards to the driver's dspqueue_create through dspqueue_create_pfn.
// The pointer is not null-checked here.
AEEResult dspqueue_create(int                 domain,
                          uint32_t            flags,
                          uint32_t            req_queue_size,
                          uint32_t            resp_queue_size,
                          dspqueue_callback_t packet_callback,
                          dspqueue_callback_t error_callback,
                          void *              callback_context,
                          dspqueue_t *        queue) {
    const AEEResult status = dspqueue_create_pfn(domain, flags, req_queue_size, resp_queue_size,
                                                 packet_callback, error_callback, callback_context, queue);
    return status;
}
|
|
139
|
+
|
|
140
|
+
// Forwards to the driver's dspqueue_close through dspqueue_close_pfn.
// The pointer is not null-checked here.
AEEResult dspqueue_close(dspqueue_t queue) {
    const AEEResult status = dspqueue_close_pfn(queue);
    return status;
}
|
|
143
|
+
|
|
144
|
+
// Forwards to the driver's dspqueue_export through dspqueue_export_pfn.
// The pointer is not null-checked here.
AEEResult dspqueue_export(dspqueue_t queue, uint64_t * queue_id) {
    const AEEResult status = dspqueue_export_pfn(queue, queue_id);
    return status;
}
|
|
147
|
+
|
|
148
|
+
// Forwards to the driver's dspqueue_write through dspqueue_write_pfn.
// The pointer is not null-checked here.
AEEResult dspqueue_write(dspqueue_t               queue,
                         uint32_t                 flags,
                         uint32_t                 num_buffers,
                         struct dspqueue_buffer * buffers,
                         uint32_t                 message_length,
                         const uint8_t *          message,
                         uint32_t                 timeout_us) {
    const AEEResult status =
        dspqueue_write_pfn(queue, flags, num_buffers, buffers, message_length, message, timeout_us);
    return status;
}
|
|
157
|
+
|
|
158
|
+
// Forwards to the driver's dspqueue_read through dspqueue_read_pfn.
// The pointer is not null-checked here.
AEEResult dspqueue_read(dspqueue_t               queue,
                        uint32_t *               flags,
                        uint32_t                 max_buffers,
                        uint32_t *               num_buffers,
                        struct dspqueue_buffer * buffers,
                        uint32_t                 max_message_length,
                        uint32_t *               message_length,
                        uint8_t *                message,
                        uint32_t                 timeout_us) {
    const AEEResult status = dspqueue_read_pfn(queue, flags, max_buffers, num_buffers, buffers,
                                               max_message_length, message_length, message, timeout_us);
    return status;
}
|
|
170
|
+
|
|
171
|
+
// Forwards to the driver's remote_handle64_open through remote_handle64_open_pfn.
// The pointer is not null-checked here.
HTPDRV_API int remote_handle64_open(const char * name, remote_handle64 * ph) {
    const int rc = remote_handle64_open_pfn(name, ph);
    return rc;
}
|
|
174
|
+
|
|
175
|
+
// Forwards to the driver's remote_handle64_invoke through remote_handle64_invoke_pfn.
// The pointer is not null-checked here.
HTPDRV_API int remote_handle64_invoke(remote_handle64 h, uint32_t dwScalars, remote_arg * pra) {
    const int rc = remote_handle64_invoke_pfn(h, dwScalars, pra);
    return rc;
}
|
|
178
|
+
|
|
179
|
+
// Forwards to the driver's remote_handle64_close through remote_handle64_close_pfn.
// The pointer is not null-checked here.
HTPDRV_API int remote_handle64_close(remote_handle64 h) {
    const int rc = remote_handle64_close_pfn(h);
    return rc;
}
|
|
182
|
+
|
|
183
|
+
// Forwards to the driver's remote_handle_control through remote_handle_control_pfn.
// The pointer is not null-checked here.
HTPDRV_API int remote_handle_control(uint32_t req, void * data, uint32_t datalen) {
    const int rc = remote_handle_control_pfn(req, data, datalen);
    return rc;
}
|
|
186
|
+
|
|
187
|
+
// Forwards to the driver's remote_handle64_control through remote_handle64_control_pfn.
// The pointer is not null-checked here.
HTPDRV_API int remote_handle64_control(remote_handle64 h, uint32_t req, void * data, uint32_t datalen) {
    const int rc = remote_handle64_control_pfn(h, req, data, datalen);
    return rc;
}
|
|
190
|
+
|
|
191
|
+
// Forwards to the driver's remote_session_control through remote_session_control_pfn.
// The pointer is not null-checked here.
HTPDRV_API int remote_session_control(uint32_t req, void * data, uint32_t datalen) {
    const int rc = remote_session_control_pfn(req, data, datalen);
    return rc;
}
|
|
194
|
+
|
|
195
|
+
#ifdef _WIN32
|
|
196
|
+
|
|
197
|
+
static std::string wstr_to_str(std::wstring_view wstr) {
|
|
198
|
+
std::string result;
|
|
199
|
+
if (wstr.empty()) {
|
|
200
|
+
return result;
|
|
201
|
+
}
|
|
202
|
+
auto bytes_needed = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS,
|
|
203
|
+
wstr.data(), (int) wstr.size(),
|
|
204
|
+
nullptr, 0, nullptr, nullptr);
|
|
205
|
+
if (bytes_needed == 0) {
|
|
206
|
+
GGML_LOG_ERROR("ggml-hex: WideCharToMultiByte failed. Error %lu\n", GetLastError());
|
|
207
|
+
throw std::runtime_error("Invalid wstring input");
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
result.resize(bytes_needed, '\0');
|
|
211
|
+
int bytes_written = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS,
|
|
212
|
+
wstr.data(), (int) wstr.size(),
|
|
213
|
+
result.data(), bytes_needed,
|
|
214
|
+
nullptr, nullptr);
|
|
215
|
+
if (bytes_written == 0) {
|
|
216
|
+
GGML_LOG_ERROR("ggml-hex: WideCharToMultiByte failed. Error %lu\n", GetLastError());
|
|
217
|
+
throw std::runtime_error("Wstring conversion failed");
|
|
218
|
+
}
|
|
219
|
+
return result;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
static std::string get_driver_path() {
|
|
223
|
+
std::wstring serviceName = L"qcnspmcdm";
|
|
224
|
+
std::string result;
|
|
225
|
+
|
|
226
|
+
// Get a handle to the SCM database.
|
|
227
|
+
SC_HANDLE schSCManager = OpenSCManagerW(NULL, NULL, STANDARD_RIGHTS_READ);
|
|
228
|
+
if (nullptr == schSCManager) {
|
|
229
|
+
GGML_LOG_ERROR("ggml-hex: Failed to open SCManager. Error: %lu\n", GetLastError());
|
|
230
|
+
return result;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// Get a handle to the service.
|
|
234
|
+
SC_HANDLE schService = OpenServiceW(schSCManager, // SCM database
|
|
235
|
+
serviceName.c_str(), // name of service
|
|
236
|
+
SERVICE_QUERY_CONFIG); // need query config access
|
|
237
|
+
|
|
238
|
+
if (nullptr == schService) {
|
|
239
|
+
GGML_LOG_ERROR("ggml-hex: Failed to open qcnspmcdm service. Error: %lu\n", GetLastError());
|
|
240
|
+
CloseServiceHandle(schSCManager);
|
|
241
|
+
return result;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// Store the size of buffer used as an output.
|
|
245
|
+
DWORD bufferSize;
|
|
246
|
+
if (!QueryServiceConfigW(schService, NULL, 0, &bufferSize) &&
|
|
247
|
+
(GetLastError() != ERROR_INSUFFICIENT_BUFFER)) {
|
|
248
|
+
GGML_LOG_ERROR("ggml-hex: Failed to query service config. Error: %lu\n", GetLastError());
|
|
249
|
+
CloseServiceHandle(schService);
|
|
250
|
+
CloseServiceHandle(schSCManager);
|
|
251
|
+
return result;
|
|
252
|
+
}
|
|
253
|
+
// Get the configuration of the service.
|
|
254
|
+
LPQUERY_SERVICE_CONFIGW serviceConfig =
|
|
255
|
+
static_cast<LPQUERY_SERVICE_CONFIGW>(LocalAlloc(LMEM_FIXED, bufferSize));
|
|
256
|
+
if (!QueryServiceConfigW(schService, serviceConfig, bufferSize, &bufferSize)) {
|
|
257
|
+
fprintf(stderr, "ggml-hex: Failed to query service config. Error: %lu\n", GetLastError());
|
|
258
|
+
LocalFree(serviceConfig);
|
|
259
|
+
CloseServiceHandle(schService);
|
|
260
|
+
CloseServiceHandle(schSCManager);
|
|
261
|
+
return result;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// Read the driver file path get its parent directory
|
|
265
|
+
std::wstring driverPath = std::wstring(serviceConfig->lpBinaryPathName);
|
|
266
|
+
driverPath = driverPath.substr(0, driverPath.find_last_of(L"\\"));
|
|
267
|
+
|
|
268
|
+
// Clean up resources
|
|
269
|
+
LocalFree(serviceConfig);
|
|
270
|
+
CloseServiceHandle(schService);
|
|
271
|
+
CloseServiceHandle(schSCManager);
|
|
272
|
+
|
|
273
|
+
// Driver path would contain invalid path string, like:
|
|
274
|
+
// \SystemRoot\System32\DriverStore\FileRepository\qcadsprpc8280.inf_arm64_c2b9460c9a072f37
|
|
275
|
+
// "\SystemRoot" should be replace with a correct one (e.g. C:\Windows)
|
|
276
|
+
const std::wstring systemRootPlaceholder = L"\\SystemRoot";
|
|
277
|
+
if (0 != driverPath.compare(0, systemRootPlaceholder.length(), systemRootPlaceholder)) {
|
|
278
|
+
GGML_LOG_ERROR("ggml-hex: String pattern not found in driver path.\n");
|
|
279
|
+
return result;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// Replace \SystemRoot with an absolute path from system ENV windir
|
|
283
|
+
const std::wstring systemRootEnv = L"windir";
|
|
284
|
+
|
|
285
|
+
// Query the number of wide characters this variable requires
|
|
286
|
+
DWORD numWords = GetEnvironmentVariableW(systemRootEnv.c_str(), NULL, 0);
|
|
287
|
+
if (numWords == 0) {
|
|
288
|
+
GGML_LOG_ERROR("ggml-hex: Failed get systemRoot environment variable\n");
|
|
289
|
+
return result;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// Query the actual system root name from environment variable
|
|
293
|
+
std::vector<wchar_t> systemRoot(numWords + 1);
|
|
294
|
+
numWords = GetEnvironmentVariableW(systemRootEnv.c_str(), systemRoot.data(), numWords + 1);
|
|
295
|
+
if (numWords == 0) {
|
|
296
|
+
GGML_LOG_ERROR("ggml-hex: Failed to read windir environment variable\n");
|
|
297
|
+
return result;
|
|
298
|
+
}
|
|
299
|
+
driverPath.replace(0, systemRootPlaceholder.length(), std::wstring(systemRoot.data()));
|
|
300
|
+
|
|
301
|
+
return wstr_to_str(driverPath);
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
#endif
|
|
305
|
+
|
|
306
|
+
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
|
307
|
+
|
|
308
|
+
int htpdrv_init() {
|
|
309
|
+
static dl_handle_ptr lib_cdsp_rpc_handle = nullptr;
|
|
310
|
+
static bool initialized = false;
|
|
311
|
+
#ifdef _WIN32
|
|
312
|
+
std::string drv_path = get_driver_path() + "\\" + "libcdsprpc.dll";
|
|
313
|
+
#else
|
|
314
|
+
std::string drv_path = "libcdsprpc.so";
|
|
315
|
+
#endif
|
|
316
|
+
if (initialized) {
|
|
317
|
+
GGML_LOG_INFO("ggml-hex: Driver already loaded\n");
|
|
318
|
+
return AEE_SUCCESS;
|
|
319
|
+
}
|
|
320
|
+
GGML_LOG_INFO("ggml-hex: Loading driver %s\n", drv_path.c_str());
|
|
321
|
+
|
|
322
|
+
fs::path path{ drv_path.c_str() };
|
|
323
|
+
dl_handle_ptr handle { dl_load_library(path) };
|
|
324
|
+
if (!handle) {
|
|
325
|
+
GGML_LOG_ERROR("ggml-hex: failed to load %s: %s\n", path.u8string().c_str(), dl_error());
|
|
326
|
+
return AEE_EUNABLETOLOAD;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
#define dlsym(drv, type, pfn, symbol, ignore) \
|
|
330
|
+
do { \
|
|
331
|
+
pfn = (type) dl_get_sym(drv, #symbol); \
|
|
332
|
+
if (!ignore && nullptr == pfn) { \
|
|
333
|
+
GGML_LOG_ERROR("ggml-hex: failed to dlsym %s\n", #symbol); \
|
|
334
|
+
return AEE_EUNABLETOLOAD; \
|
|
335
|
+
} \
|
|
336
|
+
} while (0)
|
|
337
|
+
|
|
338
|
+
dlsym(handle.get(), rpcmem_alloc_pfn_t, rpcmem_alloc_pfn, rpcmem_alloc, false);
|
|
339
|
+
dlsym(handle.get(), rpcmem_alloc2_pfn_t, rpcmem_alloc2_pfn, rpcmem_alloc2, true);
|
|
340
|
+
dlsym(handle.get(), rpcmem_free_pfn_t, rpcmem_free_pfn, rpcmem_free, false);
|
|
341
|
+
dlsym(handle.get(), rpcmem_to_fd_pfn_t, rpcmem_to_fd_pfn, rpcmem_to_fd, false);
|
|
342
|
+
dlsym(handle.get(), fastrpc_mmap_pfn_t, fastrpc_mmap_pfn, fastrpc_mmap, false);
|
|
343
|
+
dlsym(handle.get(), fastrpc_munmap_pfn_t, fastrpc_munmap_pfn, fastrpc_munmap, false);
|
|
344
|
+
dlsym(handle.get(), dspqueue_create_pfn_t, dspqueue_create_pfn, dspqueue_create, false);
|
|
345
|
+
dlsym(handle.get(), dspqueue_close_pfn_t, dspqueue_close_pfn, dspqueue_close, false);
|
|
346
|
+
dlsym(handle.get(), dspqueue_export_pfn_t, dspqueue_export_pfn, dspqueue_export, false);
|
|
347
|
+
dlsym(handle.get(), dspqueue_write_pfn_t, dspqueue_write_pfn, dspqueue_write, false);
|
|
348
|
+
dlsym(handle.get(), dspqueue_read_pfn_t, dspqueue_read_pfn, dspqueue_read, false);
|
|
349
|
+
dlsym(handle.get(), remote_handle64_open_pfn_t, remote_handle64_open_pfn, remote_handle64_open, false);
|
|
350
|
+
dlsym(handle.get(), remote_handle64_invoke_pfn_t, remote_handle64_invoke_pfn, remote_handle64_invoke, false);
|
|
351
|
+
dlsym(handle.get(), remote_handle_control_pfn_t, remote_handle_control_pfn, remote_handle_control, false);
|
|
352
|
+
dlsym(handle.get(), remote_handle64_control_pfn_t, remote_handle64_control_pfn, remote_handle64_control, false);
|
|
353
|
+
dlsym(handle.get(), remote_session_control_pfn_t, remote_session_control_pfn, remote_session_control, false);
|
|
354
|
+
dlsym(handle.get(), remote_handle64_close_pfn_t, remote_handle64_close_pfn, remote_handle64_close, false);
|
|
355
|
+
|
|
356
|
+
lib_cdsp_rpc_handle = std::move(handle);
|
|
357
|
+
initialized = true;
|
|
358
|
+
|
|
359
|
+
return AEE_SUCCESS;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
// Linear search of supported_domains for the entry whose id equals domain_id.
// Returns a pointer to that entry, or NULL when no entry matches.
domain * get_domain(int domain_id) {
    const int count = sizeof(supported_domains) / sizeof(domain);

    int idx = 0;
    while (idx < count) {
        if (domain_id == supported_domains[idx].id) {
            return &supported_domains[idx];
        }
        ++idx;
    }

    return NULL;
}
|
|
374
|
+
|
|
375
|
+
int get_hex_arch_ver(int domain, int * arch) {
|
|
376
|
+
if (!remote_handle_control_pfn) {
|
|
377
|
+
GGML_LOG_ERROR("ggml-hex: remote_handle_control is not supported on this device\n");
|
|
378
|
+
return AEE_EUNSUPPORTEDAPI;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
struct remote_dsp_capability arch_ver;
|
|
382
|
+
arch_ver.domain = (uint32_t) domain;
|
|
383
|
+
arch_ver.attribute_ID = ARCH_VER;
|
|
384
|
+
arch_ver.capability = (uint32_t) 0;
|
|
385
|
+
|
|
386
|
+
int err = remote_handle_control(DSPRPC_GET_DSP_INFO, &arch_ver, sizeof(arch_ver));
|
|
387
|
+
if ((err & 0xff) == (AEE_EUNSUPPORTEDAPI & 0xff)) {
|
|
388
|
+
GGML_LOG_ERROR("ggml-hex: FastRPC capability API is not supported on this device\n");
|
|
389
|
+
return AEE_EUNSUPPORTEDAPI;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
if (err != AEE_SUCCESS) {
|
|
393
|
+
GGML_LOG_ERROR("ggml-hex: FastRPC capability query failed (err %d)\n", err);
|
|
394
|
+
return err;
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
switch (arch_ver.capability & 0xff) {
|
|
398
|
+
case 0x68:
|
|
399
|
+
*arch = 68;
|
|
400
|
+
return 0;
|
|
401
|
+
case 0x69:
|
|
402
|
+
*arch = 69;
|
|
403
|
+
return 0;
|
|
404
|
+
case 0x73:
|
|
405
|
+
*arch = 73;
|
|
406
|
+
return 0;
|
|
407
|
+
case 0x75:
|
|
408
|
+
*arch = 75;
|
|
409
|
+
return 0;
|
|
410
|
+
case 0x79:
|
|
411
|
+
*arch = 79;
|
|
412
|
+
return 0;
|
|
413
|
+
case 0x81:
|
|
414
|
+
*arch = 81;
|
|
415
|
+
return 0;
|
|
416
|
+
}
|
|
417
|
+
return -1;
|
|
418
|
+
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#ifdef __cplusplus
|
|
4
|
+
extern "C" {
|
|
5
|
+
#endif
|
|
6
|
+
|
|
7
|
+
#ifdef _WIN32
|
|
8
|
+
# pragma clang diagnostic ignored "-Wignored-attributes"
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
#include <AEEStdErr.h>
|
|
12
|
+
#include <rpcmem.h>
|
|
13
|
+
#include <remote.h>
|
|
14
|
+
#include <dspqueue.h>
|
|
15
|
+
|
|
16
|
+
#if defined(_WIN32) && !defined(__MINGW32__)
|
|
17
|
+
# ifdef GGML_BACKEND_BUILD
|
|
18
|
+
# define HTPDRV_API __declspec(dllexport) extern
|
|
19
|
+
# else
|
|
20
|
+
# define HTPDRV_API __declspec(dllimport) extern
|
|
21
|
+
# endif
|
|
22
|
+
#else
|
|
23
|
+
# define HTPDRV_API __attribute__ ((visibility ("default"))) extern
|
|
24
|
+
#endif
|
|
25
|
+
|
|
26
|
+
/* Offset to differentiate HLOS and Hexagon error codes.
|
|
27
|
+
Stores the value of AEE_EOFFSET for Hexagon. */
|
|
28
|
+
#ifndef DSP_OFFSET
|
|
29
|
+
# define DSP_OFFSET 0x80000400
|
|
30
|
+
#endif
|
|
31
|
+
|
|
32
|
+
/* Errno for connection reset by peer. */
|
|
33
|
+
#ifndef ECONNRESET
|
|
34
|
+
# ifdef __hexagon__
|
|
35
|
+
# define ECONNRESET 104
|
|
36
|
+
# endif
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
/* Abstraction of different OS specific sleep APIs.
|
|
40
|
+
SLEEP accepts input in seconds. */
|
|
41
|
+
#ifndef SLEEP
|
|
42
|
+
# ifdef __hexagon__
|
|
43
|
+
# define SLEEP(x) \
|
|
44
|
+
{ /* Do nothing for simulator. */ \
|
|
45
|
+
}
|
|
46
|
+
# else
|
|
47
|
+
# ifdef _WIN32
|
|
48
|
+
# define SLEEP(x) Sleep(1000 * (x)) /* Sleep accepts input in milliseconds; (x) is parenthesized so SLEEP(a + b) scales the whole expression. */
|
|
49
|
+
# else
|
|
50
|
+
# define SLEEP(x) sleep(x) /* sleep accepts input in seconds. */
|
|
51
|
+
# endif
|
|
52
|
+
# endif
|
|
53
|
+
#endif
|
|
54
|
+
|
|
55
|
+
/* Include windows specific header files. */
|
|
56
|
+
#ifdef _WIN32
|
|
57
|
+
# include <windows.h>
|
|
58
|
+
# include <sysinfoapi.h>
|
|
59
|
+
# define _CRT_SECURE_NO_WARNINGS 1
|
|
60
|
+
# define _WINSOCK_DEPRECATED_NO_WARNINGS 1
|
|
61
|
+
#endif
|
|
62
|
+
|
|
63
|
+
/* Includes and defines for all HLOS except windows */
|
|
64
|
+
#if !defined(__hexagon__) && !defined(_WIN32)
|
|
65
|
+
# include "unistd.h"
|
|
66
|
+
|
|
67
|
+
# include <sys/time.h>
|
|
68
|
+
#endif
|
|
69
|
+
|
|
70
|
+
/* Includes and defines for Hexagon and all HLOS except Windows. */
|
|
71
|
+
#if !defined(_WIN32)
|
|
72
|
+
/* Weak reference to remote symbol for compilation. */
|
|
73
|
+
# pragma weak remote_session_control
|
|
74
|
+
# pragma weak remote_handle_control
|
|
75
|
+
# pragma weak remote_handle64_control
|
|
76
|
+
# pragma weak fastrpc_mmap
|
|
77
|
+
# pragma weak fastrpc_munmap
|
|
78
|
+
# pragma weak rpcmem_alloc2
|
|
79
|
+
#endif
|
|
80
|
+
|
|
81
|
+
#if !defined(_WIN32)
|
|
82
|
+
# pragma weak remote_system_request
|
|
83
|
+
#endif
|
|
84
|
+
|
|
85
|
+
#ifdef _WIN32
|
|
86
|
+
# define DSPQUEUE_TIMEOUT DSPQUEUE_TIMEOUT_NONE
|
|
87
|
+
#else
|
|
88
|
+
# define DSPQUEUE_TIMEOUT 1000000
|
|
89
|
+
#endif
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* htpdrv_init API: driver interface entry point
|
|
93
|
+
*
|
|
94
|
+
* @return Return AEE error codes as defined in Hexagon SDK.
|
|
95
|
+
*/
|
|
96
|
+
HTPDRV_API int htpdrv_init(void);
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* get_domain API: get domain struct from domain value.
|
|
100
|
+
*
|
|
101
|
+
* @param[in] domain_id value of a domain
|
|
102
|
+
* @return Returns domain struct of the domain if it is supported or else
|
|
103
|
+
* returns NULL.
|
|
104
|
+
*
|
|
105
|
+
*/
|
|
106
|
+
HTPDRV_API domain * get_domain(int domain_id);
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* get_hex_arch_ver API: query the Hexagon processor architecture version information
|
|
110
|
+
*
|
|
111
|
+
* @param[in] domain value of a domain
|
|
112
|
+
* @param[out] arch Arch version (73, 75, ...)
|
|
113
|
+
* @return 0 if query is successful.
|
|
114
|
+
* non-zero if error, return value points to the error.
|
|
115
|
+
*
|
|
116
|
+
*/
|
|
117
|
+
HTPDRV_API int get_hex_arch_ver(int domain, int * arch);
|
|
118
|
+
|
|
119
|
+
#ifdef __cplusplus
|
|
120
|
+
}
|
|
121
|
+
#endif
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#ifdef _WIN32
|
|
4
|
+
# define WIN32_LEAN_AND_MEAN
|
|
5
|
+
# ifndef NOMINMAX
|
|
6
|
+
# define NOMINMAX
|
|
7
|
+
# endif
|
|
8
|
+
# include <windows.h>
|
|
9
|
+
# include <winevt.h>
|
|
10
|
+
#else
|
|
11
|
+
# include <dlfcn.h>
|
|
12
|
+
# include <unistd.h>
|
|
13
|
+
#endif
|
|
14
|
+
#include <filesystem>
|
|
15
|
+
|
|
16
|
+
namespace fs = std::filesystem;
|
|
17
|
+
|
|
18
|
+
#ifdef _WIN32
|
|
19
|
+
|
|
20
|
+
using dl_handle = std::remove_pointer_t<HMODULE>;
|
|
21
|
+
|
|
22
|
+
struct dl_handle_deleter {
|
|
23
|
+
void operator()(HMODULE handle) {
|
|
24
|
+
FreeLibrary(handle);
|
|
25
|
+
}
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
// Load the dynamic library at `path` with LoadLibraryW.
//
// Returns the module handle produced by LoadLibraryW (null on failure).
static inline dl_handle * dl_load_library(const fs::path & path) {
    // Suppress error dialogs for missing DLLs: SetErrorMode returns the
    // previous mode, which is then re-applied with SEM_FAILCRITICALERRORS added.
    const DWORD saved_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(saved_mode | SEM_FAILCRITICALERRORS);

    const HMODULE lib = LoadLibraryW(path.wstring().c_str());

    // Restore the error mode that was active before the load attempt.
    SetErrorMode(saved_mode);
    return lib;
}
|
|
39
|
+
|
|
40
|
+
// Look up the exported symbol `name` in the module `handle` via GetProcAddress.
//
// Returns the symbol address as a void pointer (null when GetProcAddress fails).
static inline void * dl_get_sym(dl_handle * handle, const char * name) {
    // Same error-mode handling as the library loader: add
    // SEM_FAILCRITICALERRORS for the duration of the lookup, then restore.
    const DWORD saved_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(saved_mode | SEM_FAILCRITICALERRORS);

    void * const sym = (void *) GetProcAddress(handle, name);

    SetErrorMode(saved_mode);
    return sym;
}
|
|
50
|
+
|
|
51
|
+
// Windows variant of dl_error: no error text is collected here, so the
// result is always an empty (never null) string.
static inline const char * dl_error() {
    const char * const no_message = "";
    return no_message;
}
|
|
54
|
+
|
|
55
|
+
#else
|
|
56
|
+
|
|
57
|
+
using dl_handle = void;
|
|
58
|
+
|
|
59
|
+
// Deleter functor for handles obtained from dlopen: closes the handle with
// dlclose when invoked.
struct dl_handle_deleter {
    void operator()(void * lib) { dlclose(lib); }
};
|
|
64
|
+
|
|
65
|
+
// Open the shared library at `path` with dlopen, using the
// RTLD_NOW | RTLD_LOCAL flags.
//
// Returns whatever dlopen returns (null on failure).
static inline dl_handle * dl_load_library(const fs::path & path) {
    return dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
}
|
|
69
|
+
|
|
70
|
+
// Resolve the symbol `name` in the library `handle` via dlsym.
//
// Returns the address reported by dlsym.
static inline void * dl_get_sym(dl_handle * handle, const char * name) {
    void * const sym = dlsym(handle, name);
    return sym;
}
|
|
73
|
+
|
|
74
|
+
// Fetch the pending dlerror() message.
//
// Returns the message text, or an empty string when dlerror() reports
// nothing; the result is never null.
static inline const char * dl_error() {
    if (const char * msg = dlerror()) {
        return msg;
    }
    return "";
}
|
|
78
|
+
|
|
79
|
+
#endif
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[Version]
|
|
2
|
+
Signature = "$WINDOWS NT$"
|
|
3
|
+
Class = ComputeAccelerator
|
|
4
|
+
ClassGuid = {F01A9D53-3FF6-48D2-9F97-C8A7004BE10C}
|
|
5
|
+
Provider = %GGML%
|
|
6
|
+
DriverVer = 01/01/2026,1.0.0.0
|
|
7
|
+
CatalogFile = libggml-htp.cat
|
|
8
|
+
PnpLockDown = 1
|
|
9
|
+
|
|
10
|
+
[DestinationDirs]
|
|
11
|
+
Drivers_Dir = 6
|
|
12
|
+
|
|
13
|
+
[SourceDisksNames]
|
|
14
|
+
1 = %DiskId%
|
|
15
|
+
|
|
16
|
+
[SourceDisksFiles]
|
|
17
|
+
libggml-htp-v68.so = 1
|
|
18
|
+
libggml-htp-v69.so = 1
|
|
19
|
+
libggml-htp-v73.so = 1
|
|
20
|
+
libggml-htp-v75.so = 1
|
|
21
|
+
libggml-htp-v81.so = 1
|
|
22
|
+
|
|
23
|
+
[ControlFlags]
|
|
24
|
+
ExcludeFromSelect = *
|
|
25
|
+
|
|
26
|
+
[DefaultInstall.NTarm64]
|
|
27
|
+
CopyFiles=Drivers_Dir
|
|
28
|
+
|
|
29
|
+
[Drivers_Dir]
|
|
30
|
+
libggml-htp-v68.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
|
31
|
+
libggml-htp-v69.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
|
32
|
+
libggml-htp-v73.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
|
33
|
+
libggml-htp-v75.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
|
34
|
+
libggml-htp-v81.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
|
35
|
+
|
|
36
|
+
[Strings]
|
|
37
|
+
GGML = 'GGML'
|
|
38
|
+
DiskId = 'GGML HTP library'
|
|
@@ -11,6 +11,10 @@ endif()
|
|
|
11
11
|
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
|
|
12
12
|
list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake")
|
|
13
13
|
|
|
14
|
+
if (NOT DEFINED CMAKE_HIP_FLAGS_DEBUG)
|
|
15
|
+
set(CMAKE_HIP_FLAGS_DEBUG "-g -O2")
|
|
16
|
+
endif()
|
|
17
|
+
|
|
14
18
|
# CMake on Windows doesn't support the HIP language yet
|
|
15
19
|
if (WIN32)
|
|
16
20
|
set(CXX_IS_HIPCC TRUE)
|
|
@@ -62,6 +66,8 @@ file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu")
|
|
|
62
66
|
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
|
63
67
|
file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu")
|
|
64
68
|
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
|
69
|
+
file(GLOB SRCS "../ggml-cuda/template-instances/mmf*.cu")
|
|
70
|
+
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
|
65
71
|
|
|
66
72
|
if (GGML_CUDA_FA_ALL_QUANTS)
|
|
67
73
|
file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*.cu")
|