whispercpp 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +7 -3
- data/README.md +161 -43
- data/Rakefile +45 -13
- data/ext/.gitignore +4 -8
- data/ext/dependencies.rb +73 -0
- data/ext/extconf.rb +21 -198
- data/ext/options.rb +85 -0
- data/ext/ruby_whisper.c +177 -0
- data/ext/ruby_whisper.h +17 -2
- data/ext/ruby_whisper_context.c +672 -0
- data/ext/ruby_whisper_error.c +52 -0
- data/ext/ruby_whisper_model.c +232 -0
- data/ext/ruby_whisper_params.c +1303 -0
- data/ext/ruby_whisper_segment.c +220 -0
- data/ext/ruby_whisper_transcribe.cpp +93 -0
- data/ext/ruby_whisper_vad_params.c +288 -0
- data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
- data/ext/sources/CMakeLists.txt +255 -0
- data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
- data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
- data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
- data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
- data/ext/sources/bindings/javascript/package.json +26 -0
- data/ext/sources/bindings/javascript/whisper.js +19 -0
- data/ext/sources/build-xcframework.sh +547 -0
- data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
- data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
- data/ext/sources/cmake/build-info.cmake +60 -0
- data/ext/sources/cmake/git-vars.cmake +22 -0
- data/ext/sources/cmake/whisper-config.cmake.in +65 -0
- data/ext/sources/cmake/whisper.pc.in +10 -0
- data/ext/sources/examples/CMakeLists.txt +124 -0
- data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +133 -0
- data/ext/sources/examples/addon.node/addon.cpp +557 -0
- data/ext/sources/examples/addon.node/index.js +57 -0
- data/ext/sources/examples/addon.node/package.json +16 -0
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/CMakeLists.txt +8 -0
- data/ext/sources/examples/bench/bench.cpp +176 -0
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
- data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
- data/ext/sources/examples/cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/cli/cli.cpp +1295 -0
- data/ext/sources/examples/coi-serviceworker.js +146 -0
- data/ext/sources/examples/command/CMakeLists.txt +10 -0
- data/ext/sources/examples/command/command.cpp +800 -0
- data/ext/sources/examples/command/commands.txt +9 -0
- data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
- data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/common-ggml.cpp +238 -0
- data/ext/sources/examples/common-ggml.h +18 -0
- data/ext/sources/examples/common-sdl.cpp +227 -0
- data/ext/sources/examples/common-sdl.h +49 -0
- data/ext/sources/examples/common-whisper.cpp +175 -0
- data/ext/sources/examples/common-whisper.h +24 -0
- data/ext/sources/examples/common.cpp +675 -0
- data/ext/sources/examples/common.h +322 -0
- data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
- data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
- data/ext/sources/examples/generate-karaoke.sh +57 -0
- data/ext/sources/examples/grammar-parser.cpp +423 -0
- data/ext/sources/examples/grammar-parser.h +29 -0
- data/ext/sources/examples/helpers.js +191 -0
- data/ext/sources/examples/json.hpp +24596 -0
- data/ext/sources/examples/livestream.sh +112 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
- data/ext/sources/examples/lsp/lsp.cpp +469 -0
- data/ext/sources/examples/lsp/whisper.vim +362 -0
- data/ext/sources/examples/miniaudio.h +93468 -0
- data/ext/sources/examples/python/test_whisper_processor.py +7 -0
- data/ext/sources/examples/python/whisper_processor.py +54 -0
- data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
- data/ext/sources/examples/quantize/quantize.cpp +226 -0
- data/ext/sources/examples/server/CMakeLists.txt +15 -0
- data/ext/sources/examples/server/bench.js +29 -0
- data/ext/sources/examples/server/httplib.h +10497 -0
- data/ext/sources/examples/server/server.cpp +1238 -0
- data/ext/sources/examples/server.py +115 -0
- data/ext/sources/examples/stb_vorbis.c +5584 -0
- data/ext/sources/examples/stream/CMakeLists.txt +10 -0
- data/ext/sources/examples/stream/stream.cpp +435 -0
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
- data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
- data/ext/sources/examples/sycl/build.sh +22 -0
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
- data/ext/sources/examples/sycl/run-whisper.sh +17 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +43 -0
- data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
- data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +1914 -0
- data/ext/sources/examples/talk-llama/llama-arch.h +464 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +843 -0
- data/ext/sources/examples/talk-llama/llama-batch.h +147 -0
- data/ext/sources/examples/talk-llama/llama-chat.cpp +685 -0
- data/ext/sources/examples/talk-llama/llama-chat.h +59 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +2845 -0
- data/ext/sources/examples/talk-llama/llama-context.h +297 -0
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
- data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
- data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
- data/ext/sources/examples/talk-llama/llama-graph.cpp +1693 -0
- data/ext/sources/examples/talk-llama/llama-graph.h +710 -0
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +103 -0
- data/ext/sources/examples/talk-llama/llama-hparams.h +207 -0
- data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
- data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
- data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
- data/ext/sources/examples/talk-llama/llama-io.h +35 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +44 -0
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +439 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +59 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +116 -0
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
- data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1163 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +282 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +15114 -0
- data/ext/sources/examples/talk-llama/llama-model.h +452 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +1049 -0
- data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
- data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +3377 -0
- data/ext/sources/examples/talk-llama/llama-vocab.h +132 -0
- data/ext/sources/examples/talk-llama/llama.cpp +358 -0
- data/ext/sources/examples/talk-llama/llama.h +1484 -0
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
- data/ext/sources/examples/talk-llama/speak +40 -0
- data/ext/sources/examples/talk-llama/speak.bat +1 -0
- data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +810 -0
- data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
- data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +854 -0
- data/ext/sources/examples/talk-llama/unicode.h +66 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +149 -0
- data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
- data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +251 -0
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
- data/ext/sources/ggml/CMakeLists.txt +435 -0
- data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
- data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
- data/ext/sources/ggml/cmake/common.cmake +50 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
- data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-backend.h +10 -8
- data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
- data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +11 -1
- data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
- data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
- data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
- data/ext/{ggml → sources/ggml}/include/ggml.h +325 -269
- data/ext/sources/ggml/include/gguf.h +202 -0
- data/ext/sources/ggml/src/CMakeLists.txt +404 -0
- data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
- data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +92 -53
- data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +69 -34
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +75 -0
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
- data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +140 -1
- data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +588 -146
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
- data/ext/{ggml → sources/ggml}/src/ggml-common.h +16 -8
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +597 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +3 -2
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- data/ext/{ggml/src/ggml-cpu/cpu-feats-x86.cpp → sources/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp} +5 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +3285 -0
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +73 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +172 -41
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3551 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +78 -25
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.cpp → sources/ggml/src/ggml-cpu/hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3594 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +19 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +9786 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.h +118 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-quants.h → sources/ggml/src/ggml-cpu/quants.h} +26 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +1184 -0
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.cpp → sources/ggml/src/ggml-cpu/traits.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +345 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +1027 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +851 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +752 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +31 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1474 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +638 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3647 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +506 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +155 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +26 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +378 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +66 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
- data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
- data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-impl.h +147 -158
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +121 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +649 -0
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2504 -1108
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +2102 -1463
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +110 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +6494 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
- data/ext/{ggml → sources/ggml}/src/ggml-quants.c +120 -128
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +494 -84
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +344 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +561 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +56 -70
- data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +8 -12
- data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +575 -0
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +839 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +823 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +188 -67
- data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2987 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1120 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +84 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +102 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +212 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1197 -1295
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
- data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +60 -81
- data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1065 -0
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +482 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
- data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +111 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +472 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +38 -28
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +15 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +6 -11
- data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1307 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +289 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +200 -0
- data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3822 -1335
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +61 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +203 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- data/ext/{ggml → sources/ggml}/src/ggml.c +918 -1782
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +1351 -0
- data/ext/{include → sources/include}/whisper.h +70 -2
- data/ext/sources/src/CMakeLists.txt +145 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +36 -10
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +29 -3
- data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
- data/ext/sources/src/whisper-arch.h +197 -0
- data/ext/{src → sources/src}/whisper.cpp +1966 -386
- data/ext/sources/tests/CMakeLists.txt +105 -0
- data/ext/sources/tests/earnings21/eval.mk +58 -0
- data/ext/sources/tests/earnings21/eval.py +68 -0
- data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
- data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
- data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
- data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
- data/ext/sources/tests/earnings21/requirements.txt +6 -0
- data/ext/sources/tests/en-0-ref.txt +1 -0
- data/ext/sources/tests/en-1-ref.txt +1 -0
- data/ext/sources/tests/en-2-ref.txt +1 -0
- data/ext/sources/tests/es-0-ref.txt +1 -0
- data/ext/sources/tests/librispeech/eval.mk +39 -0
- data/ext/sources/tests/librispeech/eval.py +47 -0
- data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
- data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
- data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
- data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
- data/ext/sources/tests/librispeech/requirements.txt +6 -0
- data/ext/sources/tests/run-tests.sh +130 -0
- data/ext/sources/tests/test-c.c +3 -0
- data/ext/sources/tests/test-vad-full.cpp +54 -0
- data/ext/sources/tests/test-vad.cpp +83 -0
- data/ext/sources/tests/test-whisper.js +58 -0
- data/extsources.rb +39 -5
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +202 -126
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +510 -0
- data/test/helper.rb +24 -0
- data/{tests → test}/test_callback.rb +45 -3
- data/{tests → test}/test_error.rb +2 -2
- data/{tests → test}/test_model.rb +47 -0
- data/test/test_package.rb +51 -0
- data/test/test_params.rb +297 -0
- data/test/test_segment.rb +146 -0
- data/test/test_vad.rb +19 -0
- data/test/test_vad_params.rb +103 -0
- data/{tests → test}/test_whisper.rb +106 -36
- data/whispercpp.gemspec +5 -5
- metadata +837 -134
- data/ext/cpu.mk +0 -9
- data/ext/examples/dr_wav.h +0 -8815
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -10835
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
- data/ext/ggml/src/ggml-sycl/convert.cpp +0 -547
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
- data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +0 -1015
- data/ext/ggml/src/ggml-sycl/norm.cpp +0 -378
- data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
- data/ext/metal-embed.mk +0 -17
- data/ext/metal.mk +0 -6
- data/ext/ruby_whisper.cpp +0 -1909
- data/ext/scripts/get-flags.mk +0 -38
- data/lib/whisper.rb +0 -2
- data/tests/helper.rb +0 -7
- data/tests/test_package.rb +0 -31
- data/tests/test_params.rb +0 -160
- data/tests/test_segment.rb +0 -83
- /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
- /data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.h → sources/ggml/src/ggml-cpu/hbm.h} +0 -0
- /data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.h → sources/ggml/src/ggml-cpu/traits.h} +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
- /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
@@ -29,11 +29,16 @@
|
|
29
29
|
#include <cstdio>
|
30
30
|
#include <cstring>
|
31
31
|
#include <mutex>
|
32
|
+
#include <queue>
|
33
|
+
#include <chrono>
|
34
|
+
#include <unordered_set>
|
35
|
+
#include <optional>
|
32
36
|
|
33
37
|
#include "ggml-impl.h"
|
34
38
|
#include "ggml-backend-impl.h"
|
35
39
|
#include "ggml-cann/aclnn_ops.h"
|
36
40
|
#include "ggml-cann/common.h"
|
41
|
+
#include "ggml.h"
|
37
42
|
|
38
43
|
#define GGML_COMMON_DECL_C
|
39
44
|
|
@@ -90,6 +95,26 @@ int32_t ggml_cann_get_device() {
|
|
90
95
|
return id;
|
91
96
|
}
|
92
97
|
|
98
|
+
/**
|
99
|
+
* @brief Get the value of the specified environment variable (name).
|
100
|
+
* if not empty, return a std::string object
|
101
|
+
*/
|
102
|
+
std::optional<std::string> get_env(const std::string& name) {
|
103
|
+
const char* val = std::getenv(name.c_str());
|
104
|
+
if (!val) return std::nullopt;
|
105
|
+
std::string res = std::string(val);
|
106
|
+
std::transform(res.begin(), res.end(), res.begin(), ::tolower);
|
107
|
+
return res;
|
108
|
+
}
|
109
|
+
|
110
|
+
/**
|
111
|
+
* @brief Verify whether the environment variable is a valid value.
|
112
|
+
*/
|
113
|
+
bool parse_bool(const std::string& value) {
|
114
|
+
std::unordered_set<std::string> valid_values = {"on", "1", "yes", "y", "enable", "true"};
|
115
|
+
return valid_values.find(value) != valid_values.end();
|
116
|
+
}
|
117
|
+
|
93
118
|
/**
|
94
119
|
* @brief Initialize the CANN device information.
|
95
120
|
*
|
@@ -119,9 +144,10 @@ static ggml_cann_device_info ggml_cann_init() {
|
|
119
144
|
prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE;
|
120
145
|
prop.location.id = id;
|
121
146
|
prop.reserve = 0;
|
122
|
-
|
147
|
+
err = aclrtMemGetAllocationGranularity(
|
123
148
|
&prop, ACL_RT_MEM_ALLOC_GRANULARITY_RECOMMENDED,
|
124
|
-
&info.devices[id].vmm_granularity)
|
149
|
+
&info.devices[id].vmm_granularity);
|
150
|
+
info.devices[id].vmm = err == ACL_SUCCESS;
|
125
151
|
|
126
152
|
size_t free, total;
|
127
153
|
ggml_backend_cann_get_device_memory(id, &free, &total);
|
@@ -148,11 +174,223 @@ const ggml_cann_device_info& ggml_cann_info() {
|
|
148
174
|
|
149
175
|
//#define DEBUG_CANN_MALLOC
|
150
176
|
/**
|
151
|
-
* @brief A pool of CANN buffers(
|
177
|
+
* @brief A pool of CANN buffers(priority segment buffer).
|
152
178
|
*
|
153
179
|
* This class manages a pool of CANN buffers for a specific device.
|
154
180
|
*/
|
155
|
-
struct
|
181
|
+
struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
|
182
|
+
/**
|
183
|
+
* @brief The maximum reuse margin for a buffer.
|
184
|
+
*/
|
185
|
+
static const size_t max_reuse_margin = 1ull << 22; // 4MB
|
186
|
+
|
187
|
+
/**
|
188
|
+
* @brief The minimum free margin for a buffer.
|
189
|
+
*/
|
190
|
+
static const size_t min_free_margin = 1ull << 20; // 1MB
|
191
|
+
|
192
|
+
/**
|
193
|
+
* @brief The alignment for buffer allocation.
|
194
|
+
*/
|
195
|
+
static const size_t alignment = 128;
|
196
|
+
|
197
|
+
/**
|
198
|
+
* @brief The device ID associated with this buffer pool.
|
199
|
+
*/
|
200
|
+
int device;
|
201
|
+
|
202
|
+
/**
|
203
|
+
* @brief Whether to disable clean during buffer allocation.
|
204
|
+
*/
|
205
|
+
bool disable_clean = false;
|
206
|
+
|
207
|
+
/**
|
208
|
+
* @brief Structure representing a CANN buffer.
|
209
|
+
*/
|
210
|
+
struct ggml_cann_buffer {
|
211
|
+
void* ptr = nullptr; ///< Pointer to the buffer.
|
212
|
+
size_t size = 0; ///< Size of the buffer.
|
213
|
+
std::chrono::steady_clock::time_point last_used; ///< Last used time.
|
214
|
+
|
215
|
+
bool operator>(const ggml_cann_buffer& other) const {
|
216
|
+
return size > other.size;
|
217
|
+
}
|
218
|
+
};
|
219
|
+
|
220
|
+
/**
|
221
|
+
* @brief Array of CANN buffers in the pool.
|
222
|
+
*/
|
223
|
+
std::unordered_map<void*, size_t> buffer_pool;
|
224
|
+
std::priority_queue<ggml_cann_buffer,
|
225
|
+
std::vector<ggml_cann_buffer>,
|
226
|
+
std::greater<>> free_buffers ;
|
227
|
+
|
228
|
+
/**
|
229
|
+
* @brief Total size of all buffers in the pool.
|
230
|
+
*/
|
231
|
+
size_t pool_size = 0;
|
232
|
+
|
233
|
+
/**
|
234
|
+
* @brief Constructor to initialize the buffer pool for a specific device.
|
235
|
+
*
|
236
|
+
* @param device The device ID to associate with this buffer pool.
|
237
|
+
*/
|
238
|
+
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
|
239
|
+
disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
240
|
+
}
|
241
|
+
|
242
|
+
/**
|
243
|
+
* @brief Destructor to free all buffers in the pool.
|
244
|
+
*/
|
245
|
+
~ggml_cann_pool_buf_prio() {
|
246
|
+
ggml_cann_set_device(device);
|
247
|
+
for (auto& [b_ptr, b_size] : buffer_pool) {
|
248
|
+
aclrtFree(b_ptr);
|
249
|
+
pool_size -= b_size;
|
250
|
+
}
|
251
|
+
buffer_pool.clear();
|
252
|
+
GGML_ASSERT(pool_size == 0);
|
253
|
+
}
|
254
|
+
|
255
|
+
/**
|
256
|
+
* @brief Allocate a buffer of the given size.
|
257
|
+
*
|
258
|
+
* @param size The size of the buffer to allocate.
|
259
|
+
* @param actual_size A pointer to a variable to receive the actual size of
|
260
|
+
* the allocated buffer.
|
261
|
+
* @return A pointer to the allocated buffer.
|
262
|
+
*/
|
263
|
+
void* alloc(size_t size, size_t* actual_size) override {
|
264
|
+
size = GGML_PAD(size, alignment);
|
265
|
+
if (size == 0) {
|
266
|
+
size = alignment;
|
267
|
+
}
|
268
|
+
|
269
|
+
void* ptr = nullptr;
|
270
|
+
auto now = std::chrono::steady_clock::now();
|
271
|
+
|
272
|
+
std::vector<ggml_cann_buffer> free_buffers_rest;
|
273
|
+
free_buffers_rest.reserve(free_buffers.size());
|
274
|
+
while (!free_buffers.empty()) {
|
275
|
+
auto b = free_buffers.top();
|
276
|
+
free_buffers.pop();
|
277
|
+
|
278
|
+
if (b.size >= size) {
|
279
|
+
// reuse the buffer if the size is enough
|
280
|
+
const size_t margin = b.size - size;
|
281
|
+
if (margin <= max_reuse_margin) {
|
282
|
+
*actual_size = b.size;
|
283
|
+
ptr = b.ptr;
|
284
|
+
#ifdef DEBUG_CANN_MALLOC
|
285
|
+
GGML_LOG_INFO(
|
286
|
+
"cann pool[%d]: reused %p, "
|
287
|
+
"pool_size = %5u MB, "
|
288
|
+
"size = %5u MB, "
|
289
|
+
"margin = %5u MB\n",
|
290
|
+
device, b.ptr,
|
291
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
292
|
+
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
|
293
|
+
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
|
294
|
+
#endif
|
295
|
+
break;
|
296
|
+
}
|
297
|
+
}
|
298
|
+
|
299
|
+
bool should_clean = !disable_clean &&
|
300
|
+
b.size > min_free_margin &&
|
301
|
+
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
|
302
|
+
if (should_clean) {
|
303
|
+
// free the buffer if it is larger than min_free_margin and has been idle for more than 100 ms
|
304
|
+
ACL_CHECK(aclrtFree(b.ptr));
|
305
|
+
pool_size -= b.size;
|
306
|
+
buffer_pool.erase(b.ptr);
|
307
|
+
#ifdef DEBUG_CANN_MALLOC
|
308
|
+
GGML_LOG_INFO(
|
309
|
+
"cann pool[%d]: clean %p, "
|
310
|
+
"pool_size = %5u MB, "
|
311
|
+
"size = %5u MB\n",
|
312
|
+
device, b.ptr,
|
313
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
314
|
+
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
|
315
|
+
#endif
|
316
|
+
continue;
|
317
|
+
}
|
318
|
+
free_buffers_rest.push_back(b);
|
319
|
+
}
|
320
|
+
for (ggml_cann_buffer &b : free_buffers_rest) {
|
321
|
+
free_buffers.push(std::move(b));
|
322
|
+
}
|
323
|
+
|
324
|
+
#ifdef DEBUG_CANN_MALLOC
|
325
|
+
GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
326
|
+
#endif
|
327
|
+
if (ptr != nullptr) {
|
328
|
+
return ptr;
|
329
|
+
}
|
330
|
+
|
331
|
+
// allocate a new buffer if no buffer can be reused
|
332
|
+
ggml_cann_set_device(device);
|
333
|
+
ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
|
334
|
+
*actual_size = size;
|
335
|
+
pool_size += size;
|
336
|
+
#ifdef DEBUG_CANN_MALLOC
|
337
|
+
GGML_LOG_INFO(
|
338
|
+
"cann pool[%d]: allocate %p, "
|
339
|
+
"pool_size = %5u MB, "
|
340
|
+
"size = %5u MB\n",
|
341
|
+
device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
342
|
+
(uint32_t)(GGML_PAD(size, 1048576) / 1048576));
|
343
|
+
#endif
|
344
|
+
buffer_pool.emplace(ptr, size);
|
345
|
+
return ptr;
|
346
|
+
}
|
347
|
+
|
348
|
+
/**
|
349
|
+
* @brief Free a buffer and return it to the pool.
|
350
|
+
*
|
351
|
+
* @param ptr Pointer to the buffer to free.
|
352
|
+
* @param size Size of the buffer to free.
|
353
|
+
*/
|
354
|
+
void free(void* ptr, size_t size) override {
|
355
|
+
GGML_UNUSED(size);
|
356
|
+
auto it = buffer_pool.find(ptr);
|
357
|
+
if (it == buffer_pool.end()) {
|
358
|
+
GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
|
359
|
+
}
|
360
|
+
|
361
|
+
auto now = std::chrono::steady_clock::now();
|
362
|
+
free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
|
363
|
+
#ifdef DEBUG_CANN_MALLOC
|
364
|
+
GGML_LOG_INFO(
|
365
|
+
"cann pool[%d]: return %p, "
|
366
|
+
"pool_size = %5u MB\n",
|
367
|
+
device, ptr,
|
368
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
369
|
+
#endif
|
370
|
+
}
|
371
|
+
};
|
372
|
+
|
373
|
+
/**
|
374
|
+
* @brief A pool of CANN buffers(segment buffer).
|
375
|
+
*
|
376
|
+
* This class manages a pool of CANN buffers for a specific device.
|
377
|
+
*/
|
378
|
+
struct ggml_cann_pool_buf : public ggml_cann_pool {
|
379
|
+
/**
|
380
|
+
* @brief The maximum reuse margin for a buffer.
|
381
|
+
*/
|
382
|
+
static const size_t max_reuse_margin = 1ull << 22; // 4MB
|
383
|
+
|
384
|
+
/**
|
385
|
+
* @brief The minimum free margin for a buffer.
|
386
|
+
*/
|
387
|
+
static const size_t min_free_margin = 1ull << 20; // 1MB
|
388
|
+
|
389
|
+
/**
|
390
|
+
* @brief The alignment for buffer allocation.
|
391
|
+
*/
|
392
|
+
static const size_t alignment = 128;
|
393
|
+
|
156
394
|
/**
|
157
395
|
* @brief The maximum number of buffers in the pool.
|
158
396
|
*/
|
@@ -163,12 +401,19 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
163
401
|
*/
|
164
402
|
int device;
|
165
403
|
|
404
|
+
/**
|
405
|
+
* @brief Whether to disable clean during buffer allocation.
|
406
|
+
*/
|
407
|
+
bool disable_clean = false;
|
408
|
+
|
166
409
|
/**
|
167
410
|
* @brief Structure representing a CANN buffer.
|
168
411
|
*/
|
169
412
|
struct ggml_cann_buffer {
|
170
413
|
void* ptr = nullptr; ///< Pointer to the buffer memory.
|
171
414
|
size_t size = 0; ///< Size of the buffer.
|
415
|
+
bool used = false; ///< Whether the buffer is currently in use.
|
416
|
+
std::chrono::steady_clock::time_point last_used; ///< Last used time.
|
172
417
|
};
|
173
418
|
|
174
419
|
/**
|
@@ -186,17 +431,19 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
186
431
|
*
|
187
432
|
* @param device The device ID to associate with this buffer pool.
|
188
433
|
*/
|
189
|
-
explicit
|
434
|
+
explicit ggml_cann_pool_buf(int device) : device(device) {
|
435
|
+
disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
436
|
+
}
|
190
437
|
|
191
438
|
/**
|
192
439
|
* @brief Destructor to free all buffers in the pool.
|
193
440
|
*/
|
194
|
-
~
|
441
|
+
~ggml_cann_pool_buf() {
|
195
442
|
ggml_cann_set_device(device);
|
196
443
|
for (int i = 0; i < MAX_BUFFERS; ++i) {
|
197
444
|
ggml_cann_buffer& b = buffer_pool[i];
|
198
445
|
if (b.ptr != nullptr) {
|
199
|
-
|
446
|
+
aclrtFree(b.ptr);
|
200
447
|
pool_size -= b.size;
|
201
448
|
}
|
202
449
|
}
|
@@ -212,63 +459,93 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
212
459
|
* @return A pointer to the allocated buffer.
|
213
460
|
*/
|
214
461
|
void* alloc(size_t size, size_t* actual_size) override {
|
215
|
-
const size_t alignment = 128;
|
216
462
|
size = GGML_PAD(size, alignment);
|
217
463
|
if (size == 0) {
|
218
464
|
size = alignment;
|
219
465
|
}
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
for (int i = 0; i < MAX_BUFFERS; ++i) {
|
466
|
+
|
467
|
+
void* ptr = nullptr;
|
468
|
+
auto now = std::chrono::steady_clock::now();
|
469
|
+
|
470
|
+
int i = 0;
|
471
|
+
for (; i < MAX_BUFFERS; ++i) {
|
227
472
|
ggml_cann_buffer& b = buffer_pool[i];
|
228
|
-
if (b.ptr
|
473
|
+
if (b.ptr == nullptr) {
|
474
|
+
break;
|
475
|
+
}
|
476
|
+
if (b.used) {
|
477
|
+
continue;
|
478
|
+
}
|
479
|
+
if (b.size >= size) {
|
480
|
+
// reuse the buffer if the size is enough
|
481
|
+
const size_t margin = b.size - size;
|
482
|
+
if (margin <= max_reuse_margin) {
|
483
|
+
*actual_size = b.size;
|
484
|
+
b.used = true;
|
485
|
+
ptr = b.ptr;
|
229
486
|
#ifdef DEBUG_CANN_MALLOC
|
230
|
-
|
231
|
-
|
487
|
+
GGML_LOG_INFO(
|
488
|
+
"cann pool[%d]: reused %p, "
|
489
|
+
"pool_size = %5u MB, "
|
490
|
+
"size = %5u MB, "
|
491
|
+
"margin = %5u MB\n",
|
492
|
+
device, b.ptr,
|
493
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
494
|
+
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
|
495
|
+
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
|
232
496
|
#endif
|
233
|
-
|
234
|
-
size_t diff = b.size - size;
|
235
|
-
if (diff < best_diff) {
|
236
|
-
best_diff = diff;
|
237
|
-
ibest = i;
|
238
|
-
if (!best_diff) {
|
239
|
-
void* ptr = b.ptr;
|
240
|
-
*actual_size = b.size;
|
241
|
-
b.ptr = nullptr;
|
242
|
-
b.size = 0;
|
243
|
-
return ptr;
|
244
|
-
}
|
245
|
-
}
|
497
|
+
break;
|
246
498
|
}
|
247
499
|
}
|
500
|
+
|
501
|
+
bool should_clean = !disable_clean &&
|
502
|
+
b.size > min_free_margin &&
|
503
|
+
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
|
504
|
+
if (should_clean) {
|
505
|
+
// free the buffer if it is larger than min_free_margin and has been idle for more than 100 ms
|
506
|
+
ACL_CHECK(aclrtFree(b.ptr));
|
507
|
+
pool_size -= b.size;
|
508
|
+
#ifdef DEBUG_CANN_MALLOC
|
509
|
+
GGML_LOG_INFO(
|
510
|
+
"cann pool[%d]: clean %p, "
|
511
|
+
"pool_size = %5u MB, "
|
512
|
+
"size = %5u MB\n",
|
513
|
+
device, b.ptr,
|
514
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
515
|
+
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
|
516
|
+
#endif
|
517
|
+
b.ptr = nullptr;
|
518
|
+
}
|
248
519
|
}
|
249
|
-
if (
|
250
|
-
ggml_cann_buffer& b = buffer_pool[ibest];
|
251
|
-
void* ptr = b.ptr;
|
252
|
-
*actual_size = b.size;
|
253
|
-
b.ptr = nullptr;
|
254
|
-
b.size = 0;
|
520
|
+
if (ptr != nullptr) {
|
255
521
|
return ptr;
|
256
522
|
}
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
523
|
+
|
524
|
+
if (i < MAX_BUFFERS) {
|
525
|
+
// allocate a new buffer if no buffer can be reused
|
526
|
+
ggml_cann_buffer& b = buffer_pool[i];
|
527
|
+
ggml_cann_set_device(device);
|
528
|
+
ACL_CHECK(aclrtMalloc(&b.ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
|
529
|
+
pool_size += size;
|
530
|
+
*actual_size = size;
|
531
|
+
b.size = size;
|
532
|
+
b.used = true;
|
533
|
+
if (i >= MAX_BUFFERS - 8) {
|
534
|
+
GGML_LOG_WARN("cann pool[%d]: slots almost full\n", device);
|
535
|
+
}
|
263
536
|
#ifdef DEBUG_CANN_MALLOC
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
537
|
+
GGML_LOG_INFO(
|
538
|
+
"cann pool[%d]: allocate %p, "
|
539
|
+
"pool_size = %5u MB, "
|
540
|
+
"size = %5u MB\n",
|
541
|
+
device, b.ptr,
|
542
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
543
|
+
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
|
270
544
|
#endif
|
271
|
-
|
545
|
+
return b.ptr;
|
546
|
+
}
|
547
|
+
|
548
|
+
GGML_ABORT("cann pool[%d]: slots full\n", device);
|
272
549
|
}
|
273
550
|
|
274
551
|
/**
|
@@ -278,18 +555,24 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
278
555
|
* @param size Size of the buffer to free.
|
279
556
|
*/
|
280
557
|
void free(void* ptr, size_t size) override {
|
558
|
+
GGML_UNUSED(size);
|
281
559
|
for (int i = 0; i < MAX_BUFFERS; ++i) {
|
282
560
|
ggml_cann_buffer& b = buffer_pool[i];
|
283
|
-
if (b.ptr
|
284
|
-
|
285
|
-
b.size = size;
|
286
|
-
return;
|
561
|
+
if (b.ptr != ptr) {
|
562
|
+
continue;
|
287
563
|
}
|
564
|
+
b.used = false;
|
565
|
+
b.last_used = std::chrono::steady_clock::now();
|
566
|
+
#ifdef DEBUG_CANN_MALLOC
|
567
|
+
GGML_LOG_INFO(
|
568
|
+
"cann pool[%d]: return %p, "
|
569
|
+
"pool_size = %5u MB\n",
|
570
|
+
device, b.ptr,
|
571
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
572
|
+
#endif
|
573
|
+
return;
|
288
574
|
}
|
289
|
-
|
290
|
-
// tasks in stream.
|
291
|
-
// TODO, fix me.
|
292
|
-
GGML_ABORT("Cann buffer pool full, increase MAX_CANN_BUFFERS\n");
|
575
|
+
GGML_ABORT("cann pool[%d]: slots full\n", device);
|
293
576
|
}
|
294
577
|
};
|
295
578
|
|
@@ -347,8 +630,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
347
630
|
* @param device The device ID to associate with this buffer pool.
|
348
631
|
*/
|
349
632
|
explicit ggml_cann_pool_vmm(int device)
|
350
|
-
|
351
|
-
granularity(ggml_cann_info().devices[device].vmm_granularity) {
|
633
|
+
: device(device) {
|
352
634
|
auto dev = ggml_cann_info().devices[device];
|
353
635
|
granularity = dev.vmm_granularity;
|
354
636
|
max_size = dev.total_vram;
|
@@ -471,7 +753,20 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
471
753
|
*/
|
472
754
|
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
|
473
755
|
int device) {
|
474
|
-
|
756
|
+
std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or("");
|
757
|
+
|
758
|
+
if (mem_pool_type == "prio") {
|
759
|
+
GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
|
760
|
+
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
|
761
|
+
}
|
762
|
+
|
763
|
+
if (ggml_cann_info().devices[device].vmm && mem_pool_type != "leg") {
|
764
|
+
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
|
765
|
+
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
|
766
|
+
}
|
767
|
+
|
768
|
+
GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
|
769
|
+
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
|
475
770
|
}
|
476
771
|
|
477
772
|
// cann buffer
|
@@ -796,14 +1091,14 @@ static bool need_transform(ggml_type type) {
|
|
796
1091
|
* @param buffer The CANN buffer from which to initialize the tensor.
|
797
1092
|
* @param tensor Pointer to the tensor to be initialized.
|
798
1093
|
*/
|
799
|
-
static
|
1094
|
+
static enum ggml_status ggml_backend_cann_buffer_init_tensor(
|
800
1095
|
ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
|
801
1096
|
if (tensor->view_src != NULL && tensor->view_offs == 0) {
|
802
1097
|
GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
|
803
|
-
return;
|
1098
|
+
return GGML_STATUS_SUCCESS;
|
804
1099
|
}
|
805
1100
|
|
806
|
-
// TODO:
|
1101
|
+
// TODO: cann backend doesn't support quantized yet. Just leave the code
|
807
1102
|
// here.
|
808
1103
|
if (ggml_is_quantized(tensor->type)) {
|
809
1104
|
// Initialize padding to 0 to avoid possible NaN values
|
@@ -817,6 +1112,7 @@ static void ggml_backend_cann_buffer_init_tensor(
|
|
817
1112
|
memset_size, 0, memset_size));
|
818
1113
|
}
|
819
1114
|
}
|
1115
|
+
return GGML_STATUS_SUCCESS;
|
820
1116
|
}
|
821
1117
|
|
822
1118
|
// TODO: need handle tensor which has paddings.
|
@@ -1019,8 +1315,11 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
|
1019
1315
|
|
1020
1316
|
ggml_cann_set_device(buft_ctx->device);
|
1021
1317
|
|
1022
|
-
|
1023
|
-
|
1318
|
+
const size_t alignment = 128;
|
1319
|
+
size = GGML_PAD(size, alignment);
|
1320
|
+
if (size == 0) {
|
1321
|
+
size = alignment;
|
1322
|
+
}
|
1024
1323
|
void* dev_ptr;
|
1025
1324
|
aclError err = aclrtMalloc(&dev_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST);
|
1026
1325
|
if (err != ACL_SUCCESS) {
|
@@ -1299,47 +1598,69 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
1299
1598
|
ggml_cann_dup(ctx, dst);
|
1300
1599
|
break;
|
1301
1600
|
case GGML_OP_ADD:
|
1302
|
-
|
1601
|
+
case GGML_OP_ADD1:
|
1602
|
+
ggml_cann_binary_op<aclnn_add>(ctx, dst);
|
1603
|
+
break;
|
1604
|
+
case GGML_OP_SUB:
|
1605
|
+
ggml_cann_binary_op<aclnn_sub>(ctx, dst);
|
1303
1606
|
break;
|
1304
1607
|
case GGML_OP_ACC:
|
1305
1608
|
ggml_cann_acc(ctx, dst);
|
1306
1609
|
break;
|
1307
1610
|
case GGML_OP_MUL:
|
1308
|
-
|
1611
|
+
ggml_cann_binary_op<aclnn_mul>(ctx, dst);
|
1309
1612
|
break;
|
1310
1613
|
case GGML_OP_DIV:
|
1311
|
-
|
1614
|
+
ggml_cann_binary_op<aclnn_div>(ctx, dst);
|
1312
1615
|
break;
|
1313
1616
|
case GGML_OP_UNARY:
|
1314
1617
|
switch (ggml_get_unary_op(dst)) {
|
1618
|
+
case GGML_UNARY_OP_ABS:
|
1619
|
+
GGML_CANN_CALL_UNARY_OP(Abs);
|
1620
|
+
break;
|
1621
|
+
case GGML_UNARY_OP_NEG:
|
1622
|
+
GGML_CANN_CALL_UNARY_OP(Neg);
|
1623
|
+
break;
|
1315
1624
|
case GGML_UNARY_OP_GELU:
|
1316
|
-
|
1317
|
-
ctx, dst);
|
1625
|
+
GGML_CANN_CALL_UNARY_OP(Gelu);
|
1318
1626
|
break;
|
1319
1627
|
case GGML_UNARY_OP_SILU:
|
1320
|
-
|
1321
|
-
ctx, dst);
|
1322
|
-
break;
|
1323
|
-
// TODO: Use faster gelu??
|
1324
|
-
case GGML_UNARY_OP_GELU_QUICK:
|
1325
|
-
ggml_cann_activation<aclnnGeluGetWorkspaceSize, aclnnGelu>(
|
1326
|
-
ctx, dst);
|
1628
|
+
GGML_CANN_CALL_UNARY_OP(Silu);
|
1327
1629
|
break;
|
1630
|
+
case GGML_UNARY_OP_GELU_QUICK: {
|
1631
|
+
auto lambda = [](ggml_backend_cann_context& ctx,
|
1632
|
+
aclTensor* acl_src,
|
1633
|
+
aclTensor* acl_dst) {
|
1634
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, GeluV2, acl_src, 0, acl_dst);
|
1635
|
+
};
|
1636
|
+
ggml_cann_unary_op(lambda, ctx, dst);
|
1637
|
+
} break;
|
1328
1638
|
case GGML_UNARY_OP_TANH:
|
1329
|
-
|
1330
|
-
ctx, dst);
|
1639
|
+
GGML_CANN_CALL_UNARY_OP(Tanh);
|
1331
1640
|
break;
|
1332
1641
|
case GGML_UNARY_OP_RELU:
|
1333
|
-
|
1334
|
-
|
1642
|
+
GGML_CANN_CALL_UNARY_OP(Relu);
|
1643
|
+
break;
|
1644
|
+
case GGML_UNARY_OP_SIGMOID:
|
1645
|
+
GGML_CANN_CALL_UNARY_OP(Sigmoid);
|
1335
1646
|
break;
|
1336
1647
|
case GGML_UNARY_OP_HARDSIGMOID:
|
1337
|
-
|
1338
|
-
aclnnHardsigmoid>(ctx, dst);
|
1648
|
+
GGML_CANN_CALL_UNARY_OP(Hardsigmoid);
|
1339
1649
|
break;
|
1340
1650
|
case GGML_UNARY_OP_HARDSWISH:
|
1341
|
-
|
1342
|
-
|
1651
|
+
GGML_CANN_CALL_UNARY_OP(Hardswish);
|
1652
|
+
break;
|
1653
|
+
case GGML_UNARY_OP_EXP:
|
1654
|
+
GGML_CANN_CALL_UNARY_OP(Exp);
|
1655
|
+
break;
|
1656
|
+
case GGML_UNARY_OP_ELU:
|
1657
|
+
ggml_cann_elu(ctx, dst);
|
1658
|
+
break;
|
1659
|
+
case GGML_UNARY_OP_SGN:
|
1660
|
+
GGML_CANN_CALL_UNARY_OP(Sign);
|
1661
|
+
break;
|
1662
|
+
case GGML_UNARY_OP_STEP:
|
1663
|
+
ggml_cann_step(ctx, dst);
|
1343
1664
|
break;
|
1344
1665
|
default:
|
1345
1666
|
return false;
|
@@ -1376,12 +1697,18 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
1376
1697
|
ggml_cann_mul_mat(ctx, dst);
|
1377
1698
|
break;
|
1378
1699
|
case GGML_OP_MUL_MAT_ID:
|
1379
|
-
|
1700
|
+
ggml_cann_mul_mat_id(ctx, dst);
|
1701
|
+
break;
|
1380
1702
|
case GGML_OP_SCALE:
|
1381
1703
|
ggml_cann_scale(ctx, dst);
|
1382
1704
|
break;
|
1383
1705
|
case GGML_OP_SQR:
|
1384
|
-
|
1706
|
+
GGML_ASSERT(dst->src[1] == nullptr);
|
1707
|
+
dst->src[1] = dst->src[0];
|
1708
|
+
ggml_cann_binary_op<aclnn_mul>(ctx, dst);
|
1709
|
+
break;
|
1710
|
+
case GGML_OP_SQRT:
|
1711
|
+
GGML_CANN_CALL_UNARY_OP(Sqrt);
|
1385
1712
|
break;
|
1386
1713
|
case GGML_OP_CLAMP:
|
1387
1714
|
ggml_cann_clamp(ctx, dst);
|
@@ -1413,12 +1740,42 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
1413
1740
|
case GGML_OP_POOL_2D:
|
1414
1741
|
ggml_cann_pool2d(ctx, dst);
|
1415
1742
|
break;
|
1743
|
+
case GGML_OP_SUM:
|
1744
|
+
ggml_cann_sum(ctx, dst);
|
1745
|
+
break;
|
1416
1746
|
case GGML_OP_SUM_ROWS:
|
1417
1747
|
ggml_cann_sum_rows(ctx, dst);
|
1418
1748
|
break;
|
1419
1749
|
case GGML_OP_ARGSORT:
|
1420
1750
|
ggml_cann_argsort(ctx, dst);
|
1421
1751
|
break;
|
1752
|
+
case GGML_OP_ARGMAX:
|
1753
|
+
ggml_cann_argmax(ctx, dst);
|
1754
|
+
break;
|
1755
|
+
case GGML_OP_COS:
|
1756
|
+
ggml_cann_unary_op<aclnn_cos>(ctx, dst);
|
1757
|
+
break;
|
1758
|
+
case GGML_OP_SIN:
|
1759
|
+
ggml_cann_unary_op<aclnn_sin>(ctx, dst);
|
1760
|
+
break;
|
1761
|
+
case GGML_OP_CONV_TRANSPOSE_1D:
|
1762
|
+
ggml_cann_conv_transpose_1d(ctx, dst);
|
1763
|
+
break;
|
1764
|
+
case GGML_OP_LOG:
|
1765
|
+
GGML_CANN_CALL_UNARY_OP(Log);
|
1766
|
+
break;
|
1767
|
+
case GGML_OP_MEAN:
|
1768
|
+
ggml_cann_mean(ctx, dst);
|
1769
|
+
break;
|
1770
|
+
case GGML_OP_PAD_REFLECT_1D:
|
1771
|
+
ggml_cann_pad_reflect_1d(ctx, dst);
|
1772
|
+
break;
|
1773
|
+
case GGML_OP_COUNT_EQUAL:
|
1774
|
+
ggml_cann_count_equal(ctx, dst);
|
1775
|
+
break;
|
1776
|
+
case GGML_OP_FLASH_ATTN_EXT:
|
1777
|
+
ggml_cann_flash_attn_ext(ctx, dst);
|
1778
|
+
break;
|
1422
1779
|
default:
|
1423
1780
|
return false;
|
1424
1781
|
}
|
@@ -1457,21 +1814,15 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
|
|
1457
1814
|
ACL_CHECK(aclrtSynchronizeDevice());
|
1458
1815
|
ACL_CHECK(aclrtResetDevice(cann_ctx->device));
|
1459
1816
|
|
1460
|
-
// finalize when last backend freed.
|
1461
|
-
if (cann_ctx->device == ggml_backend_cann_get_device_count() - 1) {
|
1462
|
-
ACL_CHECK(aclFinalize());
|
1463
|
-
}
|
1464
|
-
|
1465
1817
|
delete cann_ctx;
|
1466
1818
|
delete backend;
|
1467
1819
|
}
|
1468
1820
|
|
1821
|
+
|
1469
1822
|
/**
|
1470
1823
|
* @brief Sets tensor data asynchronously in the CANN backend.
|
1471
1824
|
*
|
1472
|
-
* This function asynchronously sets tensor data in the CANN backend.
|
1473
|
-
* on the tensor type, it may perform data transformations before copying data
|
1474
|
-
* to the device.
|
1825
|
+
* This function asynchronously sets tensor data in the CANN backend.
|
1475
1826
|
*
|
1476
1827
|
* @param backend Pointer to the CANN backend structure.
|
1477
1828
|
* @param tensor Pointer to the tensor structure to set data for.
|
@@ -1486,23 +1837,28 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
|
|
1486
1837
|
size_t size) {
|
1487
1838
|
ggml_backend_cann_context *cann_ctx =
|
1488
1839
|
(ggml_backend_cann_context *)backend->context;
|
1840
|
+
ggml_backend_buffer_t buf =
|
1841
|
+
tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
1489
1842
|
|
1490
|
-
|
1491
|
-
|
1492
|
-
|
1493
|
-
cann_ctx->stream()));
|
1494
|
-
} else {
|
1495
|
-
void *transform_buffer = malloc(size);
|
1496
|
-
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
1843
|
+
GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) &&
|
1844
|
+
"unsupported buffer type");
|
1845
|
+
GGML_ASSERT(!ggml_is_quantized(tensor->type));
|
1497
1846
|
|
1498
|
-
|
1499
|
-
|
1500
|
-
ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
|
1501
|
-
ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
|
1502
|
-
free(transform_buffer);
|
1503
|
-
}
|
1847
|
+
ggml_cann_async_memcpy(cann_ctx, (char *)tensor->data + offset, data, size,
|
1848
|
+
ACL_MEMCPY_HOST_TO_DEVICE);
|
1504
1849
|
}
|
1505
1850
|
|
1851
|
+
/**
|
1852
|
+
* @brief Gets tensor data asynchronously in the CANN backend.
|
1853
|
+
*
|
1854
|
+
* This function asynchronously gets tensor data in the CANN backend.
|
1855
|
+
*
|
1856
|
+
* @param backend Pointer to the CANN backend structure.
|
1857
|
+
* @param tensor Pointer to the tensor structure to get data from.
|
1858
|
+
* @param data Pointer to the host buffer that receives the tensor data.
|
1859
|
+
* @param offset Offset in bytes within the tensor data.
|
1860
|
+
* @param size Size of the data to copy in bytes.
|
1861
|
+
*/
|
1506
1862
|
static void ggml_backend_cann_get_tensor_async(
|
1507
1863
|
ggml_backend_t backend, const ggml_tensor *tensor, void *data,
|
1508
1864
|
size_t offset, size_t size) {
|
@@ -1513,20 +1869,11 @@ static void ggml_backend_cann_get_tensor_async(
|
|
1513
1869
|
|
1514
1870
|
GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) &&
|
1515
1871
|
"unsupported buffer type");
|
1872
|
+
GGML_ASSERT(!ggml_is_quantized(tensor->type));
|
1873
|
+
|
1874
|
+
ggml_cann_async_memcpy(cann_ctx, data, (char *)tensor->data + offset, size,
|
1875
|
+
ACL_MEMCPY_DEVICE_TO_HOST);
|
1516
1876
|
|
1517
|
-
if (!need_transform(tensor->type)) {
|
1518
|
-
ACL_CHECK(aclrtMemcpyAsync(data, size, (char *)tensor->data + offset,
|
1519
|
-
size, ACL_MEMCPY_DEVICE_TO_HOST,
|
1520
|
-
cann_ctx->stream()));
|
1521
|
-
} else {
|
1522
|
-
void *transform_buffer = malloc(size);
|
1523
|
-
ACL_CHECK(aclrtMemcpyAsync(
|
1524
|
-
transform_buffer, size, (char *)tensor->data + offset, size,
|
1525
|
-
ACL_MEMCPY_DEVICE_TO_HOST, cann_ctx->stream()));
|
1526
|
-
ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
|
1527
|
-
ggml_backend_cann_transform_back(tensor, transform_buffer, data);
|
1528
|
-
free(transform_buffer);
|
1529
|
-
}
|
1530
1877
|
}
|
1531
1878
|
|
1532
1879
|
/**
|
@@ -1586,6 +1933,8 @@ static bool ggml_backend_cann_cpy_tensor_async(
|
|
1586
1933
|
ggml_cann_set_device(cann_ctx_src->device);
|
1587
1934
|
ACL_CHECK(aclrtDeviceEnablePeerAccess(cann_ctx_dst->device, 0));
|
1588
1935
|
|
1936
|
+
// wait for task_queue empty to keep task order.
|
1937
|
+
cann_ctx_src->task_queue.wait();
|
1589
1938
|
ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size,
|
1590
1939
|
ACL_MEMCPY_DEVICE_TO_DEVICE,
|
1591
1940
|
cann_ctx_src->stream()));
|
@@ -1613,9 +1962,8 @@ static bool ggml_backend_cann_cpy_tensor_async(
|
|
1613
1962
|
static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
|
1614
1963
|
ggml_backend_cann_context* cann_ctx =
|
1615
1964
|
(ggml_backend_cann_context*)backend->context;
|
1616
|
-
|
1965
|
+
cann_ctx->task_queue.wait();
|
1617
1966
|
ggml_cann_set_device(cann_ctx->device);
|
1618
|
-
|
1619
1967
|
ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
|
1620
1968
|
}
|
1621
1969
|
|
@@ -1674,58 +2022,86 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1674
2022
|
switch (op->op) {
|
1675
2023
|
case GGML_OP_UNARY:
|
1676
2024
|
switch (ggml_get_unary_op(op)) {
|
2025
|
+
case GGML_UNARY_OP_ABS:
|
2026
|
+
case GGML_UNARY_OP_NEG:
|
1677
2027
|
case GGML_UNARY_OP_GELU:
|
1678
2028
|
case GGML_UNARY_OP_SILU:
|
1679
2029
|
case GGML_UNARY_OP_RELU:
|
2030
|
+
case GGML_UNARY_OP_SIGMOID:
|
1680
2031
|
case GGML_UNARY_OP_HARDSIGMOID:
|
1681
2032
|
case GGML_UNARY_OP_HARDSWISH:
|
1682
2033
|
case GGML_UNARY_OP_GELU_QUICK:
|
1683
2034
|
case GGML_UNARY_OP_TANH:
|
2035
|
+
case GGML_UNARY_OP_EXP:
|
2036
|
+
case GGML_UNARY_OP_ELU:
|
2037
|
+
case GGML_UNARY_OP_SGN:
|
2038
|
+
case GGML_UNARY_OP_STEP:
|
1684
2039
|
return true;
|
1685
2040
|
default:
|
1686
2041
|
return false;
|
1687
2042
|
}
|
1688
2043
|
case GGML_OP_MUL_MAT: {
|
1689
2044
|
switch (op->src[0]->type) {
|
1690
|
-
case GGML_TYPE_Q8_0:
|
1691
|
-
// Current groupsize should not be greater than k-1 in
|
1692
|
-
// aclnnWeightQuantBatchMatmulV2GetWorkspaceSize
|
1693
|
-
if (op->src[0]->ne[0] <= QK8_0) {
|
1694
|
-
return false;
|
1695
|
-
}
|
1696
2045
|
case GGML_TYPE_F16:
|
1697
2046
|
case GGML_TYPE_F32:
|
1698
|
-
case GGML_TYPE_Q4_0:
|
1699
2047
|
return true;
|
2048
|
+
case GGML_TYPE_Q8_0:
|
2049
|
+
case GGML_TYPE_Q4_0:
|
2050
|
+
#ifdef ASCEND_310P
|
2051
|
+
// Q4 && Q8 per group is not supported on 310p device
|
2052
|
+
return false;
|
2053
|
+
#endif
|
2054
|
+
// only support contiguous for quantized types.
|
2055
|
+
return ggml_is_contiguous(op->src[0]) &&
|
2056
|
+
ggml_is_contiguous(op->src[1]);
|
1700
2057
|
default:
|
1701
2058
|
return false;
|
1702
2059
|
}
|
1703
2060
|
}
|
1704
2061
|
case GGML_OP_MUL_MAT_ID:
|
1705
|
-
return false;
|
1706
|
-
// embedding
|
1707
|
-
case GGML_OP_GET_ROWS: {
|
1708
2062
|
switch (op->src[0]->type) {
|
1709
|
-
case GGML_TYPE_F32:
|
1710
2063
|
case GGML_TYPE_F16:
|
1711
|
-
case
|
1712
|
-
case GGML_TYPE_Q8_0:
|
2064
|
+
case GGML_TYPE_F32:
|
1713
2065
|
return true;
|
2066
|
+
case GGML_TYPE_Q8_0:
|
2067
|
+
case GGML_TYPE_Q4_0:
|
2068
|
+
#ifdef ASCEND_310P
|
2069
|
+
// Q4 && Q8 per group is not supported on 310p device
|
2070
|
+
return false;
|
2071
|
+
#endif
|
2072
|
+
// only support contiguous for quantized types.
|
2073
|
+
return ggml_is_contiguous(op->src[0]) &&
|
2074
|
+
ggml_is_contiguous(op->src[1]);
|
1714
2075
|
default:
|
1715
2076
|
return false;
|
1716
2077
|
}
|
1717
|
-
|
1718
|
-
case
|
1719
|
-
switch (op->type) {
|
2078
|
+
// embedding
|
2079
|
+
case GGML_OP_GET_ROWS: {
|
2080
|
+
switch (op->src[0]->type) {
|
1720
2081
|
case GGML_TYPE_F32:
|
1721
2082
|
case GGML_TYPE_F16:
|
1722
2083
|
case GGML_TYPE_Q8_0:
|
1723
|
-
case GGML_TYPE_Q4_0:
|
1724
2084
|
return true;
|
1725
2085
|
default:
|
1726
2086
|
return false;
|
1727
2087
|
}
|
1728
|
-
}
|
2088
|
+
} break;
|
2089
|
+
case GGML_OP_CPY: {
|
2090
|
+
ggml_tensor *src = op->src[0];
|
2091
|
+
if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) ||
|
2092
|
+
(src->type != GGML_TYPE_F32 &&
|
2093
|
+
src->type != GGML_TYPE_F16)) {
|
2094
|
+
// only support F32 and F16.
|
2095
|
+
return false;
|
2096
|
+
}
|
2097
|
+
|
2098
|
+
if (!ggml_are_same_shape(op, src) && !ggml_is_contiguous(op)) {
|
2099
|
+
// a non-contiguous dst is not supported when the shapes differ.
|
2100
|
+
return false;
|
2101
|
+
}
|
2102
|
+
|
2103
|
+
return true;
|
2104
|
+
} break;
|
1729
2105
|
case GGML_OP_CONT: {
|
1730
2106
|
// TODO: support GGML_TYPE_BF16
|
1731
2107
|
switch (op->src[0]->type) {
|
@@ -1738,13 +2114,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1738
2114
|
}
|
1739
2115
|
case GGML_OP_ROPE: {
|
1740
2116
|
// TODO: with ops-test v == 1
|
1741
|
-
float
|
2117
|
+
float ext_factor = 0.0f;
|
2118
|
+
memcpy(&ext_factor, (const float *) op->op_params + 7, sizeof(float));
|
1742
2119
|
// TODO: n_dims <= ne0
|
1743
2120
|
if (op->src[0]->ne[0] != op->op_params[1]) {
|
1744
2121
|
return false;
|
1745
2122
|
}
|
1746
2123
|
// TODO: ext_factor != 0
|
1747
|
-
if (
|
2124
|
+
if (ext_factor != 0) {
|
1748
2125
|
return false;
|
1749
2126
|
}
|
1750
2127
|
|
@@ -1756,6 +2133,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1756
2133
|
return false;
|
1757
2134
|
}
|
1758
2135
|
|
2136
|
+
if(!ggml_is_contiguous(op->src[0])){
|
2137
|
+
return false;
|
2138
|
+
}
|
1759
2139
|
return true;
|
1760
2140
|
}
|
1761
2141
|
case GGML_OP_UPSCALE: {
|
@@ -1764,11 +2144,31 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1764
2144
|
if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
|
1765
2145
|
return false;
|
1766
2146
|
}
|
2147
|
+
if (op->op_params[0] != GGML_SCALE_MODE_NEAREST) {
|
2148
|
+
return false;
|
2149
|
+
}
|
1767
2150
|
return true;
|
1768
2151
|
}
|
2152
|
+
case GGML_OP_POOL_2D: {
|
2153
|
+
const int32_t * opts = (const int32_t *) op->op_params;
|
2154
|
+
#ifdef ASCEND_310P
|
2155
|
+
enum ggml_op_pool opt = static_cast<ggml_op_pool>(opts[0]);
|
2156
|
+
if(opt == GGML_OP_POOL_MAX){
|
2157
|
+
return false;
|
2158
|
+
}
|
2159
|
+
#endif
|
2160
|
+
const int k0 = opts[1];
|
2161
|
+
const int k1 = opts[2];
|
2162
|
+
const int p0 = opts[5];
|
2163
|
+
const int p1 = opts[6];
|
2164
|
+
// value of paddingH should be at most half of kernelH
|
2165
|
+
// value of paddingW should be at most half of kernelW
|
2166
|
+
return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
|
2167
|
+
}
|
2168
|
+
case GGML_OP_SUM:
|
2169
|
+
case GGML_OP_DUP:
|
1769
2170
|
case GGML_OP_IM2COL:
|
1770
2171
|
case GGML_OP_CONCAT:
|
1771
|
-
case GGML_OP_DUP:
|
1772
2172
|
case GGML_OP_REPEAT:
|
1773
2173
|
case GGML_OP_NONE:
|
1774
2174
|
case GGML_OP_RESHAPE:
|
@@ -1777,15 +2177,17 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1777
2177
|
case GGML_OP_TRANSPOSE:
|
1778
2178
|
case GGML_OP_NORM:
|
1779
2179
|
case GGML_OP_ADD:
|
2180
|
+
case GGML_OP_ADD1:
|
2181
|
+
case GGML_OP_SUB:
|
1780
2182
|
case GGML_OP_MUL:
|
1781
2183
|
case GGML_OP_DIV:
|
1782
2184
|
case GGML_OP_RMS_NORM:
|
1783
2185
|
case GGML_OP_SCALE:
|
1784
2186
|
case GGML_OP_SQR:
|
2187
|
+
case GGML_OP_SQRT:
|
1785
2188
|
case GGML_OP_CLAMP:
|
1786
2189
|
case GGML_OP_DIAG_MASK_INF:
|
1787
2190
|
case GGML_OP_SOFT_MAX:
|
1788
|
-
case GGML_OP_POOL_2D:
|
1789
2191
|
case GGML_OP_SUM_ROWS:
|
1790
2192
|
case GGML_OP_ARGSORT:
|
1791
2193
|
case GGML_OP_ACC:
|
@@ -1794,7 +2196,47 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1794
2196
|
case GGML_OP_ARANGE:
|
1795
2197
|
case GGML_OP_TIMESTEP_EMBEDDING:
|
1796
2198
|
case GGML_OP_LEAKY_RELU:
|
2199
|
+
case GGML_OP_ARGMAX:
|
2200
|
+
case GGML_OP_COS:
|
2201
|
+
case GGML_OP_SIN:
|
2202
|
+
case GGML_OP_CONV_TRANSPOSE_1D:
|
2203
|
+
case GGML_OP_LOG:
|
2204
|
+
case GGML_OP_MEAN:
|
2205
|
+
case GGML_OP_PAD_REFLECT_1D:
|
2206
|
+
case GGML_OP_COUNT_EQUAL:
|
1797
2207
|
return true;
|
2208
|
+
case GGML_OP_FLASH_ATTN_EXT:{
|
2209
|
+
// derived from [ggml-cuda.cu]
|
2210
|
+
if(op->src[1]->type != GGML_TYPE_F16 || op->src[2]->type != GGML_TYPE_F16){
|
2211
|
+
return false;
|
2212
|
+
}
|
2213
|
+
if(op->src[1]->type != GGML_TYPE_F16 && op->src[1]->type != GGML_TYPE_F32 && op->src[1]->type != GGML_TYPE_BF16){
|
2214
|
+
return false;
|
2215
|
+
}
|
2216
|
+
if(op->type != GGML_TYPE_F16 && op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_BF16){
|
2217
|
+
return false;
|
2218
|
+
}
|
2219
|
+
if (op->src[1]->ne[0] != op->src[2]->ne[0]) {
|
2220
|
+
// different head sizes of K and V are not supported yet
|
2221
|
+
return false;
|
2222
|
+
}
|
2223
|
+
if (op->src[0]->ne[0] == 192) {
|
2224
|
+
return false;
|
2225
|
+
}
|
2226
|
+
if (op->src[0]->ne[0] == 576) {
|
2227
|
+
// DeepSeek MLA
|
2228
|
+
return false;
|
2229
|
+
}
|
2230
|
+
if (op->src[0]->ne[3] != 1) {
|
2231
|
+
return false;
|
2232
|
+
}
|
2233
|
+
float logitSoftcap = 0.0f;
|
2234
|
+
memcpy(&logitSoftcap, (float*)op->op_params + 2, sizeof(float));
|
2235
|
+
if(logitSoftcap != 0.0f) {
|
2236
|
+
return false;
|
2237
|
+
}
|
2238
|
+
return true;
|
2239
|
+
}
|
1798
2240
|
default:
|
1799
2241
|
return false;
|
1800
2242
|
}
|