whispercpp 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -3
- data/README.md +92 -31
- data/Rakefile +26 -7
- data/ext/.gitignore +5 -7
- data/ext/dependencies.rb +61 -0
- data/ext/extconf.rb +21 -198
- data/ext/options.rb +221 -0
- data/ext/ruby_whisper.c +159 -0
- data/ext/ruby_whisper.h +17 -2
- data/ext/ruby_whisper_context.c +641 -0
- data/ext/ruby_whisper_error.c +52 -0
- data/ext/ruby_whisper_model.c +232 -0
- data/ext/ruby_whisper_params.c +1301 -0
- data/ext/ruby_whisper_segment.c +143 -0
- data/ext/ruby_whisper_transcribe.cpp +87 -0
- data/ext/ruby_whisper_vad_params.c +288 -0
- data/ext/sources/.dockerignore +3 -0
- data/ext/sources/.github/workflows/bindings-ruby.yml +21 -0
- data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
- data/ext/sources/CMakeLists.txt +251 -0
- data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
- data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
- data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
- data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
- data/ext/sources/bindings/javascript/package.json +26 -0
- data/ext/sources/bindings/javascript/whisper.js +19 -0
- data/ext/sources/build-xcframework.sh +547 -0
- data/ext/sources/ci/run.sh +336 -0
- data/ext/sources/close-issue.yml +28 -0
- data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
- data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
- data/ext/sources/cmake/build-info.cmake +60 -0
- data/ext/sources/cmake/git-vars.cmake +22 -0
- data/ext/sources/cmake/whisper-config.cmake.in +65 -0
- data/ext/sources/cmake/whisper.pc.in +10 -0
- data/ext/sources/examples/CMakeLists.txt +124 -0
- data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +37 -0
- data/ext/sources/examples/addon.node/addon.cpp +438 -0
- data/ext/sources/examples/addon.node/index.js +54 -0
- data/ext/sources/examples/addon.node/package.json +16 -0
- data/ext/sources/examples/bench/CMakeLists.txt +8 -0
- data/ext/sources/examples/bench/bench.cpp +175 -0
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
- data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
- data/ext/sources/examples/cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/cli/cli.cpp +1294 -0
- data/ext/sources/examples/coi-serviceworker.js +146 -0
- data/ext/sources/examples/command/CMakeLists.txt +10 -0
- data/ext/sources/examples/command/command.cpp +776 -0
- data/ext/sources/examples/command/commands.txt +9 -0
- data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
- data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/common-ggml.cpp +238 -0
- data/ext/sources/examples/common-ggml.h +18 -0
- data/ext/sources/examples/common-sdl.cpp +227 -0
- data/ext/sources/examples/common-sdl.h +49 -0
- data/ext/sources/examples/common-whisper.cpp +168 -0
- data/ext/sources/examples/common-whisper.h +24 -0
- data/ext/sources/examples/common.cpp +675 -0
- data/ext/sources/examples/common.h +322 -0
- data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
- data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
- data/ext/sources/examples/generate-karaoke.sh +57 -0
- data/ext/sources/examples/grammar-parser.cpp +423 -0
- data/ext/sources/examples/grammar-parser.h +29 -0
- data/ext/sources/examples/helpers.js +191 -0
- data/ext/sources/examples/json.hpp +24596 -0
- data/ext/sources/examples/livestream.sh +112 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
- data/ext/sources/examples/lsp/lsp.cpp +467 -0
- data/ext/sources/examples/lsp/whisper.vim +362 -0
- data/ext/sources/examples/miniaudio.h +93468 -0
- data/ext/sources/examples/python/test_whisper_processor.py +7 -0
- data/ext/sources/examples/python/whisper_processor.py +54 -0
- data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
- data/ext/sources/examples/quantize/quantize.cpp +223 -0
- data/ext/sources/examples/server/CMakeLists.txt +12 -0
- data/ext/sources/examples/server/bench.js +29 -0
- data/ext/sources/examples/server/httplib.h +10497 -0
- data/ext/sources/examples/server/server.cpp +1091 -0
- data/ext/sources/examples/server.py +115 -0
- data/ext/sources/examples/stb_vorbis.c +5584 -0
- data/ext/sources/examples/stream/CMakeLists.txt +10 -0
- data/ext/sources/examples/stream/stream.cpp +429 -0
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
- data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
- data/ext/sources/examples/sycl/build.sh +22 -0
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
- data/ext/sources/examples/sycl/run-whisper.sh +17 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +40 -0
- data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
- data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +1746 -0
- data/ext/sources/examples/talk-llama/llama-arch.h +437 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +374 -0
- data/ext/sources/examples/talk-llama/llama-batch.h +89 -0
- data/ext/sources/examples/talk-llama/llama-chat.cpp +663 -0
- data/ext/sources/examples/talk-llama/llama-chat.h +58 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +2676 -0
- data/ext/sources/examples/talk-llama/llama-context.h +276 -0
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
- data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
- data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
- data/ext/sources/examples/talk-llama/llama-graph.cpp +1618 -0
- data/ext/sources/examples/talk-llama/llama-graph.h +640 -0
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +95 -0
- data/ext/sources/examples/talk-llama/llama-hparams.h +190 -0
- data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
- data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
- data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
- data/ext/sources/examples/talk-llama/llama-io.h +35 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2739 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +502 -0
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +379 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +32 -0
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
- data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1138 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +281 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +13814 -0
- data/ext/sources/examples/talk-llama/llama-model.h +425 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +966 -0
- data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
- data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +3340 -0
- data/ext/sources/examples/talk-llama/llama-vocab.h +131 -0
- data/ext/sources/examples/talk-llama/llama.cpp +354 -0
- data/ext/sources/examples/talk-llama/llama.h +1377 -0
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
- data/ext/sources/examples/talk-llama/speak +40 -0
- data/ext/sources/examples/talk-llama/speak.bat +1 -0
- data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +808 -0
- data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
- data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +849 -0
- data/ext/sources/examples/talk-llama/unicode.h +66 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +143 -0
- data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
- data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +249 -0
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
- data/ext/sources/ggml/CMakeLists.txt +390 -0
- data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
- data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
- data/ext/sources/ggml/cmake/common.cmake +26 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
- data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-backend.h +9 -7
- data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
- data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +9 -1
- data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
- data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
- data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
- data/ext/{ggml → sources/ggml}/include/ggml.h +182 -265
- data/ext/sources/ggml/include/gguf.h +202 -0
- data/ext/sources/ggml/src/CMakeLists.txt +346 -0
- data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
- data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +87 -53
- data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +26 -14
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +74 -0
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
- data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +135 -1
- data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +564 -146
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
- data/ext/{ggml → sources/ggml}/src/ggml-common.h +12 -8
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +504 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +2 -1
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +72 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/cpu-feats-x86.cpp +5 -1
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +163 -41
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-quants.c +4029 -1117
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3510 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +67 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +8903 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.h +110 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +252 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +818 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +828 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +730 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +26 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1471 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3505 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +336 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +289 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +59 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
- data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
- data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +131 -0
- data/ext/{ggml → sources/ggml}/src/ggml-impl.h +64 -19
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +120 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2178 -1064
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +1575 -1218
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +5124 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
- data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
- data/ext/{ggml → sources/ggml}/src/ggml-quants.c +114 -120
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +480 -73
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +345 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +589 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +32 -33
- data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +4 -2
- data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/convert.cpp +104 -28
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +700 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +791 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +156 -17
- data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +75 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +99 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1004 -1240
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
- data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +0 -1
- data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmvq.cpp +261 -166
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/norm.cpp +204 -81
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
- data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +83 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +361 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +35 -25
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1215 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +293 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +196 -0
- data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3130 -1087
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +193 -35
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- data/ext/{ggml → sources/ggml}/src/ggml.c +676 -1820
- data/ext/sources/ggml/src/gguf.cpp +1330 -0
- data/ext/{include → sources/include}/whisper.h +68 -2
- data/ext/sources/src/CMakeLists.txt +143 -0
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +35 -10
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +28 -3
- data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
- data/ext/sources/src/whisper-arch.h +197 -0
- data/ext/{src → sources/src}/whisper.cpp +1905 -374
- data/ext/sources/tests/CMakeLists.txt +105 -0
- data/ext/sources/tests/earnings21/eval.mk +58 -0
- data/ext/sources/tests/earnings21/eval.py +68 -0
- data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
- data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
- data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
- data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
- data/ext/sources/tests/earnings21/requirements.txt +6 -0
- data/ext/sources/tests/en-0-ref.txt +1 -0
- data/ext/sources/tests/en-1-ref.txt +1 -0
- data/ext/sources/tests/en-2-ref.txt +1 -0
- data/ext/sources/tests/es-0-ref.txt +1 -0
- data/ext/sources/tests/librispeech/eval.mk +39 -0
- data/ext/sources/tests/librispeech/eval.py +47 -0
- data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
- data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
- data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
- data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
- data/ext/sources/tests/librispeech/requirements.txt +6 -0
- data/ext/sources/tests/run-tests.sh +130 -0
- data/ext/sources/tests/test-c.c +3 -0
- data/ext/sources/tests/test-vad-full.cpp +54 -0
- data/ext/sources/tests/test-vad.cpp +83 -0
- data/ext/sources/tests/test-whisper.js +58 -0
- data/extsources.rb +33 -5
- data/lib/whisper/model/uri.rb +149 -128
- data/sig/whisper.rbs +480 -0
- data/tests/helper.rb +28 -0
- data/tests/test_callback.rb +45 -3
- data/tests/test_error.rb +2 -2
- data/tests/test_model.rb +38 -0
- data/tests/test_package.rb +18 -3
- data/tests/test_params.rb +145 -8
- data/tests/test_segment.rb +10 -19
- data/tests/test_vad.rb +19 -0
- data/tests/test_vad_params.rb +103 -0
- data/tests/test_whisper.rb +37 -37
- data/whispercpp.gemspec +5 -4
- metadata +766 -111
- data/ext/cpu.mk +0 -9
- data/ext/examples/dr_wav.h +0 -8815
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
- data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
- data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
- data/ext/metal-embed.mk +0 -17
- data/ext/metal.mk +0 -6
- data/ext/ruby_whisper.cpp +0 -1909
- data/ext/scripts/get-flags.mk +0 -38
- data/lib/whisper.rb +0 -2
- /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-aarch64.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-hbm.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-hbm.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-traits.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-traits.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
- /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
@@ -29,11 +29,14 @@
|
|
29
29
|
#include <cstdio>
|
30
30
|
#include <cstring>
|
31
31
|
#include <mutex>
|
32
|
+
#include <queue>
|
33
|
+
#include <chrono>
|
32
34
|
|
33
35
|
#include "ggml-impl.h"
|
34
36
|
#include "ggml-backend-impl.h"
|
35
37
|
#include "ggml-cann/aclnn_ops.h"
|
36
38
|
#include "ggml-cann/common.h"
|
39
|
+
#include "ggml.h"
|
37
40
|
|
38
41
|
#define GGML_COMMON_DECL_C
|
39
42
|
|
@@ -119,9 +122,10 @@ static ggml_cann_device_info ggml_cann_init() {
|
|
119
122
|
prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE;
|
120
123
|
prop.location.id = id;
|
121
124
|
prop.reserve = 0;
|
122
|
-
|
125
|
+
err = aclrtMemGetAllocationGranularity(
|
123
126
|
&prop, ACL_RT_MEM_ALLOC_GRANULARITY_RECOMMENDED,
|
124
|
-
&info.devices[id].vmm_granularity)
|
127
|
+
&info.devices[id].vmm_granularity);
|
128
|
+
info.devices[id].vmm = err == ACL_SUCCESS;
|
125
129
|
|
126
130
|
size_t free, total;
|
127
131
|
ggml_backend_cann_get_device_memory(id, &free, &total);
|
@@ -148,11 +152,223 @@ const ggml_cann_device_info& ggml_cann_info() {
|
|
148
152
|
|
149
153
|
//#define DEBUG_CANN_MALLOC
|
150
154
|
/**
|
151
|
-
* @brief A pool of CANN buffers(
|
155
|
+
* @brief A pool of CANN buffers(priority segment buffer).
|
152
156
|
*
|
153
157
|
* This class manages a pool of CANN buffers for a specific device.
|
154
158
|
*/
|
155
|
-
struct
|
159
|
+
struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
|
160
|
+
/**
|
161
|
+
* @brief The maximum reuse margin for a buffer.
|
162
|
+
*/
|
163
|
+
static const size_t max_reuse_margin = 1ull << 22; // 4MB
|
164
|
+
|
165
|
+
/**
|
166
|
+
* @brief The minimum free margin for a buffer.
|
167
|
+
*/
|
168
|
+
static const size_t min_free_margin = 1ull << 20; // 1MB
|
169
|
+
|
170
|
+
/**
|
171
|
+
* @brief The alignment for buffer allocation.
|
172
|
+
*/
|
173
|
+
static const size_t alignment = 128;
|
174
|
+
|
175
|
+
/**
|
176
|
+
* @brief The device ID associated with this buffer pool.
|
177
|
+
*/
|
178
|
+
int device;
|
179
|
+
|
180
|
+
/**
|
181
|
+
* @brief Whether to disable clean during buffer allocation.
|
182
|
+
*/
|
183
|
+
bool disable_clean = false;
|
184
|
+
|
185
|
+
/**
|
186
|
+
* @brief Structure representing a CANN buffer.
|
187
|
+
*/
|
188
|
+
struct ggml_cann_buffer {
|
189
|
+
void* ptr = nullptr; ///< Pointer to the buffer.
|
190
|
+
size_t size = 0; ///< Size of the buffer.
|
191
|
+
std::chrono::steady_clock::time_point last_used; ///< Last used time.
|
192
|
+
|
193
|
+
bool operator>(const ggml_cann_buffer& other) const {
|
194
|
+
return size > other.size;
|
195
|
+
}
|
196
|
+
};
|
197
|
+
|
198
|
+
/**
|
199
|
+
* @brief Array of CANN buffers in the pool.
|
200
|
+
*/
|
201
|
+
std::unordered_map<void*, size_t> buffer_pool;
|
202
|
+
std::priority_queue<ggml_cann_buffer,
|
203
|
+
std::vector<ggml_cann_buffer>,
|
204
|
+
std::greater<>> free_buffers ;
|
205
|
+
|
206
|
+
/**
|
207
|
+
* @brief Total size of all buffers in the pool.
|
208
|
+
*/
|
209
|
+
size_t pool_size = 0;
|
210
|
+
|
211
|
+
/**
|
212
|
+
* @brief Constructor to initialize the buffer pool for a specific device.
|
213
|
+
*
|
214
|
+
* @param device The device ID to associate with this buffer pool.
|
215
|
+
*/
|
216
|
+
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
|
217
|
+
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
|
218
|
+
}
|
219
|
+
|
220
|
+
/**
|
221
|
+
* @brief Destructor to free all buffers in the pool.
|
222
|
+
*/
|
223
|
+
~ggml_cann_pool_buf_prio() {
|
224
|
+
ggml_cann_set_device(device);
|
225
|
+
for (auto& [b_ptr, b_size] : buffer_pool) {
|
226
|
+
aclrtFree(b_ptr);
|
227
|
+
pool_size -= b_size;
|
228
|
+
}
|
229
|
+
buffer_pool.clear();
|
230
|
+
GGML_ASSERT(pool_size == 0);
|
231
|
+
}
|
232
|
+
|
233
|
+
/**
|
234
|
+
* @brief Allocate a buffer of the given size.
|
235
|
+
*
|
236
|
+
* @param size The size of the buffer to allocate.
|
237
|
+
* @param actual_size A pointer to a variable to receive the actual size of
|
238
|
+
* the allocated buffer.
|
239
|
+
* @return A pointer to the allocated buffer.
|
240
|
+
*/
|
241
|
+
void* alloc(size_t size, size_t* actual_size) override {
|
242
|
+
size = GGML_PAD(size, alignment);
|
243
|
+
if (size == 0) {
|
244
|
+
size = alignment;
|
245
|
+
}
|
246
|
+
|
247
|
+
void* ptr = nullptr;
|
248
|
+
auto now = std::chrono::steady_clock::now();
|
249
|
+
|
250
|
+
std::vector<ggml_cann_buffer> free_buffers_rest;
|
251
|
+
free_buffers_rest.reserve(free_buffers.size());
|
252
|
+
while (!free_buffers.empty()) {
|
253
|
+
auto b = free_buffers.top();
|
254
|
+
free_buffers.pop();
|
255
|
+
|
256
|
+
if (b.size >= size) {
|
257
|
+
// reuse the buffer if the size is enough
|
258
|
+
const size_t margin = b.size - size;
|
259
|
+
if (margin <= max_reuse_margin) {
|
260
|
+
*actual_size = b.size;
|
261
|
+
ptr = b.ptr;
|
262
|
+
#ifdef DEBUG_CANN_MALLOC
|
263
|
+
GGML_LOG_INFO(
|
264
|
+
"cann pool[%d]: reused %p, "
|
265
|
+
"pool_size = %5u MB, "
|
266
|
+
"size = %5u MB, "
|
267
|
+
"margin = %5u MB\n",
|
268
|
+
device, b.ptr,
|
269
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
270
|
+
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
|
271
|
+
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
|
272
|
+
#endif
|
273
|
+
break;
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
277
|
+
bool should_clean = !disable_clean &&
|
278
|
+
b.size > min_free_margin &&
|
279
|
+
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
|
280
|
+
if (should_clean) {
|
281
|
+
// free the buffer if the size is needed to be freed
|
282
|
+
ACL_CHECK(aclrtFree(b.ptr));
|
283
|
+
pool_size -= b.size;
|
284
|
+
buffer_pool.erase(b.ptr);
|
285
|
+
#ifdef DEBUG_CANN_MALLOC
|
286
|
+
GGML_LOG_INFO(
|
287
|
+
"cann pool[%d]: clean %p, "
|
288
|
+
"pool_size = %5u MB, "
|
289
|
+
"size = %5u MB\n",
|
290
|
+
device, b.ptr,
|
291
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
292
|
+
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
|
293
|
+
#endif
|
294
|
+
continue;
|
295
|
+
}
|
296
|
+
free_buffers_rest.push_back(b);
|
297
|
+
}
|
298
|
+
for (ggml_cann_buffer &b : free_buffers_rest) {
|
299
|
+
free_buffers.push(std::move(b));
|
300
|
+
}
|
301
|
+
|
302
|
+
#ifdef DEBUG_CANN_MALLOC
|
303
|
+
GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
304
|
+
#endif
|
305
|
+
if (ptr != nullptr) {
|
306
|
+
return ptr;
|
307
|
+
}
|
308
|
+
|
309
|
+
// allocate a new buffer if no buffer can be reused
|
310
|
+
ggml_cann_set_device(device);
|
311
|
+
ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
|
312
|
+
*actual_size = size;
|
313
|
+
pool_size += size;
|
314
|
+
#ifdef DEBUG_CANN_MALLOC
|
315
|
+
GGML_LOG_INFO(
|
316
|
+
"cann pool[%d]: allocate %p, "
|
317
|
+
"pool_size = %5u MB, "
|
318
|
+
"size = %5u MB\n",
|
319
|
+
device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
320
|
+
(uint32_t)(GGML_PAD(size, 1048576) / 1048576));
|
321
|
+
#endif
|
322
|
+
buffer_pool.emplace(ptr, size);
|
323
|
+
return ptr;
|
324
|
+
}
|
325
|
+
|
326
|
+
/**
|
327
|
+
* @brief Free a buffer and return it to the pool.
|
328
|
+
*
|
329
|
+
* @param ptr Pointer to the buffer to free.
|
330
|
+
* @param size Size of the buffer to free.
|
331
|
+
*/
|
332
|
+
void free(void* ptr, size_t size) override {
|
333
|
+
GGML_UNUSED(size);
|
334
|
+
auto it = buffer_pool.find(ptr);
|
335
|
+
if (it == buffer_pool.end()) {
|
336
|
+
GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
|
337
|
+
}
|
338
|
+
|
339
|
+
auto now = std::chrono::steady_clock::now();
|
340
|
+
free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
|
341
|
+
#ifdef DEBUG_CANN_MALLOC
|
342
|
+
GGML_LOG_INFO(
|
343
|
+
"cann pool[%d]: return %p, "
|
344
|
+
"pool_size = %5u MB\n",
|
345
|
+
device, ptr,
|
346
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
347
|
+
#endif
|
348
|
+
}
|
349
|
+
};
|
350
|
+
|
351
|
+
/**
|
352
|
+
* @brief A pool of CANN buffers(segment buffer).
|
353
|
+
*
|
354
|
+
* This class manages a pool of CANN buffers for a specific device.
|
355
|
+
*/
|
356
|
+
struct ggml_cann_pool_buf : public ggml_cann_pool {
|
357
|
+
/**
|
358
|
+
* @brief The maximum reuse margin for a buffer.
|
359
|
+
*/
|
360
|
+
static const size_t max_reuse_margin = 1ull << 22; // 4MB
|
361
|
+
|
362
|
+
/**
|
363
|
+
* @brief The minimum free margin for a buffer.
|
364
|
+
*/
|
365
|
+
static const size_t min_free_margin = 1ull << 20; // 1MB
|
366
|
+
|
367
|
+
/**
|
368
|
+
* @brief The alignment for buffer allocation.
|
369
|
+
*/
|
370
|
+
static const size_t alignment = 128;
|
371
|
+
|
156
372
|
/**
|
157
373
|
* @brief The maximum number of buffers in the pool.
|
158
374
|
*/
|
@@ -163,12 +379,19 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
163
379
|
*/
|
164
380
|
int device;
|
165
381
|
|
382
|
+
/**
|
383
|
+
* @brief Whether to disable clean during buffer allocation.
|
384
|
+
*/
|
385
|
+
bool disable_clean = false;
|
386
|
+
|
166
387
|
/**
|
167
388
|
* @brief Structure representing a CANN buffer.
|
168
389
|
*/
|
169
390
|
struct ggml_cann_buffer {
|
170
391
|
void* ptr = nullptr; ///< Pointer to the buffer memory.
|
171
392
|
size_t size = 0; ///< Size of the buffer.
|
393
|
+
bool used = false; ///< Whether the buffer is currently in use.
|
394
|
+
std::chrono::steady_clock::time_point last_used; ///< Last used time.
|
172
395
|
};
|
173
396
|
|
174
397
|
/**
|
@@ -186,17 +409,19 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
186
409
|
*
|
187
410
|
* @param device The device ID to associate with this buffer pool.
|
188
411
|
*/
|
189
|
-
explicit
|
412
|
+
explicit ggml_cann_pool_buf(int device) : device(device) {
|
413
|
+
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
|
414
|
+
}
|
190
415
|
|
191
416
|
/**
|
192
417
|
* @brief Destructor to free all buffers in the pool.
|
193
418
|
*/
|
194
|
-
~
|
419
|
+
~ggml_cann_pool_buf() {
|
195
420
|
ggml_cann_set_device(device);
|
196
421
|
for (int i = 0; i < MAX_BUFFERS; ++i) {
|
197
422
|
ggml_cann_buffer& b = buffer_pool[i];
|
198
423
|
if (b.ptr != nullptr) {
|
199
|
-
|
424
|
+
aclrtFree(b.ptr);
|
200
425
|
pool_size -= b.size;
|
201
426
|
}
|
202
427
|
}
|
@@ -212,63 +437,93 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
212
437
|
* @return A pointer to the allocated buffer.
|
213
438
|
*/
|
214
439
|
void* alloc(size_t size, size_t* actual_size) override {
|
215
|
-
const size_t alignment = 128;
|
216
440
|
size = GGML_PAD(size, alignment);
|
217
441
|
if (size == 0) {
|
218
442
|
size = alignment;
|
219
443
|
}
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
for (int i = 0; i < MAX_BUFFERS; ++i) {
|
444
|
+
|
445
|
+
void* ptr = nullptr;
|
446
|
+
auto now = std::chrono::steady_clock::now();
|
447
|
+
|
448
|
+
int i = 0;
|
449
|
+
for (; i < MAX_BUFFERS; ++i) {
|
227
450
|
ggml_cann_buffer& b = buffer_pool[i];
|
228
|
-
if (b.ptr
|
451
|
+
if (b.ptr == nullptr) {
|
452
|
+
break;
|
453
|
+
}
|
454
|
+
if (b.used) {
|
455
|
+
continue;
|
456
|
+
}
|
457
|
+
if (b.size >= size) {
|
458
|
+
// reuse the buffer if the size is enough
|
459
|
+
const size_t margin = b.size - size;
|
460
|
+
if (margin <= max_reuse_margin) {
|
461
|
+
*actual_size = b.size;
|
462
|
+
b.used = true;
|
463
|
+
ptr = b.ptr;
|
229
464
|
#ifdef DEBUG_CANN_MALLOC
|
230
|
-
|
231
|
-
|
465
|
+
GGML_LOG_INFO(
|
466
|
+
"cann pool[%d]: reused %p, "
|
467
|
+
"pool_size = %5u MB, "
|
468
|
+
"size = %5u MB, "
|
469
|
+
"margin = %5u MB\n",
|
470
|
+
device, b.ptr,
|
471
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
472
|
+
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
|
473
|
+
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
|
232
474
|
#endif
|
233
|
-
|
234
|
-
size_t diff = b.size - size;
|
235
|
-
if (diff < best_diff) {
|
236
|
-
best_diff = diff;
|
237
|
-
ibest = i;
|
238
|
-
if (!best_diff) {
|
239
|
-
void* ptr = b.ptr;
|
240
|
-
*actual_size = b.size;
|
241
|
-
b.ptr = nullptr;
|
242
|
-
b.size = 0;
|
243
|
-
return ptr;
|
244
|
-
}
|
245
|
-
}
|
475
|
+
break;
|
246
476
|
}
|
247
477
|
}
|
478
|
+
|
479
|
+
bool should_clean = !disable_clean &&
|
480
|
+
b.size > min_free_margin &&
|
481
|
+
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
|
482
|
+
if (should_clean) {
|
483
|
+
// free the buffer if the size is needed to be freed
|
484
|
+
ACL_CHECK(aclrtFree(b.ptr));
|
485
|
+
pool_size -= b.size;
|
486
|
+
#ifdef DEBUG_CANN_MALLOC
|
487
|
+
GGML_LOG_INFO(
|
488
|
+
"cann pool[%d]: clean %p, "
|
489
|
+
"pool_size = %5u MB, "
|
490
|
+
"size = %5u MB\n",
|
491
|
+
device, b.ptr,
|
492
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
493
|
+
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
|
494
|
+
#endif
|
495
|
+
b.ptr = nullptr;
|
496
|
+
}
|
248
497
|
}
|
249
|
-
if (
|
250
|
-
ggml_cann_buffer& b = buffer_pool[ibest];
|
251
|
-
void* ptr = b.ptr;
|
252
|
-
*actual_size = b.size;
|
253
|
-
b.ptr = nullptr;
|
254
|
-
b.size = 0;
|
498
|
+
if (ptr != nullptr) {
|
255
499
|
return ptr;
|
256
500
|
}
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
501
|
+
|
502
|
+
if (i < MAX_BUFFERS) {
|
503
|
+
// allocate a new buffer if no buffer can be reused
|
504
|
+
ggml_cann_buffer& b = buffer_pool[i];
|
505
|
+
ggml_cann_set_device(device);
|
506
|
+
ACL_CHECK(aclrtMalloc(&b.ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
|
507
|
+
pool_size += size;
|
508
|
+
*actual_size = size;
|
509
|
+
b.size = size;
|
510
|
+
b.used = true;
|
511
|
+
if (i >= MAX_BUFFERS - 8) {
|
512
|
+
GGML_LOG_WARN("cann pool[%d]: slots almost full\n", device);
|
513
|
+
}
|
263
514
|
#ifdef DEBUG_CANN_MALLOC
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
515
|
+
GGML_LOG_INFO(
|
516
|
+
"cann pool[%d]: allocate %p, "
|
517
|
+
"pool_size = %5u MB, "
|
518
|
+
"size = %5u MB\n",
|
519
|
+
device, b.ptr,
|
520
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
|
521
|
+
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
|
270
522
|
#endif
|
271
|
-
|
523
|
+
return b.ptr;
|
524
|
+
}
|
525
|
+
|
526
|
+
GGML_ABORT("cann pool[%d]: slots full\n", device);
|
272
527
|
}
|
273
528
|
|
274
529
|
/**
|
@@ -278,18 +533,24 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
278
533
|
* @param size Size of the buffer to free.
|
279
534
|
*/
|
280
535
|
void free(void* ptr, size_t size) override {
|
536
|
+
GGML_UNUSED(size);
|
281
537
|
for (int i = 0; i < MAX_BUFFERS; ++i) {
|
282
538
|
ggml_cann_buffer& b = buffer_pool[i];
|
283
|
-
if (b.ptr
|
284
|
-
|
285
|
-
b.size = size;
|
286
|
-
return;
|
539
|
+
if (b.ptr != ptr) {
|
540
|
+
continue;
|
287
541
|
}
|
542
|
+
b.used = false;
|
543
|
+
b.last_used = std::chrono::steady_clock::now();
|
544
|
+
#ifdef DEBUG_CANN_MALLOC
|
545
|
+
GGML_LOG_INFO(
|
546
|
+
"cann pool[%d]: return %p, "
|
547
|
+
"pool_size = %5u MB\n",
|
548
|
+
device, b.ptr,
|
549
|
+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
|
550
|
+
#endif
|
551
|
+
return;
|
288
552
|
}
|
289
|
-
|
290
|
-
// tasks in stream.
|
291
|
-
// TODO, fix me.
|
292
|
-
GGML_ABORT("Cann buffer pool full, increase MAX_CANN_BUFFERS\n");
|
553
|
+
GGML_ABORT("cann pool[%d]: slots full\n", device);
|
293
554
|
}
|
294
555
|
};
|
295
556
|
|
@@ -347,8 +608,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
347
608
|
* @param device The device ID to associate with this buffer pool.
|
348
609
|
*/
|
349
610
|
explicit ggml_cann_pool_vmm(int device)
|
350
|
-
|
351
|
-
granularity(ggml_cann_info().devices[device].vmm_granularity) {
|
611
|
+
: device(device) {
|
352
612
|
auto dev = ggml_cann_info().devices[device];
|
353
613
|
granularity = dev.vmm_granularity;
|
354
614
|
max_size = dev.total_vram;
|
@@ -471,7 +731,18 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
471
731
|
*/
|
472
732
|
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
|
473
733
|
int device) {
|
474
|
-
|
734
|
+
bool disable_vmm = (getenv("GGML_CANN_DISABLE_VMM_POOL") != nullptr);
|
735
|
+
if (!disable_vmm && ggml_cann_info().devices[device].vmm) {
|
736
|
+
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
|
737
|
+
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
|
738
|
+
}
|
739
|
+
bool enable_buf_prio = (getenv("GGML_CANN_ENABLE_BUF_PRIO_POOL") != nullptr);
|
740
|
+
if (enable_buf_prio) {
|
741
|
+
GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
|
742
|
+
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
|
743
|
+
}
|
744
|
+
GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
|
745
|
+
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
|
475
746
|
}
|
476
747
|
|
477
748
|
// cann buffer
|
@@ -796,14 +1067,14 @@ static bool need_transform(ggml_type type) {
|
|
796
1067
|
* @param buffer The CANN buffer from which to initialize the tensor.
|
797
1068
|
* @param tensor Pointer to the tensor to be initialized.
|
798
1069
|
*/
|
799
|
-
static
|
1070
|
+
static enum ggml_status ggml_backend_cann_buffer_init_tensor(
|
800
1071
|
ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
|
801
1072
|
if (tensor->view_src != NULL && tensor->view_offs == 0) {
|
802
1073
|
GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
|
803
|
-
return;
|
1074
|
+
return GGML_STATUS_SUCCESS;
|
804
1075
|
}
|
805
1076
|
|
806
|
-
// TODO:
|
1077
|
+
// TODO: cann backend doesn't support quantized yet. Just leave the code
|
807
1078
|
// here.
|
808
1079
|
if (ggml_is_quantized(tensor->type)) {
|
809
1080
|
// Initialize padding to 0 to avoid possible NaN values
|
@@ -817,6 +1088,7 @@ static void ggml_backend_cann_buffer_init_tensor(
|
|
817
1088
|
memset_size, 0, memset_size));
|
818
1089
|
}
|
819
1090
|
}
|
1091
|
+
return GGML_STATUS_SUCCESS;
|
820
1092
|
}
|
821
1093
|
|
822
1094
|
// TODO: need handle tensor which has paddings.
|
@@ -1019,8 +1291,11 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
|
1019
1291
|
|
1020
1292
|
ggml_cann_set_device(buft_ctx->device);
|
1021
1293
|
|
1022
|
-
|
1023
|
-
|
1294
|
+
const size_t alignment = 128;
|
1295
|
+
size = GGML_PAD(size, alignment);
|
1296
|
+
if (size == 0) {
|
1297
|
+
size = alignment;
|
1298
|
+
}
|
1024
1299
|
void* dev_ptr;
|
1025
1300
|
aclError err = aclrtMalloc(&dev_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST);
|
1026
1301
|
if (err != ACL_SUCCESS) {
|
@@ -1299,47 +1574,69 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
1299
1574
|
ggml_cann_dup(ctx, dst);
|
1300
1575
|
break;
|
1301
1576
|
case GGML_OP_ADD:
|
1302
|
-
|
1577
|
+
case GGML_OP_ADD1:
|
1578
|
+
ggml_cann_binary_op<aclnn_add>(ctx, dst);
|
1579
|
+
break;
|
1580
|
+
case GGML_OP_SUB:
|
1581
|
+
ggml_cann_binary_op<aclnn_sub>(ctx, dst);
|
1303
1582
|
break;
|
1304
1583
|
case GGML_OP_ACC:
|
1305
1584
|
ggml_cann_acc(ctx, dst);
|
1306
1585
|
break;
|
1307
1586
|
case GGML_OP_MUL:
|
1308
|
-
|
1587
|
+
ggml_cann_binary_op<aclnn_mul>(ctx, dst);
|
1309
1588
|
break;
|
1310
1589
|
case GGML_OP_DIV:
|
1311
|
-
|
1590
|
+
ggml_cann_binary_op<aclnn_div>(ctx, dst);
|
1312
1591
|
break;
|
1313
1592
|
case GGML_OP_UNARY:
|
1314
1593
|
switch (ggml_get_unary_op(dst)) {
|
1594
|
+
case GGML_UNARY_OP_ABS:
|
1595
|
+
GGML_CANN_CALL_UNARY_OP(Abs);
|
1596
|
+
break;
|
1597
|
+
case GGML_UNARY_OP_NEG:
|
1598
|
+
GGML_CANN_CALL_UNARY_OP(Neg);
|
1599
|
+
break;
|
1315
1600
|
case GGML_UNARY_OP_GELU:
|
1316
|
-
|
1317
|
-
ctx, dst);
|
1601
|
+
GGML_CANN_CALL_UNARY_OP(Gelu);
|
1318
1602
|
break;
|
1319
1603
|
case GGML_UNARY_OP_SILU:
|
1320
|
-
|
1321
|
-
ctx, dst);
|
1322
|
-
break;
|
1323
|
-
// TODO: Use faster gelu??
|
1324
|
-
case GGML_UNARY_OP_GELU_QUICK:
|
1325
|
-
ggml_cann_activation<aclnnGeluGetWorkspaceSize, aclnnGelu>(
|
1326
|
-
ctx, dst);
|
1604
|
+
GGML_CANN_CALL_UNARY_OP(Silu);
|
1327
1605
|
break;
|
1606
|
+
case GGML_UNARY_OP_GELU_QUICK: {
|
1607
|
+
auto lambda = [](ggml_backend_cann_context& ctx,
|
1608
|
+
aclTensor* acl_src,
|
1609
|
+
aclTensor* acl_dst) {
|
1610
|
+
GGML_CANN_CALL_ACLNN_OP(ctx, GeluV2, acl_src, 0, acl_dst);
|
1611
|
+
};
|
1612
|
+
ggml_cann_unary_op(lambda, ctx, dst);
|
1613
|
+
} break;
|
1328
1614
|
case GGML_UNARY_OP_TANH:
|
1329
|
-
|
1330
|
-
ctx, dst);
|
1615
|
+
GGML_CANN_CALL_UNARY_OP(Tanh);
|
1331
1616
|
break;
|
1332
1617
|
case GGML_UNARY_OP_RELU:
|
1333
|
-
|
1334
|
-
|
1618
|
+
GGML_CANN_CALL_UNARY_OP(Relu);
|
1619
|
+
break;
|
1620
|
+
case GGML_UNARY_OP_SIGMOID:
|
1621
|
+
GGML_CANN_CALL_UNARY_OP(Sigmoid);
|
1335
1622
|
break;
|
1336
1623
|
case GGML_UNARY_OP_HARDSIGMOID:
|
1337
|
-
|
1338
|
-
aclnnHardsigmoid>(ctx, dst);
|
1624
|
+
GGML_CANN_CALL_UNARY_OP(Hardsigmoid);
|
1339
1625
|
break;
|
1340
1626
|
case GGML_UNARY_OP_HARDSWISH:
|
1341
|
-
|
1342
|
-
|
1627
|
+
GGML_CANN_CALL_UNARY_OP(Hardswish);
|
1628
|
+
break;
|
1629
|
+
case GGML_UNARY_OP_EXP:
|
1630
|
+
GGML_CANN_CALL_UNARY_OP(Exp);
|
1631
|
+
break;
|
1632
|
+
case GGML_UNARY_OP_ELU:
|
1633
|
+
ggml_cann_elu(ctx, dst);
|
1634
|
+
break;
|
1635
|
+
case GGML_UNARY_OP_SGN:
|
1636
|
+
GGML_CANN_CALL_UNARY_OP(Sign);
|
1637
|
+
break;
|
1638
|
+
case GGML_UNARY_OP_STEP:
|
1639
|
+
ggml_cann_step(ctx, dst);
|
1343
1640
|
break;
|
1344
1641
|
default:
|
1345
1642
|
return false;
|
@@ -1376,12 +1673,18 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
1376
1673
|
ggml_cann_mul_mat(ctx, dst);
|
1377
1674
|
break;
|
1378
1675
|
case GGML_OP_MUL_MAT_ID:
|
1379
|
-
|
1676
|
+
ggml_cann_mul_mat_id(ctx, dst);
|
1677
|
+
break;
|
1380
1678
|
case GGML_OP_SCALE:
|
1381
1679
|
ggml_cann_scale(ctx, dst);
|
1382
1680
|
break;
|
1383
1681
|
case GGML_OP_SQR:
|
1384
|
-
|
1682
|
+
GGML_ASSERT(dst->src[1] == nullptr);
|
1683
|
+
dst->src[1] = dst->src[0];
|
1684
|
+
ggml_cann_binary_op<aclnn_mul>(ctx, dst);
|
1685
|
+
break;
|
1686
|
+
case GGML_OP_SQRT:
|
1687
|
+
GGML_CANN_CALL_UNARY_OP(Sqrt);
|
1385
1688
|
break;
|
1386
1689
|
case GGML_OP_CLAMP:
|
1387
1690
|
ggml_cann_clamp(ctx, dst);
|
@@ -1413,12 +1716,42 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
1413
1716
|
case GGML_OP_POOL_2D:
|
1414
1717
|
ggml_cann_pool2d(ctx, dst);
|
1415
1718
|
break;
|
1719
|
+
case GGML_OP_SUM:
|
1720
|
+
ggml_cann_sum(ctx, dst);
|
1721
|
+
break;
|
1416
1722
|
case GGML_OP_SUM_ROWS:
|
1417
1723
|
ggml_cann_sum_rows(ctx, dst);
|
1418
1724
|
break;
|
1419
1725
|
case GGML_OP_ARGSORT:
|
1420
1726
|
ggml_cann_argsort(ctx, dst);
|
1421
1727
|
break;
|
1728
|
+
case GGML_OP_ARGMAX:
|
1729
|
+
ggml_cann_argmax(ctx, dst);
|
1730
|
+
break;
|
1731
|
+
case GGML_OP_COS:
|
1732
|
+
ggml_cann_unary_op<aclnn_cos>(ctx, dst);
|
1733
|
+
break;
|
1734
|
+
case GGML_OP_SIN:
|
1735
|
+
ggml_cann_unary_op<aclnn_sin>(ctx, dst);
|
1736
|
+
break;
|
1737
|
+
case GGML_OP_CONV_TRANSPOSE_1D:
|
1738
|
+
ggml_cann_conv_transpose_1d(ctx, dst);
|
1739
|
+
break;
|
1740
|
+
case GGML_OP_LOG:
|
1741
|
+
GGML_CANN_CALL_UNARY_OP(Log);
|
1742
|
+
break;
|
1743
|
+
case GGML_OP_MEAN:
|
1744
|
+
ggml_cann_mean(ctx, dst);
|
1745
|
+
break;
|
1746
|
+
case GGML_OP_PAD_REFLECT_1D:
|
1747
|
+
ggml_cann_pad_reflect_1d(ctx, dst);
|
1748
|
+
break;
|
1749
|
+
case GGML_OP_COUNT_EQUAL:
|
1750
|
+
ggml_cann_count_equal(ctx, dst);
|
1751
|
+
break;
|
1752
|
+
case GGML_OP_FLASH_ATTN_EXT:
|
1753
|
+
ggml_cann_flash_attn_ext(ctx, dst);
|
1754
|
+
break;
|
1422
1755
|
default:
|
1423
1756
|
return false;
|
1424
1757
|
}
|
@@ -1457,21 +1790,15 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
|
|
1457
1790
|
ACL_CHECK(aclrtSynchronizeDevice());
|
1458
1791
|
ACL_CHECK(aclrtResetDevice(cann_ctx->device));
|
1459
1792
|
|
1460
|
-
// finalize when last backend freed.
|
1461
|
-
if (cann_ctx->device == ggml_backend_cann_get_device_count() - 1) {
|
1462
|
-
ACL_CHECK(aclFinalize());
|
1463
|
-
}
|
1464
|
-
|
1465
1793
|
delete cann_ctx;
|
1466
1794
|
delete backend;
|
1467
1795
|
}
|
1468
1796
|
|
1797
|
+
|
1469
1798
|
/**
|
1470
1799
|
* @brief Sets tensor data asynchronously in the CANN backend.
|
1471
1800
|
*
|
1472
|
-
* This function asynchronously sets tensor data in the CANN backend.
|
1473
|
-
* on the tensor type, it may perform data transformations before copying data
|
1474
|
-
* to the device.
|
1801
|
+
* This function asynchronously sets tensor data in the CANN backend.
|
1475
1802
|
*
|
1476
1803
|
* @param backend Pointer to the CANN backend structure.
|
1477
1804
|
* @param tensor Pointer to the tensor structure to set data for.
|
@@ -1486,23 +1813,28 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
|
|
1486
1813
|
size_t size) {
|
1487
1814
|
ggml_backend_cann_context *cann_ctx =
|
1488
1815
|
(ggml_backend_cann_context *)backend->context;
|
1816
|
+
ggml_backend_buffer_t buf =
|
1817
|
+
tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
1489
1818
|
|
1490
|
-
|
1491
|
-
|
1492
|
-
|
1493
|
-
cann_ctx->stream()));
|
1494
|
-
} else {
|
1495
|
-
void *transform_buffer = malloc(size);
|
1496
|
-
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
1819
|
+
GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) &&
|
1820
|
+
"unsupported buffer type");
|
1821
|
+
GGML_ASSERT(!ggml_is_quantized(tensor->type));
|
1497
1822
|
|
1498
|
-
|
1499
|
-
|
1500
|
-
ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
|
1501
|
-
ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
|
1502
|
-
free(transform_buffer);
|
1503
|
-
}
|
1823
|
+
ggml_cann_async_memcpy(cann_ctx, (char *)tensor->data + offset, data, size,
|
1824
|
+
ACL_MEMCPY_HOST_TO_DEVICE);
|
1504
1825
|
}
|
1505
1826
|
|
1827
|
+
/**
|
1828
|
+
* @brief Gets tensor data asynchronously in the CANN backend.
|
1829
|
+
*
|
1830
|
+
* This function asynchronously gets tensor data in the CANN backend.
|
1831
|
+
*
|
1832
|
+
* @param backend Pointer to the CANN backend structure.
|
1833
|
+
* @param tensor Pointer to the tensor structure to get data from.
|
1834
|
+
* @param data Pointer to the host data to copy from the tensor.
|
1835
|
+
* @param offset Offset in bytes within the host data.
|
1836
|
+
* @param size Size of the data to copy in bytes.
|
1837
|
+
*/
|
1506
1838
|
static void ggml_backend_cann_get_tensor_async(
|
1507
1839
|
ggml_backend_t backend, const ggml_tensor *tensor, void *data,
|
1508
1840
|
size_t offset, size_t size) {
|
@@ -1513,20 +1845,11 @@ static void ggml_backend_cann_get_tensor_async(
|
|
1513
1845
|
|
1514
1846
|
GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) &&
|
1515
1847
|
"unsupported buffer type");
|
1848
|
+
GGML_ASSERT(!ggml_is_quantized(tensor->type));
|
1849
|
+
|
1850
|
+
ggml_cann_async_memcpy(cann_ctx, data, (char *)tensor->data + offset, size,
|
1851
|
+
ACL_MEMCPY_DEVICE_TO_HOST);
|
1516
1852
|
|
1517
|
-
if (!need_transform(tensor->type)) {
|
1518
|
-
ACL_CHECK(aclrtMemcpyAsync(data, size, (char *)tensor->data + offset,
|
1519
|
-
size, ACL_MEMCPY_DEVICE_TO_HOST,
|
1520
|
-
cann_ctx->stream()));
|
1521
|
-
} else {
|
1522
|
-
void *transform_buffer = malloc(size);
|
1523
|
-
ACL_CHECK(aclrtMemcpyAsync(
|
1524
|
-
transform_buffer, size, (char *)tensor->data + offset, size,
|
1525
|
-
ACL_MEMCPY_DEVICE_TO_HOST, cann_ctx->stream()));
|
1526
|
-
ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
|
1527
|
-
ggml_backend_cann_transform_back(tensor, transform_buffer, data);
|
1528
|
-
free(transform_buffer);
|
1529
|
-
}
|
1530
1853
|
}
|
1531
1854
|
|
1532
1855
|
/**
|
@@ -1586,6 +1909,8 @@ static bool ggml_backend_cann_cpy_tensor_async(
|
|
1586
1909
|
ggml_cann_set_device(cann_ctx_src->device);
|
1587
1910
|
ACL_CHECK(aclrtDeviceEnablePeerAccess(cann_ctx_dst->device, 0));
|
1588
1911
|
|
1912
|
+
// wait for task_queue empty to keep task order.
|
1913
|
+
cann_ctx_src->task_queue.wait();
|
1589
1914
|
ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size,
|
1590
1915
|
ACL_MEMCPY_DEVICE_TO_DEVICE,
|
1591
1916
|
cann_ctx_src->stream()));
|
@@ -1613,9 +1938,8 @@ static bool ggml_backend_cann_cpy_tensor_async(
|
|
1613
1938
|
static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
|
1614
1939
|
ggml_backend_cann_context* cann_ctx =
|
1615
1940
|
(ggml_backend_cann_context*)backend->context;
|
1616
|
-
|
1941
|
+
cann_ctx->task_queue.wait();
|
1617
1942
|
ggml_cann_set_device(cann_ctx->device);
|
1618
|
-
|
1619
1943
|
ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
|
1620
1944
|
}
|
1621
1945
|
|
@@ -1674,58 +1998,86 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1674
1998
|
switch (op->op) {
|
1675
1999
|
case GGML_OP_UNARY:
|
1676
2000
|
switch (ggml_get_unary_op(op)) {
|
2001
|
+
case GGML_UNARY_OP_ABS:
|
2002
|
+
case GGML_UNARY_OP_NEG:
|
1677
2003
|
case GGML_UNARY_OP_GELU:
|
1678
2004
|
case GGML_UNARY_OP_SILU:
|
1679
2005
|
case GGML_UNARY_OP_RELU:
|
2006
|
+
case GGML_UNARY_OP_SIGMOID:
|
1680
2007
|
case GGML_UNARY_OP_HARDSIGMOID:
|
1681
2008
|
case GGML_UNARY_OP_HARDSWISH:
|
1682
2009
|
case GGML_UNARY_OP_GELU_QUICK:
|
1683
2010
|
case GGML_UNARY_OP_TANH:
|
2011
|
+
case GGML_UNARY_OP_EXP:
|
2012
|
+
case GGML_UNARY_OP_ELU:
|
2013
|
+
case GGML_UNARY_OP_SGN:
|
2014
|
+
case GGML_UNARY_OP_STEP:
|
1684
2015
|
return true;
|
1685
2016
|
default:
|
1686
2017
|
return false;
|
1687
2018
|
}
|
1688
2019
|
case GGML_OP_MUL_MAT: {
|
1689
2020
|
switch (op->src[0]->type) {
|
1690
|
-
case GGML_TYPE_Q8_0:
|
1691
|
-
// Current groupsize should not be greater than k-1 in
|
1692
|
-
// aclnnWeightQuantBatchMatmulV2GetWorkspaceSize
|
1693
|
-
if (op->src[0]->ne[0] <= QK8_0) {
|
1694
|
-
return false;
|
1695
|
-
}
|
1696
2021
|
case GGML_TYPE_F16:
|
1697
2022
|
case GGML_TYPE_F32:
|
1698
|
-
case GGML_TYPE_Q4_0:
|
1699
2023
|
return true;
|
2024
|
+
case GGML_TYPE_Q8_0:
|
2025
|
+
case GGML_TYPE_Q4_0:
|
2026
|
+
#ifdef ASCEND_310P
|
2027
|
+
// Q4 && Q8 per group is not suppor on 310p device
|
2028
|
+
return false;
|
2029
|
+
#endif
|
2030
|
+
// only support contiguous for quantized types.
|
2031
|
+
return ggml_is_contiguous(op->src[0]) &&
|
2032
|
+
ggml_is_contiguous(op->src[1]);
|
1700
2033
|
default:
|
1701
2034
|
return false;
|
1702
2035
|
}
|
1703
2036
|
}
|
1704
2037
|
case GGML_OP_MUL_MAT_ID:
|
1705
|
-
return false;
|
1706
|
-
// embedding
|
1707
|
-
case GGML_OP_GET_ROWS: {
|
1708
2038
|
switch (op->src[0]->type) {
|
1709
|
-
case GGML_TYPE_F32:
|
1710
2039
|
case GGML_TYPE_F16:
|
1711
|
-
case
|
1712
|
-
case GGML_TYPE_Q8_0:
|
2040
|
+
case GGML_TYPE_F32:
|
1713
2041
|
return true;
|
2042
|
+
case GGML_TYPE_Q8_0:
|
2043
|
+
case GGML_TYPE_Q4_0:
|
2044
|
+
#ifdef ASCEND_310P
|
2045
|
+
// Q4 && Q8 per group is not suppor on 310p device
|
2046
|
+
return false;
|
2047
|
+
#endif
|
2048
|
+
// only support contiguous for quantized types.
|
2049
|
+
return ggml_is_contiguous(op->src[0]) &&
|
2050
|
+
ggml_is_contiguous(op->src[1]);
|
1714
2051
|
default:
|
1715
2052
|
return false;
|
1716
2053
|
}
|
1717
|
-
|
1718
|
-
case
|
1719
|
-
switch (op->type) {
|
2054
|
+
// embedding
|
2055
|
+
case GGML_OP_GET_ROWS: {
|
2056
|
+
switch (op->src[0]->type) {
|
1720
2057
|
case GGML_TYPE_F32:
|
1721
2058
|
case GGML_TYPE_F16:
|
1722
2059
|
case GGML_TYPE_Q8_0:
|
1723
|
-
case GGML_TYPE_Q4_0:
|
1724
2060
|
return true;
|
1725
2061
|
default:
|
1726
2062
|
return false;
|
1727
2063
|
}
|
1728
|
-
}
|
2064
|
+
} break;
|
2065
|
+
case GGML_OP_CPY: {
|
2066
|
+
ggml_tensor *src = op->src[0];
|
2067
|
+
if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) ||
|
2068
|
+
(src->type != GGML_TYPE_F32 &&
|
2069
|
+
src->type != GGML_TYPE_F16)) {
|
2070
|
+
// only support F32 and F16.
|
2071
|
+
return false;
|
2072
|
+
}
|
2073
|
+
|
2074
|
+
if (!ggml_are_same_shape(op, src) && !ggml_is_contiguous(op)) {
|
2075
|
+
// unsupport dst is not contiguous.
|
2076
|
+
return false;
|
2077
|
+
}
|
2078
|
+
|
2079
|
+
return true;
|
2080
|
+
} break;
|
1729
2081
|
case GGML_OP_CONT: {
|
1730
2082
|
// TODO: support GGML_TYPE_BF16
|
1731
2083
|
switch (op->src[0]->type) {
|
@@ -1738,13 +2090,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1738
2090
|
}
|
1739
2091
|
case GGML_OP_ROPE: {
|
1740
2092
|
// TODO: with ops-test v == 1
|
1741
|
-
float
|
2093
|
+
float ext_factor = 0.0f;
|
2094
|
+
memcpy(&ext_factor, (const float *) op->op_params + 7, sizeof(float));
|
1742
2095
|
// TODO: n_dims <= ne0
|
1743
2096
|
if (op->src[0]->ne[0] != op->op_params[1]) {
|
1744
2097
|
return false;
|
1745
2098
|
}
|
1746
2099
|
// TODO: ext_factor != 0
|
1747
|
-
if (
|
2100
|
+
if (ext_factor != 0) {
|
1748
2101
|
return false;
|
1749
2102
|
}
|
1750
2103
|
|
@@ -1756,6 +2109,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1756
2109
|
return false;
|
1757
2110
|
}
|
1758
2111
|
|
2112
|
+
if(!ggml_is_contiguous(op->src[0])){
|
2113
|
+
return false;
|
2114
|
+
}
|
1759
2115
|
return true;
|
1760
2116
|
}
|
1761
2117
|
case GGML_OP_UPSCALE: {
|
@@ -1764,11 +2120,31 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1764
2120
|
if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
|
1765
2121
|
return false;
|
1766
2122
|
}
|
2123
|
+
if (op->op_params[0] != GGML_SCALE_MODE_NEAREST) {
|
2124
|
+
return false;
|
2125
|
+
}
|
1767
2126
|
return true;
|
1768
2127
|
}
|
2128
|
+
case GGML_OP_POOL_2D: {
|
2129
|
+
const int32_t * opts = (const int32_t *) op->op_params;
|
2130
|
+
#ifdef ASCEND_310P
|
2131
|
+
enum ggml_op_pool opt = static_cast<ggml_op_pool>(opts[0]);
|
2132
|
+
if(opt == GGML_OP_POOL_MAX){
|
2133
|
+
return false;
|
2134
|
+
}
|
2135
|
+
#endif
|
2136
|
+
const int k0 = opts[1];
|
2137
|
+
const int k1 = opts[2];
|
2138
|
+
const int p0 = opts[5];
|
2139
|
+
const int p1 = opts[6];
|
2140
|
+
// value of paddingH should be at most half of kernelH
|
2141
|
+
// value of paddingW should be at most half of kernelW
|
2142
|
+
return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
|
2143
|
+
}
|
2144
|
+
case GGML_OP_SUM:
|
2145
|
+
case GGML_OP_DUP:
|
1769
2146
|
case GGML_OP_IM2COL:
|
1770
2147
|
case GGML_OP_CONCAT:
|
1771
|
-
case GGML_OP_DUP:
|
1772
2148
|
case GGML_OP_REPEAT:
|
1773
2149
|
case GGML_OP_NONE:
|
1774
2150
|
case GGML_OP_RESHAPE:
|
@@ -1777,15 +2153,17 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1777
2153
|
case GGML_OP_TRANSPOSE:
|
1778
2154
|
case GGML_OP_NORM:
|
1779
2155
|
case GGML_OP_ADD:
|
2156
|
+
case GGML_OP_ADD1:
|
2157
|
+
case GGML_OP_SUB:
|
1780
2158
|
case GGML_OP_MUL:
|
1781
2159
|
case GGML_OP_DIV:
|
1782
2160
|
case GGML_OP_RMS_NORM:
|
1783
2161
|
case GGML_OP_SCALE:
|
1784
2162
|
case GGML_OP_SQR:
|
2163
|
+
case GGML_OP_SQRT:
|
1785
2164
|
case GGML_OP_CLAMP:
|
1786
2165
|
case GGML_OP_DIAG_MASK_INF:
|
1787
2166
|
case GGML_OP_SOFT_MAX:
|
1788
|
-
case GGML_OP_POOL_2D:
|
1789
2167
|
case GGML_OP_SUM_ROWS:
|
1790
2168
|
case GGML_OP_ARGSORT:
|
1791
2169
|
case GGML_OP_ACC:
|
@@ -1794,7 +2172,47 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1794
2172
|
case GGML_OP_ARANGE:
|
1795
2173
|
case GGML_OP_TIMESTEP_EMBEDDING:
|
1796
2174
|
case GGML_OP_LEAKY_RELU:
|
2175
|
+
case GGML_OP_ARGMAX:
|
2176
|
+
case GGML_OP_COS:
|
2177
|
+
case GGML_OP_SIN:
|
2178
|
+
case GGML_OP_CONV_TRANSPOSE_1D:
|
2179
|
+
case GGML_OP_LOG:
|
2180
|
+
case GGML_OP_MEAN:
|
2181
|
+
case GGML_OP_PAD_REFLECT_1D:
|
2182
|
+
case GGML_OP_COUNT_EQUAL:
|
2183
|
+
return true;
|
2184
|
+
case GGML_OP_FLASH_ATTN_EXT:{
|
2185
|
+
// derived from [ggml-cuda.cu]
|
2186
|
+
if(op->src[1]->type != GGML_TYPE_F16 || op->src[2]->type != GGML_TYPE_F16){
|
2187
|
+
return false;
|
2188
|
+
}
|
2189
|
+
if(op->src[1]->type != GGML_TYPE_F16 && op->src[1]->type != GGML_TYPE_F32 && op->src[1]->type != GGML_TYPE_BF16){
|
2190
|
+
return false;
|
2191
|
+
}
|
2192
|
+
if(op->type != GGML_TYPE_F16 && op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_BF16){
|
2193
|
+
return false;
|
2194
|
+
}
|
2195
|
+
if (op->src[1]->ne[0] != op->src[2]->ne[0]) {
|
2196
|
+
// different head sizes of K and V are not supported yet
|
2197
|
+
return false;
|
2198
|
+
}
|
2199
|
+
if (op->src[0]->ne[0] == 192) {
|
2200
|
+
return false;
|
2201
|
+
}
|
2202
|
+
if (op->src[0]->ne[0] == 576) {
|
2203
|
+
// DeepSeek MLA
|
2204
|
+
return false;
|
2205
|
+
}
|
2206
|
+
if (op->src[0]->ne[3] != 1) {
|
2207
|
+
return false;
|
2208
|
+
}
|
2209
|
+
float logitSoftcap = 0.0f;
|
2210
|
+
memcpy(&logitSoftcap, (float*)op->op_params + 2, sizeof(float));
|
2211
|
+
if(logitSoftcap != 0.0f) {
|
2212
|
+
return false;
|
2213
|
+
}
|
1797
2214
|
return true;
|
2215
|
+
}
|
1798
2216
|
default:
|
1799
2217
|
return false;
|
1800
2218
|
}
|