whispercpp 1.3.1 → 1.3.3
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +7 -3
- data/README.md +161 -43
- data/Rakefile +45 -13
- data/ext/.gitignore +4 -8
- data/ext/dependencies.rb +73 -0
- data/ext/extconf.rb +21 -198
- data/ext/options.rb +85 -0
- data/ext/ruby_whisper.c +177 -0
- data/ext/ruby_whisper.h +17 -2
- data/ext/ruby_whisper_context.c +672 -0
- data/ext/ruby_whisper_error.c +52 -0
- data/ext/ruby_whisper_model.c +232 -0
- data/ext/ruby_whisper_params.c +1303 -0
- data/ext/ruby_whisper_segment.c +220 -0
- data/ext/ruby_whisper_transcribe.cpp +93 -0
- data/ext/ruby_whisper_vad_params.c +288 -0
- data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
- data/ext/sources/CMakeLists.txt +255 -0
- data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
- data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
- data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
- data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
- data/ext/sources/bindings/javascript/package.json +26 -0
- data/ext/sources/bindings/javascript/whisper.js +19 -0
- data/ext/sources/build-xcframework.sh +547 -0
- data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
- data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
- data/ext/sources/cmake/build-info.cmake +60 -0
- data/ext/sources/cmake/git-vars.cmake +22 -0
- data/ext/sources/cmake/whisper-config.cmake.in +65 -0
- data/ext/sources/cmake/whisper.pc.in +10 -0
- data/ext/sources/examples/CMakeLists.txt +124 -0
- data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +133 -0
- data/ext/sources/examples/addon.node/addon.cpp +557 -0
- data/ext/sources/examples/addon.node/index.js +57 -0
- data/ext/sources/examples/addon.node/package.json +16 -0
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/CMakeLists.txt +8 -0
- data/ext/sources/examples/bench/bench.cpp +176 -0
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
- data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
- data/ext/sources/examples/cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/cli/cli.cpp +1295 -0
- data/ext/sources/examples/coi-serviceworker.js +146 -0
- data/ext/sources/examples/command/CMakeLists.txt +10 -0
- data/ext/sources/examples/command/command.cpp +800 -0
- data/ext/sources/examples/command/commands.txt +9 -0
- data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
- data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/common-ggml.cpp +238 -0
- data/ext/sources/examples/common-ggml.h +18 -0
- data/ext/sources/examples/common-sdl.cpp +227 -0
- data/ext/sources/examples/common-sdl.h +49 -0
- data/ext/sources/examples/common-whisper.cpp +175 -0
- data/ext/sources/examples/common-whisper.h +24 -0
- data/ext/sources/examples/common.cpp +675 -0
- data/ext/sources/examples/common.h +322 -0
- data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
- data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
- data/ext/sources/examples/generate-karaoke.sh +57 -0
- data/ext/sources/examples/grammar-parser.cpp +423 -0
- data/ext/sources/examples/grammar-parser.h +29 -0
- data/ext/sources/examples/helpers.js +191 -0
- data/ext/sources/examples/json.hpp +24596 -0
- data/ext/sources/examples/livestream.sh +112 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
- data/ext/sources/examples/lsp/lsp.cpp +469 -0
- data/ext/sources/examples/lsp/whisper.vim +362 -0
- data/ext/sources/examples/miniaudio.h +93468 -0
- data/ext/sources/examples/python/test_whisper_processor.py +7 -0
- data/ext/sources/examples/python/whisper_processor.py +54 -0
- data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
- data/ext/sources/examples/quantize/quantize.cpp +226 -0
- data/ext/sources/examples/server/CMakeLists.txt +15 -0
- data/ext/sources/examples/server/bench.js +29 -0
- data/ext/sources/examples/server/httplib.h +10497 -0
- data/ext/sources/examples/server/server.cpp +1238 -0
- data/ext/sources/examples/server.py +115 -0
- data/ext/sources/examples/stb_vorbis.c +5584 -0
- data/ext/sources/examples/stream/CMakeLists.txt +10 -0
- data/ext/sources/examples/stream/stream.cpp +435 -0
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
- data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
- data/ext/sources/examples/sycl/build.sh +22 -0
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
- data/ext/sources/examples/sycl/run-whisper.sh +17 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +43 -0
- data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
- data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +1914 -0
- data/ext/sources/examples/talk-llama/llama-arch.h +464 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +843 -0
- data/ext/sources/examples/talk-llama/llama-batch.h +147 -0
- data/ext/sources/examples/talk-llama/llama-chat.cpp +685 -0
- data/ext/sources/examples/talk-llama/llama-chat.h +59 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +2845 -0
- data/ext/sources/examples/talk-llama/llama-context.h +297 -0
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
- data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
- data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
- data/ext/sources/examples/talk-llama/llama-graph.cpp +1693 -0
- data/ext/sources/examples/talk-llama/llama-graph.h +710 -0
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +103 -0
- data/ext/sources/examples/talk-llama/llama-hparams.h +207 -0
- data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
- data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
- data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
- data/ext/sources/examples/talk-llama/llama-io.h +35 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +44 -0
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +439 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +59 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +116 -0
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
- data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1163 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +282 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +15114 -0
- data/ext/sources/examples/talk-llama/llama-model.h +452 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +1049 -0
- data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
- data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +3377 -0
- data/ext/sources/examples/talk-llama/llama-vocab.h +132 -0
- data/ext/sources/examples/talk-llama/llama.cpp +358 -0
- data/ext/sources/examples/talk-llama/llama.h +1484 -0
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
- data/ext/sources/examples/talk-llama/speak +40 -0
- data/ext/sources/examples/talk-llama/speak.bat +1 -0
- data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +810 -0
- data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
- data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +854 -0
- data/ext/sources/examples/talk-llama/unicode.h +66 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +149 -0
- data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
- data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +251 -0
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
- data/ext/sources/ggml/CMakeLists.txt +435 -0
- data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
- data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
- data/ext/sources/ggml/cmake/common.cmake +50 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
- data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-backend.h +10 -8
- data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
- data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +11 -1
- data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
- data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
- data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
- data/ext/{ggml → sources/ggml}/include/ggml.h +325 -269
- data/ext/sources/ggml/include/gguf.h +202 -0
- data/ext/sources/ggml/src/CMakeLists.txt +404 -0
- data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
- data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +92 -53
- data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +69 -34
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +75 -0
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
- data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +140 -1
- data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +588 -146
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
- data/ext/{ggml → sources/ggml}/src/ggml-common.h +16 -8
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +597 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +3 -2
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- data/ext/{ggml/src/ggml-cpu/cpu-feats-x86.cpp → sources/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp} +5 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +3285 -0
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +73 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +172 -41
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3551 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +78 -25
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.cpp → sources/ggml/src/ggml-cpu/hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3594 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +19 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +9786 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.h +118 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-quants.h → sources/ggml/src/ggml-cpu/quants.h} +26 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +1184 -0
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.cpp → sources/ggml/src/ggml-cpu/traits.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +345 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +1027 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +851 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +752 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +31 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1474 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +638 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3647 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +506 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +155 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +26 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +378 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +66 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
- data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
- data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-impl.h +147 -158
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +121 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +649 -0
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2504 -1108
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +2102 -1463
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +110 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +6494 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
- data/ext/{ggml → sources/ggml}/src/ggml-quants.c +120 -128
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +494 -84
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +344 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +561 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +56 -70
- data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +8 -12
- data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +575 -0
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +839 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +823 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +188 -67
- data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2987 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1120 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +84 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +102 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +212 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1197 -1295
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
- data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +60 -81
- data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1065 -0
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +482 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
- data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +111 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +472 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +38 -28
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +15 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +6 -11
- data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1307 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +289 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +200 -0
- data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3822 -1335
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +61 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +203 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- data/ext/{ggml → sources/ggml}/src/ggml.c +918 -1782
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +1351 -0
- data/ext/{include → sources/include}/whisper.h +70 -2
- data/ext/sources/src/CMakeLists.txt +145 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +36 -10
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +29 -3
- data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
- data/ext/sources/src/whisper-arch.h +197 -0
- data/ext/{src → sources/src}/whisper.cpp +1966 -386
- data/ext/sources/tests/CMakeLists.txt +105 -0
- data/ext/sources/tests/earnings21/eval.mk +58 -0
- data/ext/sources/tests/earnings21/eval.py +68 -0
- data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
- data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
- data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
- data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
- data/ext/sources/tests/earnings21/requirements.txt +6 -0
- data/ext/sources/tests/en-0-ref.txt +1 -0
- data/ext/sources/tests/en-1-ref.txt +1 -0
- data/ext/sources/tests/en-2-ref.txt +1 -0
- data/ext/sources/tests/es-0-ref.txt +1 -0
- data/ext/sources/tests/librispeech/eval.mk +39 -0
- data/ext/sources/tests/librispeech/eval.py +47 -0
- data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
- data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
- data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
- data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
- data/ext/sources/tests/librispeech/requirements.txt +6 -0
- data/ext/sources/tests/run-tests.sh +130 -0
- data/ext/sources/tests/test-c.c +3 -0
- data/ext/sources/tests/test-vad-full.cpp +54 -0
- data/ext/sources/tests/test-vad.cpp +83 -0
- data/ext/sources/tests/test-whisper.js +58 -0
- data/extsources.rb +39 -5
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +202 -126
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +510 -0
- data/test/helper.rb +24 -0
- data/{tests → test}/test_callback.rb +45 -3
- data/{tests → test}/test_error.rb +2 -2
- data/{tests → test}/test_model.rb +47 -0
- data/test/test_package.rb +51 -0
- data/test/test_params.rb +297 -0
- data/test/test_segment.rb +146 -0
- data/test/test_vad.rb +19 -0
- data/test/test_vad_params.rb +103 -0
- data/{tests → test}/test_whisper.rb +106 -36
- data/whispercpp.gemspec +5 -5
- metadata +837 -134
- data/ext/cpu.mk +0 -9
- data/ext/examples/dr_wav.h +0 -8815
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -10835
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
- data/ext/ggml/src/ggml-sycl/convert.cpp +0 -547
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
- data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +0 -1015
- data/ext/ggml/src/ggml-sycl/norm.cpp +0 -378
- data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
- data/ext/metal-embed.mk +0 -17
- data/ext/metal.mk +0 -6
- data/ext/ruby_whisper.cpp +0 -1909
- data/ext/scripts/get-flags.mk +0 -38
- data/lib/whisper.rb +0 -2
- data/tests/helper.rb +0 -7
- data/tests/test_package.rb +0 -31
- data/tests/test_params.rb +0 -160
- data/tests/test_segment.rb +0 -83
- /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
- /data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.h → sources/ggml/src/ggml-cpu/hbm.h} +0 -0
- /data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.h → sources/ggml/src/ggml-cpu/traits.h} +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
- /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
@@ -198,7 +198,7 @@
|
|
198
198
|
|
199
199
|
#ifndef __GNUC__
|
200
200
|
# define GGML_ATTRIBUTE_FORMAT(...)
|
201
|
-
#elif defined(__MINGW32__)
|
201
|
+
#elif defined(__MINGW32__) && !defined(__clang__)
|
202
202
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
203
203
|
#else
|
204
204
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
@@ -241,12 +241,6 @@
|
|
241
241
|
#define GGML_ROPE_TYPE_MROPE 8
|
242
242
|
#define GGML_ROPE_TYPE_VISION 24
|
243
243
|
|
244
|
-
#define GGUF_MAGIC "GGUF"
|
245
|
-
|
246
|
-
#define GGUF_VERSION 3
|
247
|
-
|
248
|
-
#define GGUF_DEFAULT_ALIGNMENT 32
|
249
|
-
|
250
244
|
#define GGML_UNUSED(x) (void)(x)
|
251
245
|
|
252
246
|
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
|
@@ -399,14 +393,8 @@ extern "C" {
|
|
399
393
|
|
400
394
|
// precision
|
401
395
|
enum ggml_prec {
|
402
|
-
GGML_PREC_DEFAULT,
|
403
|
-
GGML_PREC_F32,
|
404
|
-
};
|
405
|
-
|
406
|
-
enum ggml_backend_type {
|
407
|
-
GGML_BACKEND_TYPE_CPU = 0,
|
408
|
-
GGML_BACKEND_TYPE_GPU = 10,
|
409
|
-
GGML_BACKEND_TYPE_GPU_SPLIT = 20,
|
396
|
+
GGML_PREC_DEFAULT = 0, // stored as ggml_tensor.op_params, 0 by default
|
397
|
+
GGML_PREC_F32 = 10,
|
410
398
|
};
|
411
399
|
|
412
400
|
// model file types
|
@@ -466,6 +454,7 @@ extern "C" {
|
|
466
454
|
GGML_OP_RMS_NORM,
|
467
455
|
GGML_OP_RMS_NORM_BACK,
|
468
456
|
GGML_OP_GROUP_NORM,
|
457
|
+
GGML_OP_L2_NORM,
|
469
458
|
|
470
459
|
GGML_OP_MUL_MAT,
|
471
460
|
GGML_OP_MUL_MAT_ID,
|
@@ -481,6 +470,7 @@ extern "C" {
|
|
481
470
|
GGML_OP_TRANSPOSE,
|
482
471
|
GGML_OP_GET_ROWS,
|
483
472
|
GGML_OP_GET_ROWS_BACK,
|
473
|
+
GGML_OP_SET_ROWS,
|
484
474
|
GGML_OP_DIAG,
|
485
475
|
GGML_OP_DIAG_MASK_INF,
|
486
476
|
GGML_OP_DIAG_MASK_ZERO,
|
@@ -492,6 +482,8 @@ extern "C" {
|
|
492
482
|
GGML_OP_CONV_TRANSPOSE_1D,
|
493
483
|
GGML_OP_IM2COL,
|
494
484
|
GGML_OP_IM2COL_BACK,
|
485
|
+
GGML_OP_CONV_2D,
|
486
|
+
GGML_OP_CONV_2D_DW,
|
495
487
|
GGML_OP_CONV_TRANSPOSE_2D,
|
496
488
|
GGML_OP_POOL_1D,
|
497
489
|
GGML_OP_POOL_2D,
|
@@ -499,6 +491,7 @@ extern "C" {
|
|
499
491
|
GGML_OP_UPSCALE, // nearest interpolate
|
500
492
|
GGML_OP_PAD,
|
501
493
|
GGML_OP_PAD_REFLECT_1D,
|
494
|
+
GGML_OP_ROLL,
|
502
495
|
GGML_OP_ARANGE,
|
503
496
|
GGML_OP_TIMESTEP_EMBEDDING,
|
504
497
|
GGML_OP_ARGSORT,
|
@@ -513,24 +506,23 @@ extern "C" {
|
|
513
506
|
GGML_OP_GET_REL_POS,
|
514
507
|
GGML_OP_ADD_REL_POS,
|
515
508
|
GGML_OP_RWKV_WKV6,
|
509
|
+
GGML_OP_GATED_LINEAR_ATTN,
|
510
|
+
GGML_OP_RWKV_WKV7,
|
516
511
|
|
517
512
|
GGML_OP_UNARY,
|
518
513
|
|
519
|
-
GGML_OP_MAP_UNARY,
|
520
|
-
GGML_OP_MAP_BINARY,
|
521
|
-
|
522
|
-
GGML_OP_MAP_CUSTOM1_F32,
|
523
|
-
GGML_OP_MAP_CUSTOM2_F32,
|
524
|
-
GGML_OP_MAP_CUSTOM3_F32,
|
525
|
-
|
526
514
|
GGML_OP_MAP_CUSTOM1,
|
527
515
|
GGML_OP_MAP_CUSTOM2,
|
528
516
|
GGML_OP_MAP_CUSTOM3,
|
529
517
|
|
518
|
+
GGML_OP_CUSTOM,
|
519
|
+
|
530
520
|
GGML_OP_CROSS_ENTROPY_LOSS,
|
531
521
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
532
522
|
GGML_OP_OPT_STEP_ADAMW,
|
533
523
|
|
524
|
+
GGML_OP_GLU,
|
525
|
+
|
534
526
|
GGML_OP_COUNT,
|
535
527
|
};
|
536
528
|
|
@@ -549,10 +541,19 @@ extern "C" {
|
|
549
541
|
GGML_UNARY_OP_HARDSWISH,
|
550
542
|
GGML_UNARY_OP_HARDSIGMOID,
|
551
543
|
GGML_UNARY_OP_EXP,
|
544
|
+
GGML_UNARY_OP_GELU_ERF,
|
552
545
|
|
553
546
|
GGML_UNARY_OP_COUNT,
|
554
547
|
};
|
555
548
|
|
549
|
+
enum ggml_glu_op {
|
550
|
+
GGML_GLU_OP_REGLU,
|
551
|
+
GGML_GLU_OP_GEGLU,
|
552
|
+
GGML_GLU_OP_SWIGLU,
|
553
|
+
|
554
|
+
GGML_GLU_OP_COUNT,
|
555
|
+
};
|
556
|
+
|
556
557
|
enum ggml_object_type {
|
557
558
|
GGML_OBJECT_TYPE_TENSOR,
|
558
559
|
GGML_OBJECT_TYPE_GRAPH,
|
@@ -587,8 +588,6 @@ extern "C" {
|
|
587
588
|
struct ggml_tensor {
|
588
589
|
enum ggml_type type;
|
589
590
|
|
590
|
-
GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
|
591
|
-
|
592
591
|
struct ggml_backend_buffer * buffer;
|
593
592
|
|
594
593
|
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
@@ -670,6 +669,7 @@ extern "C" {
|
|
670
669
|
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
671
670
|
|
672
671
|
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
672
|
+
GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
|
673
673
|
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
674
674
|
|
675
675
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
@@ -688,11 +688,21 @@ extern "C" {
|
|
688
688
|
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
689
689
|
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
690
690
|
|
691
|
+
// returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
|
691
692
|
GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor);
|
692
693
|
GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
|
693
694
|
GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
|
694
695
|
GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
|
695
696
|
|
697
|
+
// returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
|
698
|
+
GGML_API bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor);
|
699
|
+
|
700
|
+
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
701
|
+
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
702
|
+
|
703
|
+
// true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
|
704
|
+
GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
|
705
|
+
|
696
706
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
697
707
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
698
708
|
|
@@ -764,6 +774,7 @@ extern "C" {
|
|
764
774
|
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
765
775
|
|
766
776
|
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
777
|
+
GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
|
767
778
|
|
768
779
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
769
780
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
@@ -776,7 +787,7 @@ extern "C" {
|
|
776
787
|
// Tensor flags
|
777
788
|
GGML_API void ggml_set_input(struct ggml_tensor * tensor);
|
778
789
|
GGML_API void ggml_set_output(struct ggml_tensor * tensor);
|
779
|
-
GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
|
790
|
+
GGML_API void ggml_set_param(struct ggml_tensor * tensor);
|
780
791
|
GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
|
781
792
|
|
782
793
|
//
|
@@ -942,11 +953,20 @@ extern "C" {
|
|
942
953
|
struct ggml_tensor * a,
|
943
954
|
struct ggml_tensor * b);
|
944
955
|
|
956
|
+
// repeat a to the specified shape
|
957
|
+
GGML_API struct ggml_tensor * ggml_repeat_4d(
|
958
|
+
struct ggml_context * ctx,
|
959
|
+
struct ggml_tensor * a,
|
960
|
+
int64_t ne0,
|
961
|
+
int64_t ne1,
|
962
|
+
int64_t ne2,
|
963
|
+
int64_t ne3);
|
964
|
+
|
945
965
|
// sums repetitions in a into shape of b
|
946
966
|
GGML_API struct ggml_tensor * ggml_repeat_back(
|
947
967
|
struct ggml_context * ctx,
|
948
968
|
struct ggml_tensor * a,
|
949
|
-
struct ggml_tensor * b);
|
969
|
+
struct ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
|
950
970
|
|
951
971
|
// concat a and b along dim
|
952
972
|
// used in stable-diffusion
|
@@ -1032,6 +1052,16 @@ extern "C" {
|
|
1032
1052
|
struct ggml_context * ctx,
|
1033
1053
|
struct ggml_tensor * a);
|
1034
1054
|
|
1055
|
+
// GELU using erf (error function) when possible
|
1056
|
+
// some backends may fallback to approximation based on Abramowitz and Stegun formula
|
1057
|
+
GGML_API struct ggml_tensor * ggml_gelu_erf(
|
1058
|
+
struct ggml_context * ctx,
|
1059
|
+
struct ggml_tensor * a);
|
1060
|
+
|
1061
|
+
GGML_API struct ggml_tensor * ggml_gelu_erf_inplace(
|
1062
|
+
struct ggml_context * ctx,
|
1063
|
+
struct ggml_tensor * a);
|
1064
|
+
|
1035
1065
|
GGML_API struct ggml_tensor * ggml_gelu_quick(
|
1036
1066
|
struct ggml_context * ctx,
|
1037
1067
|
struct ggml_tensor * a);
|
@@ -1073,6 +1103,63 @@ extern "C" {
|
|
1073
1103
|
struct ggml_context * ctx,
|
1074
1104
|
struct ggml_tensor * a);
|
1075
1105
|
|
1106
|
+
// gated linear unit ops
|
1107
|
+
// A: n columns, r rows,
|
1108
|
+
// result is n / 2 columns, r rows,
|
1109
|
+
// expects gate in second half of row, unless swapped is true
|
1110
|
+
GGML_API struct ggml_tensor * ggml_glu(
|
1111
|
+
struct ggml_context * ctx,
|
1112
|
+
struct ggml_tensor * a,
|
1113
|
+
enum ggml_glu_op op,
|
1114
|
+
bool swapped);
|
1115
|
+
|
1116
|
+
GGML_API struct ggml_tensor * ggml_reglu(
|
1117
|
+
struct ggml_context * ctx,
|
1118
|
+
struct ggml_tensor * a);
|
1119
|
+
|
1120
|
+
GGML_API struct ggml_tensor * ggml_reglu_swapped(
|
1121
|
+
struct ggml_context * ctx,
|
1122
|
+
struct ggml_tensor * a);
|
1123
|
+
|
1124
|
+
GGML_API struct ggml_tensor * ggml_geglu(
|
1125
|
+
struct ggml_context * ctx,
|
1126
|
+
struct ggml_tensor * a);
|
1127
|
+
|
1128
|
+
GGML_API struct ggml_tensor * ggml_geglu_swapped(
|
1129
|
+
struct ggml_context * ctx,
|
1130
|
+
struct ggml_tensor * a);
|
1131
|
+
|
1132
|
+
GGML_API struct ggml_tensor * ggml_swiglu(
|
1133
|
+
struct ggml_context * ctx,
|
1134
|
+
struct ggml_tensor * a);
|
1135
|
+
|
1136
|
+
GGML_API struct ggml_tensor * ggml_swiglu_swapped(
|
1137
|
+
struct ggml_context * ctx,
|
1138
|
+
struct ggml_tensor * a);
|
1139
|
+
|
1140
|
+
// A: n columns, r rows,
|
1141
|
+
// B: n columns, r rows,
|
1142
|
+
GGML_API struct ggml_tensor * ggml_glu_split(
|
1143
|
+
struct ggml_context * ctx,
|
1144
|
+
struct ggml_tensor * a,
|
1145
|
+
struct ggml_tensor * b,
|
1146
|
+
enum ggml_glu_op op);
|
1147
|
+
|
1148
|
+
GGML_API struct ggml_tensor * ggml_reglu_split(
|
1149
|
+
struct ggml_context * ctx,
|
1150
|
+
struct ggml_tensor * a,
|
1151
|
+
struct ggml_tensor * b);
|
1152
|
+
|
1153
|
+
GGML_API struct ggml_tensor * ggml_geglu_split(
|
1154
|
+
struct ggml_context * ctx,
|
1155
|
+
struct ggml_tensor * a,
|
1156
|
+
struct ggml_tensor * b);
|
1157
|
+
|
1158
|
+
GGML_API struct ggml_tensor * ggml_swiglu_split(
|
1159
|
+
struct ggml_context * ctx,
|
1160
|
+
struct ggml_tensor * a,
|
1161
|
+
struct ggml_tensor * b);
|
1162
|
+
|
1076
1163
|
// normalize along rows
|
1077
1164
|
GGML_API struct ggml_tensor * ggml_norm(
|
1078
1165
|
struct ggml_context * ctx,
|
@@ -1108,6 +1195,18 @@ extern "C" {
|
|
1108
1195
|
int n_groups,
|
1109
1196
|
float eps);
|
1110
1197
|
|
1198
|
+
// l2 normalize along rows
|
1199
|
+
// used in rwkv v7
|
1200
|
+
GGML_API struct ggml_tensor * ggml_l2_norm(
|
1201
|
+
struct ggml_context * ctx,
|
1202
|
+
struct ggml_tensor * a,
|
1203
|
+
float eps);
|
1204
|
+
|
1205
|
+
GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
|
1206
|
+
struct ggml_context * ctx,
|
1207
|
+
struct ggml_tensor * a,
|
1208
|
+
float eps);
|
1209
|
+
|
1111
1210
|
// a - x
|
1112
1211
|
// b - dy
|
1113
1212
|
GGML_API struct ggml_tensor * ggml_rms_norm_back(
|
@@ -1350,6 +1449,23 @@ extern "C" {
|
|
1350
1449
|
struct ggml_tensor * b, // row indices
|
1351
1450
|
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
1352
1451
|
|
1452
|
+
// a TD [n_embd, ne1, ne2, ne3]
|
1453
|
+
// b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
|
1454
|
+
// c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
|
1455
|
+
//
|
1456
|
+
// undefined behavior if destination rows overlap
|
1457
|
+
//
|
1458
|
+
// broadcast:
|
1459
|
+
// ne2 % ne11 == 0
|
1460
|
+
// ne3 % ne12 == 0
|
1461
|
+
//
|
1462
|
+
// return view(a)
|
1463
|
+
GGML_API struct ggml_tensor * ggml_set_rows(
|
1464
|
+
struct ggml_context * ctx,
|
1465
|
+
struct ggml_tensor * a, // destination
|
1466
|
+
struct ggml_tensor * b, // source
|
1467
|
+
struct ggml_tensor * c); // row indices
|
1468
|
+
|
1353
1469
|
GGML_API struct ggml_tensor * ggml_diag(
|
1354
1470
|
struct ggml_context * ctx,
|
1355
1471
|
struct ggml_tensor * a);
|
@@ -1397,16 +1513,20 @@ extern "C" {
|
|
1397
1513
|
float scale,
|
1398
1514
|
float max_bias);
|
1399
1515
|
|
1400
|
-
GGML_API struct ggml_tensor * ggml_soft_max_back(
|
1516
|
+
GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
|
1401
1517
|
struct ggml_context * ctx,
|
1402
1518
|
struct ggml_tensor * a,
|
1403
|
-
struct ggml_tensor * b);
|
1519
|
+
struct ggml_tensor * b,
|
1520
|
+
float scale,
|
1521
|
+
float max_bias);
|
1404
1522
|
|
1405
1523
|
// in-place, returns view(a)
|
1406
|
-
GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
|
1524
|
+
GGML_API struct ggml_tensor * ggml_soft_max_ext_back_inplace(
|
1407
1525
|
struct ggml_context * ctx,
|
1408
1526
|
struct ggml_tensor * a,
|
1409
|
-
struct ggml_tensor * b);
|
1527
|
+
struct ggml_tensor * b,
|
1528
|
+
float scale,
|
1529
|
+
float max_bias);
|
1410
1530
|
|
1411
1531
|
// rotary position embedding
|
1412
1532
|
// if (mode & 1) - skip n_past elements (NOT SUPPORTED)
|
@@ -1513,7 +1633,7 @@ extern "C" {
|
|
1513
1633
|
|
1514
1634
|
// rotary position embedding backward, i.e compute dx from dy
|
1515
1635
|
// a - dy
|
1516
|
-
GGML_API struct ggml_tensor * ggml_rope_back(
|
1636
|
+
GGML_API struct ggml_tensor * ggml_rope_ext_back(
|
1517
1637
|
struct ggml_context * ctx,
|
1518
1638
|
struct ggml_tensor * a, // gradients of ggml_rope result
|
1519
1639
|
struct ggml_tensor * b, // positions
|
@@ -1528,6 +1648,23 @@ extern "C" {
|
|
1528
1648
|
float beta_fast,
|
1529
1649
|
float beta_slow);
|
1530
1650
|
|
1651
|
+
GGML_API struct ggml_tensor * ggml_rope_multi_back(
|
1652
|
+
struct ggml_context * ctx,
|
1653
|
+
struct ggml_tensor * a,
|
1654
|
+
struct ggml_tensor * b,
|
1655
|
+
struct ggml_tensor * c,
|
1656
|
+
int n_dims,
|
1657
|
+
int sections[4],
|
1658
|
+
int mode,
|
1659
|
+
int n_ctx_orig,
|
1660
|
+
float freq_base,
|
1661
|
+
float freq_scale,
|
1662
|
+
float ext_factor,
|
1663
|
+
float attn_factor,
|
1664
|
+
float beta_fast,
|
1665
|
+
float beta_slow);
|
1666
|
+
|
1667
|
+
|
1531
1668
|
// clamp
|
1532
1669
|
// in-place, returns view(a)
|
1533
1670
|
GGML_API struct ggml_tensor * ggml_clamp(
|
@@ -1564,17 +1701,6 @@ extern "C" {
|
|
1564
1701
|
int d1, // dilation dimension 1
|
1565
1702
|
bool is_2D);
|
1566
1703
|
|
1567
|
-
GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
|
1568
|
-
struct ggml_context * ctx,
|
1569
|
-
struct ggml_tensor * a, // convolution kernel
|
1570
|
-
struct ggml_tensor * b, // data
|
1571
|
-
int s0, // stride dimension 0
|
1572
|
-
int s1, // stride dimension 1
|
1573
|
-
int p0, // padding dimension 0
|
1574
|
-
int p1, // padding dimension 1
|
1575
|
-
int d0, // dilation dimension 0
|
1576
|
-
int d1); // dilation dimension 1
|
1577
|
-
|
1578
1704
|
GGML_API struct ggml_tensor * ggml_conv_1d(
|
1579
1705
|
struct ggml_context * ctx,
|
1580
1706
|
struct ggml_tensor * a, // convolution kernel
|
@@ -1592,6 +1718,23 @@ extern "C" {
|
|
1592
1718
|
int s, // stride
|
1593
1719
|
int d); // dilation
|
1594
1720
|
|
1721
|
+
// depthwise
|
1722
|
+
// TODO: this is very likely wrong for some cases! - needs more testing
|
1723
|
+
GGML_API struct ggml_tensor * ggml_conv_1d_dw(
|
1724
|
+
struct ggml_context * ctx,
|
1725
|
+
struct ggml_tensor * a, // convolution kernel
|
1726
|
+
struct ggml_tensor * b, // data
|
1727
|
+
int s0, // stride
|
1728
|
+
int p0, // padding
|
1729
|
+
int d0); // dilation
|
1730
|
+
|
1731
|
+
GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph(
|
1732
|
+
struct ggml_context * ctx,
|
1733
|
+
struct ggml_tensor * a, // convolution kernel
|
1734
|
+
struct ggml_tensor * b, // data
|
1735
|
+
int s0, // stride
|
1736
|
+
int d0); // dilation
|
1737
|
+
|
1595
1738
|
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
1596
1739
|
struct ggml_context * ctx,
|
1597
1740
|
struct ggml_tensor * a, // convolution kernel
|
@@ -1611,7 +1754,6 @@ extern "C" {
|
|
1611
1754
|
int d0, // dilation dimension 0
|
1612
1755
|
int d1); // dilation dimension 1
|
1613
1756
|
|
1614
|
-
|
1615
1757
|
// kernel size is a->ne[0] x a->ne[1]
|
1616
1758
|
// stride is equal to kernel size
|
1617
1759
|
// padding is zero
|
@@ -1638,12 +1780,51 @@ extern "C" {
|
|
1638
1780
|
struct ggml_tensor * a,
|
1639
1781
|
struct ggml_tensor * b);
|
1640
1782
|
|
1783
|
+
// depthwise (via im2col and mul_mat)
|
1784
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_dw(
|
1785
|
+
struct ggml_context * ctx,
|
1786
|
+
struct ggml_tensor * a, // convolution kernel
|
1787
|
+
struct ggml_tensor * b, // data
|
1788
|
+
int s0, // stride dimension 0
|
1789
|
+
int s1, // stride dimension 1
|
1790
|
+
int p0, // padding dimension 0
|
1791
|
+
int p1, // padding dimension 1
|
1792
|
+
int d0, // dilation dimension 0
|
1793
|
+
int d1); // dilation dimension 1
|
1794
|
+
|
1795
|
+
// Depthwise 2D convolution
|
1796
|
+
// may be faster than ggml_conv_2d_dw, but not available in all backends
|
1797
|
+
// a: KW KH 1 C convolution kernel
|
1798
|
+
// b: W H C N input data
|
1799
|
+
// res: W_out H_out C N
|
1800
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct(
|
1801
|
+
struct ggml_context * ctx,
|
1802
|
+
struct ggml_tensor * a,
|
1803
|
+
struct ggml_tensor * b,
|
1804
|
+
int stride0,
|
1805
|
+
int stride1,
|
1806
|
+
int pad0,
|
1807
|
+
int pad1,
|
1808
|
+
int dilation0,
|
1809
|
+
int dilation1);
|
1810
|
+
|
1641
1811
|
GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
|
1642
1812
|
struct ggml_context * ctx,
|
1643
1813
|
struct ggml_tensor * a,
|
1644
1814
|
struct ggml_tensor * b,
|
1645
1815
|
int stride);
|
1646
1816
|
|
1817
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_direct(
|
1818
|
+
struct ggml_context * ctx,
|
1819
|
+
struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
|
1820
|
+
struct ggml_tensor * b, // input data [W, H, C, N]
|
1821
|
+
int s0, // stride dimension 0
|
1822
|
+
int s1, // stride dimension 1
|
1823
|
+
int p0, // padding dimension 0
|
1824
|
+
int p1, // padding dimension 1
|
1825
|
+
int d0, // dilation dimension 0
|
1826
|
+
int d1); // dilation dimension 1
|
1827
|
+
|
1647
1828
|
enum ggml_op_pool {
|
1648
1829
|
GGML_OP_POOL_MAX,
|
1649
1830
|
GGML_OP_POOL_AVG,
|
@@ -1683,24 +1864,47 @@ extern "C" {
|
|
1683
1864
|
float p0,
|
1684
1865
|
float p1);
|
1685
1866
|
|
1686
|
-
|
1867
|
+
enum ggml_scale_mode {
|
1868
|
+
GGML_SCALE_MODE_NEAREST = 0,
|
1869
|
+
GGML_SCALE_MODE_BILINEAR = 1,
|
1870
|
+
|
1871
|
+
GGML_SCALE_MODE_COUNT
|
1872
|
+
};
|
1873
|
+
|
1874
|
+
enum ggml_scale_flag {
|
1875
|
+
GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
|
1876
|
+
};
|
1877
|
+
|
1878
|
+
// interpolate
|
1687
1879
|
// multiplies ne0 and ne1 by scale factor
|
1688
|
-
// used in stable-diffusion
|
1689
1880
|
GGML_API struct ggml_tensor * ggml_upscale(
|
1690
1881
|
struct ggml_context * ctx,
|
1691
1882
|
struct ggml_tensor * a,
|
1692
|
-
int scale_factor);
|
1883
|
+
int scale_factor,
|
1884
|
+
enum ggml_scale_mode mode);
|
1693
1885
|
|
1694
|
-
//
|
1695
|
-
//
|
1696
|
-
|
1697
|
-
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
1886
|
+
// interpolate
|
1887
|
+
// interpolate scale to specified dimensions
|
1888
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
|
1698
1889
|
struct ggml_context * ctx,
|
1699
1890
|
struct ggml_tensor * a,
|
1700
1891
|
int ne0,
|
1701
1892
|
int ne1,
|
1702
1893
|
int ne2,
|
1703
|
-
int ne3);
|
1894
|
+
int ne3,
|
1895
|
+
enum ggml_scale_mode mode),
|
1896
|
+
"use ggml_interpolate instead");
|
1897
|
+
|
1898
|
+
// Up- or downsamples the input to the specified size.
|
1899
|
+
// 2D scale modes (eg. bilinear) are applied to the first two dimensions.
|
1900
|
+
GGML_API struct ggml_tensor * ggml_interpolate(
|
1901
|
+
struct ggml_context * ctx,
|
1902
|
+
struct ggml_tensor * a,
|
1903
|
+
int64_t ne0,
|
1904
|
+
int64_t ne1,
|
1905
|
+
int64_t ne2,
|
1906
|
+
int64_t ne3,
|
1907
|
+
uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
|
1704
1908
|
|
1705
1909
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
1706
1910
|
GGML_API struct ggml_tensor * ggml_pad(
|
@@ -1718,6 +1922,17 @@ extern "C" {
|
|
1718
1922
|
int p0,
|
1719
1923
|
int p1);
|
1720
1924
|
|
1925
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
1926
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
1927
|
+
GGML_API struct ggml_tensor * ggml_roll(
|
1928
|
+
struct ggml_context * ctx,
|
1929
|
+
struct ggml_tensor * a,
|
1930
|
+
int shift0,
|
1931
|
+
int shift1,
|
1932
|
+
int shift2,
|
1933
|
+
int shift3);
|
1934
|
+
|
1935
|
+
|
1721
1936
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
1722
1937
|
// timesteps: [N,]
|
1723
1938
|
// return: [N, dim]
|
@@ -1750,13 +1965,13 @@ extern "C" {
|
|
1750
1965
|
struct ggml_tensor * a,
|
1751
1966
|
int k);
|
1752
1967
|
|
1753
|
-
#define GGML_KQ_MASK_PAD 32
|
1968
|
+
#define GGML_KQ_MASK_PAD 64
|
1754
1969
|
|
1755
|
-
// q: [
|
1756
|
-
// k: [
|
1757
|
-
// v: [
|
1758
|
-
// mask: [n_kv,
|
1759
|
-
// res: [
|
1970
|
+
// q: [n_embd_k, n_batch, n_head, 1]
|
1971
|
+
// k: [n_embd_k, n_kv, n_head_kv, 1]
|
1972
|
+
// v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
|
1973
|
+
// mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
1974
|
+
// res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
|
1760
1975
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
1761
1976
|
struct ggml_context * ctx,
|
1762
1977
|
struct ggml_tensor * q,
|
@@ -1856,84 +2071,26 @@ extern "C" {
|
|
1856
2071
|
struct ggml_tensor * td,
|
1857
2072
|
struct ggml_tensor * state);
|
1858
2073
|
|
1859
|
-
|
2074
|
+
GGML_API struct ggml_tensor * ggml_gated_linear_attn(
|
2075
|
+
struct ggml_context * ctx,
|
2076
|
+
struct ggml_tensor * k,
|
2077
|
+
struct ggml_tensor * v,
|
2078
|
+
struct ggml_tensor * q,
|
2079
|
+
struct ggml_tensor * g,
|
2080
|
+
struct ggml_tensor * state,
|
2081
|
+
float scale);
|
2082
|
+
|
2083
|
+
GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
|
2084
|
+
struct ggml_context * ctx,
|
2085
|
+
struct ggml_tensor * r,
|
2086
|
+
struct ggml_tensor * w,
|
2087
|
+
struct ggml_tensor * k,
|
2088
|
+
struct ggml_tensor * v,
|
2089
|
+
struct ggml_tensor * a,
|
2090
|
+
struct ggml_tensor * b,
|
2091
|
+
struct ggml_tensor * state);
|
1860
2092
|
|
1861
|
-
|
1862
|
-
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
1863
|
-
|
1864
|
-
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
1865
|
-
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1866
|
-
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1867
|
-
|
1868
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
1869
|
-
struct ggml_context * ctx,
|
1870
|
-
struct ggml_tensor * a,
|
1871
|
-
ggml_unary_op_f32_t fun),
|
1872
|
-
"use ggml_map_custom1 instead");
|
1873
|
-
|
1874
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
1875
|
-
struct ggml_context * ctx,
|
1876
|
-
struct ggml_tensor * a,
|
1877
|
-
ggml_unary_op_f32_t fun),
|
1878
|
-
"use ggml_map_custom1_inplace instead");
|
1879
|
-
|
1880
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
1881
|
-
struct ggml_context * ctx,
|
1882
|
-
struct ggml_tensor * a,
|
1883
|
-
struct ggml_tensor * b,
|
1884
|
-
ggml_binary_op_f32_t fun),
|
1885
|
-
"use ggml_map_custom2 instead");
|
1886
|
-
|
1887
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
1888
|
-
struct ggml_context * ctx,
|
1889
|
-
struct ggml_tensor * a,
|
1890
|
-
struct ggml_tensor * b,
|
1891
|
-
ggml_binary_op_f32_t fun),
|
1892
|
-
"use ggml_map_custom2_inplace instead");
|
1893
|
-
|
1894
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
1895
|
-
struct ggml_context * ctx,
|
1896
|
-
struct ggml_tensor * a,
|
1897
|
-
ggml_custom1_op_f32_t fun),
|
1898
|
-
"use ggml_map_custom1 instead");
|
1899
|
-
|
1900
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
1901
|
-
struct ggml_context * ctx,
|
1902
|
-
struct ggml_tensor * a,
|
1903
|
-
ggml_custom1_op_f32_t fun),
|
1904
|
-
"use ggml_map_custom1_inplace instead");
|
1905
|
-
|
1906
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
1907
|
-
struct ggml_context * ctx,
|
1908
|
-
struct ggml_tensor * a,
|
1909
|
-
struct ggml_tensor * b,
|
1910
|
-
ggml_custom2_op_f32_t fun),
|
1911
|
-
"use ggml_map_custom2 instead");
|
1912
|
-
|
1913
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
1914
|
-
struct ggml_context * ctx,
|
1915
|
-
struct ggml_tensor * a,
|
1916
|
-
struct ggml_tensor * b,
|
1917
|
-
ggml_custom2_op_f32_t fun),
|
1918
|
-
"use ggml_map_custom2_inplace instead");
|
1919
|
-
|
1920
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
1921
|
-
struct ggml_context * ctx,
|
1922
|
-
struct ggml_tensor * a,
|
1923
|
-
struct ggml_tensor * b,
|
1924
|
-
struct ggml_tensor * c,
|
1925
|
-
ggml_custom3_op_f32_t fun),
|
1926
|
-
"use ggml_map_custom3 instead");
|
1927
|
-
|
1928
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
1929
|
-
struct ggml_context * ctx,
|
1930
|
-
struct ggml_tensor * a,
|
1931
|
-
struct ggml_tensor * b,
|
1932
|
-
struct ggml_tensor * c,
|
1933
|
-
ggml_custom3_op_f32_t fun),
|
1934
|
-
"use ggml_map_custom3_inplace instead");
|
1935
|
-
|
1936
|
-
// custom operators v2
|
2093
|
+
// custom operators
|
1937
2094
|
|
1938
2095
|
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
1939
2096
|
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
|
@@ -1990,6 +2147,30 @@ extern "C" {
|
|
1990
2147
|
int n_tasks,
|
1991
2148
|
void * userdata);
|
1992
2149
|
|
2150
|
+
typedef void (*ggml_custom_op_t)(struct ggml_tensor * dst , int ith, int nth, void * userdata);
|
2151
|
+
|
2152
|
+
GGML_API struct ggml_tensor * ggml_custom_4d(
|
2153
|
+
struct ggml_context * ctx,
|
2154
|
+
enum ggml_type type,
|
2155
|
+
int64_t ne0,
|
2156
|
+
int64_t ne1,
|
2157
|
+
int64_t ne2,
|
2158
|
+
int64_t ne3,
|
2159
|
+
struct ggml_tensor ** args,
|
2160
|
+
int n_args,
|
2161
|
+
ggml_custom_op_t fun,
|
2162
|
+
int n_tasks,
|
2163
|
+
void * userdata);
|
2164
|
+
|
2165
|
+
GGML_API struct ggml_tensor * ggml_custom_inplace(
|
2166
|
+
struct ggml_context * ctx,
|
2167
|
+
struct ggml_tensor * a,
|
2168
|
+
struct ggml_tensor ** args,
|
2169
|
+
int n_args,
|
2170
|
+
ggml_custom_op_t fun,
|
2171
|
+
int n_tasks,
|
2172
|
+
void * userdata);
|
2173
|
+
|
1993
2174
|
// loss function
|
1994
2175
|
|
1995
2176
|
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
|
@@ -2020,15 +2201,14 @@ extern "C" {
|
|
2020
2201
|
|
2021
2202
|
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
2022
2203
|
GGML_API void ggml_build_backward_expand(
|
2023
|
-
struct ggml_context *
|
2024
|
-
struct
|
2025
|
-
struct
|
2026
|
-
bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
|
2204
|
+
struct ggml_context * ctx, // context for gradient computation
|
2205
|
+
struct ggml_cgraph * cgraph,
|
2206
|
+
struct ggml_tensor ** grad_accs);
|
2027
2207
|
|
2028
2208
|
// graph allocation in a context
|
2029
2209
|
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
2030
2210
|
GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
|
2031
|
-
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
2211
|
+
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads);
|
2032
2212
|
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
2033
2213
|
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
|
2034
2214
|
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|
@@ -2047,9 +2227,6 @@ extern "C" {
|
|
2047
2227
|
GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
|
2048
2228
|
GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
|
2049
2229
|
|
2050
|
-
GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
|
2051
|
-
GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
|
2052
|
-
|
2053
2230
|
// print info and performance information for the graph
|
2054
2231
|
GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
|
2055
2232
|
|
@@ -2094,132 +2271,6 @@ extern "C" {
|
|
2094
2271
|
int64_t n_per_row,
|
2095
2272
|
const float * imatrix);
|
2096
2273
|
|
2097
|
-
//
|
2098
|
-
// gguf
|
2099
|
-
//
|
2100
|
-
|
2101
|
-
enum gguf_type {
|
2102
|
-
GGUF_TYPE_UINT8 = 0,
|
2103
|
-
GGUF_TYPE_INT8 = 1,
|
2104
|
-
GGUF_TYPE_UINT16 = 2,
|
2105
|
-
GGUF_TYPE_INT16 = 3,
|
2106
|
-
GGUF_TYPE_UINT32 = 4,
|
2107
|
-
GGUF_TYPE_INT32 = 5,
|
2108
|
-
GGUF_TYPE_FLOAT32 = 6,
|
2109
|
-
GGUF_TYPE_BOOL = 7,
|
2110
|
-
GGUF_TYPE_STRING = 8,
|
2111
|
-
GGUF_TYPE_ARRAY = 9,
|
2112
|
-
GGUF_TYPE_UINT64 = 10,
|
2113
|
-
GGUF_TYPE_INT64 = 11,
|
2114
|
-
GGUF_TYPE_FLOAT64 = 12,
|
2115
|
-
GGUF_TYPE_COUNT, // marks the end of the enum
|
2116
|
-
};
|
2117
|
-
|
2118
|
-
struct gguf_context;
|
2119
|
-
|
2120
|
-
struct gguf_init_params {
|
2121
|
-
bool no_alloc;
|
2122
|
-
|
2123
|
-
// if not NULL, create a ggml_context and allocate the tensor data in it
|
2124
|
-
struct ggml_context ** ctx;
|
2125
|
-
};
|
2126
|
-
|
2127
|
-
GGML_API struct gguf_context * gguf_init_empty(void);
|
2128
|
-
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
2129
|
-
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
2130
|
-
|
2131
|
-
GGML_API void gguf_free(struct gguf_context * ctx);
|
2132
|
-
|
2133
|
-
GGML_API const char * gguf_type_name(enum gguf_type type);
|
2134
|
-
|
2135
|
-
GGML_API int gguf_get_version (const struct gguf_context * ctx);
|
2136
|
-
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
|
2137
|
-
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
|
2138
|
-
GGML_API void * gguf_get_data (const struct gguf_context * ctx);
|
2139
|
-
|
2140
|
-
GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
|
2141
|
-
GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
|
2142
|
-
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
|
2143
|
-
|
2144
|
-
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
|
2145
|
-
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
|
2146
|
-
|
2147
|
-
// will abort if the wrong type is used for the key
|
2148
|
-
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
|
2149
|
-
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
|
2150
|
-
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
|
2151
|
-
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
|
2152
|
-
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
|
2153
|
-
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
|
2154
|
-
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
|
2155
|
-
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
|
2156
|
-
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
|
2157
|
-
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
|
2158
|
-
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
|
2159
|
-
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
|
2160
|
-
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
|
2161
|
-
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
|
2162
|
-
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
|
2163
|
-
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
|
2164
|
-
|
2165
|
-
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
2166
|
-
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
2167
|
-
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
2168
|
-
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
2169
|
-
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
|
2170
|
-
|
2171
|
-
// removes key if it exists
|
2172
|
-
GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
|
2173
|
-
|
2174
|
-
// overrides existing values or adds a new one
|
2175
|
-
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
2176
|
-
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|
2177
|
-
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
|
2178
|
-
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
|
2179
|
-
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
|
2180
|
-
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
|
2181
|
-
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
|
2182
|
-
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
|
2183
|
-
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
|
2184
|
-
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
|
2185
|
-
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
|
2186
|
-
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
|
2187
|
-
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
|
2188
|
-
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
|
2189
|
-
|
2190
|
-
// set or add KV pairs from another context
|
2191
|
-
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
|
2192
|
-
|
2193
|
-
// manage tensor info
|
2194
|
-
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
|
2195
|
-
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
|
2196
|
-
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
|
2197
|
-
|
2198
|
-
// writing gguf files can be done in 2 ways:
|
2199
|
-
//
|
2200
|
-
// - write the entire gguf_context to a binary file in a single pass:
|
2201
|
-
//
|
2202
|
-
// gguf_write_to_file(ctx, fname);
|
2203
|
-
//
|
2204
|
-
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
|
2205
|
-
//
|
2206
|
-
// FILE * f = fopen(fname, "wb");
|
2207
|
-
// fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
|
2208
|
-
// fwrite(f, ...);
|
2209
|
-
// void * data = gguf_meta_get_meta_data(ctx);
|
2210
|
-
// fseek(f, 0, SEEK_SET);
|
2211
|
-
// fwrite(f, data, gguf_get_meta_size(ctx));
|
2212
|
-
// free(data);
|
2213
|
-
// fclose(f);
|
2214
|
-
//
|
2215
|
-
|
2216
|
-
// write the entire context to a binary file
|
2217
|
-
GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
2218
|
-
|
2219
|
-
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
2220
|
-
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
2221
|
-
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
2222
|
-
|
2223
2274
|
#ifdef __cplusplus
|
2224
2275
|
// restrict not standard in C++
|
2225
2276
|
# if defined(__GNUC__)
|
@@ -2232,7 +2283,11 @@ extern "C" {
|
|
2232
2283
|
# define GGML_RESTRICT
|
2233
2284
|
# endif
|
2234
2285
|
#else
|
2235
|
-
# define GGML_RESTRICT restrict
|
2286
|
+
# if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
|
2287
|
+
# define GGML_RESTRICT __restrict
|
2288
|
+
# else
|
2289
|
+
# define GGML_RESTRICT restrict
|
2290
|
+
# endif
|
2236
2291
|
#endif
|
2237
2292
|
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
2238
2293
|
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
@@ -2255,6 +2310,7 @@ extern "C" {
|
|
2255
2310
|
|
2256
2311
|
// scheduling priorities
|
2257
2312
|
enum ggml_sched_priority {
|
2313
|
+
GGML_SCHED_PRIO_LOW = -1,
|
2258
2314
|
GGML_SCHED_PRIO_NORMAL,
|
2259
2315
|
GGML_SCHED_PRIO_MEDIUM,
|
2260
2316
|
GGML_SCHED_PRIO_HIGH,
|