whispercpp 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/LICENSE +1 -1
- data/README.md +216 -424
- data/Rakefile +79 -11
- data/ext/.gitignore +11 -0
- data/ext/dependencies.rb +61 -0
- data/ext/extconf.rb +18 -26
- data/ext/options.rb +221 -0
- data/ext/ruby_whisper.c +159 -0
- data/ext/ruby_whisper.h +27 -2
- data/ext/ruby_whisper_context.c +641 -0
- data/ext/ruby_whisper_error.c +52 -0
- data/ext/ruby_whisper_model.c +232 -0
- data/ext/ruby_whisper_params.c +1301 -0
- data/ext/ruby_whisper_segment.c +143 -0
- data/ext/ruby_whisper_transcribe.cpp +87 -0
- data/ext/ruby_whisper_vad_params.c +288 -0
- data/ext/sources/.dockerignore +3 -0
- data/ext/sources/.github/workflows/bindings-ruby.yml +21 -0
- data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
- data/ext/sources/CMakeLists.txt +251 -0
- data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
- data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
- data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
- data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
- data/ext/sources/bindings/javascript/package.json +26 -0
- data/ext/sources/bindings/javascript/whisper.js +19 -0
- data/ext/sources/build-xcframework.sh +547 -0
- data/ext/sources/ci/run.sh +336 -0
- data/ext/sources/close-issue.yml +28 -0
- data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
- data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
- data/ext/sources/cmake/build-info.cmake +60 -0
- data/ext/sources/cmake/git-vars.cmake +22 -0
- data/ext/sources/cmake/whisper-config.cmake.in +65 -0
- data/ext/sources/cmake/whisper.pc.in +10 -0
- data/ext/sources/examples/CMakeLists.txt +124 -0
- data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +37 -0
- data/ext/sources/examples/addon.node/addon.cpp +438 -0
- data/ext/sources/examples/addon.node/index.js +54 -0
- data/ext/sources/examples/addon.node/package.json +16 -0
- data/ext/sources/examples/bench/CMakeLists.txt +8 -0
- data/ext/sources/examples/bench/bench.cpp +175 -0
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
- data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
- data/ext/sources/examples/cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/cli/cli.cpp +1294 -0
- data/ext/sources/examples/coi-serviceworker.js +146 -0
- data/ext/sources/examples/command/CMakeLists.txt +10 -0
- data/ext/sources/examples/command/command.cpp +776 -0
- data/ext/sources/examples/command/commands.txt +9 -0
- data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
- data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/common-ggml.cpp +238 -0
- data/ext/sources/examples/common-ggml.h +18 -0
- data/ext/sources/examples/common-sdl.cpp +227 -0
- data/ext/sources/examples/common-sdl.h +49 -0
- data/ext/sources/examples/common-whisper.cpp +168 -0
- data/ext/sources/examples/common-whisper.h +24 -0
- data/ext/sources/examples/common.cpp +675 -0
- data/ext/sources/examples/common.h +322 -0
- data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
- data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
- data/ext/sources/examples/generate-karaoke.sh +57 -0
- data/ext/sources/examples/grammar-parser.cpp +423 -0
- data/ext/sources/examples/grammar-parser.h +29 -0
- data/ext/sources/examples/helpers.js +191 -0
- data/ext/sources/examples/json.hpp +24596 -0
- data/ext/sources/examples/livestream.sh +112 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
- data/ext/sources/examples/lsp/lsp.cpp +467 -0
- data/ext/sources/examples/lsp/whisper.vim +362 -0
- data/ext/sources/examples/miniaudio.h +93468 -0
- data/ext/sources/examples/python/test_whisper_processor.py +7 -0
- data/ext/sources/examples/python/whisper_processor.py +54 -0
- data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
- data/ext/sources/examples/quantize/quantize.cpp +223 -0
- data/ext/sources/examples/server/CMakeLists.txt +12 -0
- data/ext/sources/examples/server/bench.js +29 -0
- data/ext/sources/examples/server/httplib.h +10497 -0
- data/ext/sources/examples/server/server.cpp +1091 -0
- data/ext/sources/examples/server.py +115 -0
- data/ext/sources/examples/stb_vorbis.c +5584 -0
- data/ext/sources/examples/stream/CMakeLists.txt +10 -0
- data/ext/sources/examples/stream/stream.cpp +429 -0
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
- data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
- data/ext/sources/examples/sycl/build.sh +22 -0
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
- data/ext/sources/examples/sycl/run-whisper.sh +17 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +40 -0
- data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
- data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +1746 -0
- data/ext/sources/examples/talk-llama/llama-arch.h +437 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +374 -0
- data/ext/sources/examples/talk-llama/llama-batch.h +89 -0
- data/ext/sources/examples/talk-llama/llama-chat.cpp +663 -0
- data/ext/sources/examples/talk-llama/llama-chat.h +58 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +2676 -0
- data/ext/sources/examples/talk-llama/llama-context.h +276 -0
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
- data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
- data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
- data/ext/sources/examples/talk-llama/llama-graph.cpp +1618 -0
- data/ext/sources/examples/talk-llama/llama-graph.h +640 -0
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +95 -0
- data/ext/sources/examples/talk-llama/llama-hparams.h +190 -0
- data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
- data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
- data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
- data/ext/sources/examples/talk-llama/llama-io.h +35 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2739 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +502 -0
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +379 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +32 -0
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
- data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1138 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +281 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +13814 -0
- data/ext/sources/examples/talk-llama/llama-model.h +425 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +966 -0
- data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
- data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +3340 -0
- data/ext/sources/examples/talk-llama/llama-vocab.h +131 -0
- data/ext/sources/examples/talk-llama/llama.cpp +354 -0
- data/ext/sources/examples/talk-llama/llama.h +1377 -0
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
- data/ext/sources/examples/talk-llama/speak +40 -0
- data/ext/sources/examples/talk-llama/speak.bat +1 -0
- data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +808 -0
- data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
- data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +849 -0
- data/ext/sources/examples/talk-llama/unicode.h +66 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +143 -0
- data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
- data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +249 -0
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
- data/ext/sources/ggml/CMakeLists.txt +390 -0
- data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
- data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
- data/ext/sources/ggml/cmake/common.cmake +26 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
- data/ext/sources/ggml/include/ggml-alloc.h +76 -0
- data/ext/sources/ggml/include/ggml-backend.h +354 -0
- data/ext/sources/ggml/include/ggml-blas.h +25 -0
- data/ext/sources/ggml/include/ggml-cann.h +123 -0
- data/ext/sources/ggml/include/ggml-cpp.h +39 -0
- data/ext/sources/ggml/include/ggml-cpu.h +143 -0
- data/ext/sources/ggml/include/ggml-cuda.h +47 -0
- data/ext/sources/ggml/include/ggml-kompute.h +50 -0
- data/ext/sources/ggml/include/ggml-metal.h +66 -0
- data/ext/sources/ggml/include/ggml-opencl.h +26 -0
- data/ext/sources/ggml/include/ggml-opt.h +237 -0
- data/ext/sources/ggml/include/ggml-rpc.h +33 -0
- data/ext/sources/ggml/include/ggml-sycl.h +49 -0
- data/ext/sources/ggml/include/ggml-vulkan.h +29 -0
- data/ext/{ggml.h → sources/ggml/include/ggml.h} +621 -821
- data/ext/sources/ggml/include/gguf.h +202 -0
- data/ext/sources/ggml/src/CMakeLists.txt +346 -0
- data/ext/sources/ggml/src/ggml-alloc.c +1042 -0
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- data/ext/sources/ggml/src/ggml-amx/common.h +94 -0
- data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/sources/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/sources/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/sources/ggml/src/ggml-backend-impl.h +255 -0
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +586 -0
- data/ext/sources/ggml/src/ggml-backend.cpp +2011 -0
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +74 -0
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +181 -0
- data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +3193 -0
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
- data/ext/sources/ggml/src/ggml-cann/common.h +420 -0
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +2606 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +234 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/sources/ggml/src/ggml-common.h +1857 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +504 -0
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +72 -0
- data/ext/sources/ggml/src/ggml-cpu/cpu-feats-x86.cpp +327 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +508 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +13747 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3510 -0
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +8903 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.h +110 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +252 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +818 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +828 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +730 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +26 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1471 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3505 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +336 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +289 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +59 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +243 -0
- data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +140 -0
- data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
- data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +131 -0
- data/ext/sources/ggml/src/ggml-impl.h +601 -0
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +120 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +5998 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +7089 -0
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +5124 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
- data/ext/sources/ggml/src/ggml-opt.cpp +1037 -0
- data/ext/sources/ggml/src/ggml-quants.c +5232 -0
- data/ext/sources/ggml/src/ggml-quants.h +100 -0
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +1813 -0
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +345 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- data/ext/sources/ggml/src/ggml-sycl/common.cpp +83 -0
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +589 -0
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +195 -0
- data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/conv.cpp +101 -0
- data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +623 -0
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +700 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +791 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +1162 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +75 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +99 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +4493 -0
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
- data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
- data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +3030 -0
- data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1110 -0
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +501 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
- data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +47 -0
- data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +83 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +361 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +261 -0
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1215 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +293 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
- data/ext/sources/ggml/src/ggml-threading.cpp +12 -0
- data/ext/sources/ggml/src/ggml-threading.h +14 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +196 -0
- data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10700 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +751 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- data/ext/sources/ggml/src/ggml.c +6550 -0
- data/ext/sources/ggml/src/gguf.cpp +1330 -0
- data/ext/{whisper.h → sources/include/whisper.h} +91 -24
- data/ext/sources/src/CMakeLists.txt +143 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.h +158 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +226 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.h +154 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +222 -0
- data/ext/sources/src/coreml/whisper-encoder.h +26 -0
- data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
- data/ext/sources/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/sources/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/sources/src/whisper-arch.h +197 -0
- data/ext/{whisper.cpp → sources/src/whisper.cpp} +2535 -835
- data/ext/sources/tests/CMakeLists.txt +105 -0
- data/ext/sources/tests/earnings21/eval.mk +58 -0
- data/ext/sources/tests/earnings21/eval.py +68 -0
- data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
- data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
- data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
- data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
- data/ext/sources/tests/earnings21/requirements.txt +6 -0
- data/ext/sources/tests/en-0-ref.txt +1 -0
- data/ext/sources/tests/en-1-ref.txt +1 -0
- data/ext/sources/tests/en-2-ref.txt +1 -0
- data/ext/sources/tests/es-0-ref.txt +1 -0
- data/ext/sources/tests/librispeech/eval.mk +39 -0
- data/ext/sources/tests/librispeech/eval.py +47 -0
- data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
- data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
- data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
- data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
- data/ext/sources/tests/librispeech/requirements.txt +6 -0
- data/ext/sources/tests/run-tests.sh +130 -0
- data/ext/sources/tests/test-c.c +3 -0
- data/ext/sources/tests/test-vad-full.cpp +54 -0
- data/ext/sources/tests/test-vad.cpp +83 -0
- data/ext/sources/tests/test-whisper.js +58 -0
- data/extsources.rb +34 -0
- data/lib/whisper/model/uri.rb +178 -0
- data/sig/whisper.rbs +480 -0
- data/tests/helper.rb +35 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +202 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +109 -0
- data/tests/test_package.rb +46 -0
- data/tests/test_params.rb +297 -0
- data/tests/test_segment.rb +74 -0
- data/tests/test_vad.rb +19 -0
- data/tests/test_vad_params.rb +103 -0
- data/tests/test_whisper.rb +212 -124
- data/whispercpp.gemspec +37 -0
- metadata +794 -13
- data/ext/dr_wav.h +0 -6434
- data/ext/ggml.c +0 -21755
- data/ext/ruby_whisper.cpp +0 -426
@@ -176,25 +176,15 @@
|
|
176
176
|
#ifdef GGML_SHARED
|
177
177
|
# if defined(_WIN32) && !defined(__MINGW32__)
|
178
178
|
# ifdef GGML_BUILD
|
179
|
-
# define GGML_API __declspec(dllexport)
|
179
|
+
# define GGML_API __declspec(dllexport) extern
|
180
180
|
# else
|
181
|
-
# define GGML_API __declspec(dllimport)
|
181
|
+
# define GGML_API __declspec(dllimport) extern
|
182
182
|
# endif
|
183
183
|
# else
|
184
|
-
# define GGML_API __attribute__ ((visibility ("default")))
|
184
|
+
# define GGML_API __attribute__ ((visibility ("default"))) extern
|
185
185
|
# endif
|
186
186
|
#else
|
187
|
-
# define GGML_API
|
188
|
-
#endif
|
189
|
-
|
190
|
-
#ifdef GGML_MULTIPLATFORM
|
191
|
-
# if defined(_WIN32)
|
192
|
-
# define GGML_CALL
|
193
|
-
# else
|
194
|
-
# define GGML_CALL __attribute__((__ms_abi__))
|
195
|
-
# endif
|
196
|
-
#else
|
197
|
-
# define GGML_CALL
|
187
|
+
# define GGML_API extern
|
198
188
|
#endif
|
199
189
|
|
200
190
|
// TODO: support for clang
|
@@ -208,7 +198,7 @@
|
|
208
198
|
|
209
199
|
#ifndef __GNUC__
|
210
200
|
# define GGML_ATTRIBUTE_FORMAT(...)
|
211
|
-
#elif defined(__MINGW32__)
|
201
|
+
#elif defined(__MINGW32__) && !defined(__clang__)
|
212
202
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
213
203
|
#else
|
214
204
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
@@ -220,21 +210,24 @@
|
|
220
210
|
#include <stdio.h>
|
221
211
|
|
222
212
|
#define GGML_FILE_MAGIC 0x67676d6c // "ggml"
|
223
|
-
#define GGML_FILE_VERSION
|
213
|
+
#define GGML_FILE_VERSION 2
|
224
214
|
|
225
215
|
#define GGML_QNT_VERSION 2 // bump this on quantization format changes
|
226
216
|
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
|
227
217
|
|
228
218
|
#define GGML_MAX_DIMS 4
|
229
219
|
#define GGML_MAX_PARAMS 2048
|
230
|
-
#define GGML_MAX_CONTEXTS 64
|
231
220
|
#define GGML_MAX_SRC 10
|
221
|
+
#define GGML_MAX_N_THREADS 512
|
222
|
+
#define GGML_MAX_OP_PARAMS 64
|
223
|
+
|
232
224
|
#ifndef GGML_MAX_NAME
|
233
|
-
#define GGML_MAX_NAME
|
225
|
+
# define GGML_MAX_NAME 64
|
234
226
|
#endif
|
235
|
-
|
227
|
+
|
236
228
|
#define GGML_DEFAULT_N_THREADS 4
|
237
229
|
#define GGML_DEFAULT_GRAPH_SIZE 2048
|
230
|
+
|
238
231
|
#if UINTPTR_MAX == 0xFFFFFFFF
|
239
232
|
#define GGML_MEM_ALIGN 4
|
240
233
|
#else
|
@@ -244,36 +237,35 @@
|
|
244
237
|
#define GGML_EXIT_SUCCESS 0
|
245
238
|
#define GGML_EXIT_ABORTED 1
|
246
239
|
|
247
|
-
#define
|
248
|
-
|
249
|
-
#define
|
250
|
-
|
251
|
-
#define GGUF_DEFAULT_ALIGNMENT 32
|
240
|
+
#define GGML_ROPE_TYPE_NEOX 2
|
241
|
+
#define GGML_ROPE_TYPE_MROPE 8
|
242
|
+
#define GGML_ROPE_TYPE_VISION 24
|
252
243
|
|
253
244
|
#define GGML_UNUSED(x) (void)(x)
|
254
245
|
|
255
246
|
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
|
256
247
|
|
257
|
-
#define GGML_ASSERT(x) \
|
258
|
-
do { \
|
259
|
-
if (!(x)) { \
|
260
|
-
fflush(stdout); \
|
261
|
-
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
|
262
|
-
ggml_print_backtrace(); \
|
263
|
-
abort(); \
|
264
|
-
} \
|
265
|
-
} while (0)
|
266
|
-
|
267
248
|
#ifndef NDEBUG
|
268
|
-
#define GGML_UNREACHABLE()
|
249
|
+
# define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
|
269
250
|
#elif defined(__GNUC__)
|
270
|
-
#define GGML_UNREACHABLE() __builtin_unreachable()
|
251
|
+
# define GGML_UNREACHABLE() __builtin_unreachable()
|
252
|
+
#elif defined(_MSC_VER)
|
253
|
+
# define GGML_UNREACHABLE() __assume(0)
|
254
|
+
#else
|
255
|
+
# define GGML_UNREACHABLE() ((void) 0)
|
256
|
+
#endif
|
257
|
+
|
258
|
+
#ifdef __cplusplus
|
259
|
+
# define GGML_NORETURN [[noreturn]]
|
271
260
|
#elif defined(_MSC_VER)
|
272
|
-
#define
|
261
|
+
# define GGML_NORETURN __declspec(noreturn)
|
273
262
|
#else
|
274
|
-
#define
|
263
|
+
# define GGML_NORETURN _Noreturn
|
275
264
|
#endif
|
276
265
|
|
266
|
+
#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
|
267
|
+
#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x)
|
268
|
+
|
277
269
|
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
278
270
|
// main purpose is to reduce code duplication and improve readability.
|
279
271
|
//
|
@@ -312,10 +304,19 @@
|
|
312
304
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
313
305
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
314
306
|
|
307
|
+
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
|
308
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
309
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
310
|
+
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
311
|
+
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
|
312
|
+
|
315
313
|
#ifdef __cplusplus
|
316
314
|
extern "C" {
|
317
315
|
#endif
|
318
316
|
|
317
|
+
GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
|
318
|
+
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
|
319
|
+
|
319
320
|
enum ggml_status {
|
320
321
|
GGML_STATUS_ALLOC_FAILED = -2,
|
321
322
|
GGML_STATUS_FAILED = -1,
|
@@ -324,19 +325,27 @@ extern "C" {
|
|
324
325
|
};
|
325
326
|
|
326
327
|
// get ggml_status name string
|
327
|
-
GGML_API
|
328
|
+
GGML_API const char * ggml_status_to_string(enum ggml_status status);
|
328
329
|
|
330
|
+
// ieee 754-2008 half-precision float16
|
331
|
+
// todo: make this not an integral type
|
329
332
|
typedef uint16_t ggml_fp16_t;
|
330
|
-
|
331
|
-
|
332
|
-
GGML_API
|
333
|
-
GGML_API
|
334
|
-
|
335
|
-
|
336
|
-
|
333
|
+
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t);
|
334
|
+
GGML_API ggml_fp16_t ggml_fp32_to_fp16(float);
|
335
|
+
GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t *, float *, int64_t);
|
336
|
+
GGML_API void ggml_fp32_to_fp16_row(const float *, ggml_fp16_t *, int64_t);
|
337
|
+
|
338
|
+
// google brain half-precision bfloat16
|
339
|
+
typedef struct { uint16_t bits; } ggml_bf16_t;
|
340
|
+
GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
|
341
|
+
GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
|
342
|
+
GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
|
343
|
+
GGML_API void ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
|
344
|
+
GGML_API void ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
|
337
345
|
|
338
346
|
struct ggml_object;
|
339
347
|
struct ggml_context;
|
348
|
+
struct ggml_cgraph;
|
340
349
|
|
341
350
|
// NOTE: always add types at the end of the enum to keep backward compatibility
|
342
351
|
enum ggml_type {
|
@@ -370,19 +379,22 @@ extern "C" {
|
|
370
379
|
GGML_TYPE_I64 = 27,
|
371
380
|
GGML_TYPE_F64 = 28,
|
372
381
|
GGML_TYPE_IQ1_M = 29,
|
373
|
-
|
382
|
+
GGML_TYPE_BF16 = 30,
|
383
|
+
// GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
|
384
|
+
// GGML_TYPE_Q4_0_4_8 = 32,
|
385
|
+
// GGML_TYPE_Q4_0_8_8 = 33,
|
386
|
+
GGML_TYPE_TQ1_0 = 34,
|
387
|
+
GGML_TYPE_TQ2_0 = 35,
|
388
|
+
// GGML_TYPE_IQ4_NL_4_4 = 36,
|
389
|
+
// GGML_TYPE_IQ4_NL_4_8 = 37,
|
390
|
+
// GGML_TYPE_IQ4_NL_8_8 = 38,
|
391
|
+
GGML_TYPE_COUNT = 39,
|
374
392
|
};
|
375
393
|
|
376
394
|
// precision
|
377
395
|
enum ggml_prec {
|
378
|
-
GGML_PREC_DEFAULT,
|
379
|
-
GGML_PREC_F32,
|
380
|
-
};
|
381
|
-
|
382
|
-
enum ggml_backend_type {
|
383
|
-
GGML_BACKEND_TYPE_CPU = 0,
|
384
|
-
GGML_BACKEND_TYPE_GPU = 10,
|
385
|
-
GGML_BACKEND_TYPE_GPU_SPLIT = 20,
|
396
|
+
GGML_PREC_DEFAULT = 0, // stored as ggml_tensor.op_params, 0 by default
|
397
|
+
GGML_PREC_F32 = 10,
|
386
398
|
};
|
387
399
|
|
388
400
|
// model file types
|
@@ -410,6 +422,7 @@ extern "C" {
|
|
410
422
|
GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
|
411
423
|
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
412
424
|
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
425
|
+
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
|
413
426
|
};
|
414
427
|
|
415
428
|
// available tensor operations:
|
@@ -426,10 +439,13 @@ extern "C" {
|
|
426
439
|
GGML_OP_SQR,
|
427
440
|
GGML_OP_SQRT,
|
428
441
|
GGML_OP_LOG,
|
442
|
+
GGML_OP_SIN,
|
443
|
+
GGML_OP_COS,
|
429
444
|
GGML_OP_SUM,
|
430
445
|
GGML_OP_SUM_ROWS,
|
431
446
|
GGML_OP_MEAN,
|
432
447
|
GGML_OP_ARGMAX,
|
448
|
+
GGML_OP_COUNT_EQUAL,
|
433
449
|
GGML_OP_REPEAT,
|
434
450
|
GGML_OP_REPEAT_BACK,
|
435
451
|
GGML_OP_CONCAT,
|
@@ -438,6 +454,7 @@ extern "C" {
|
|
438
454
|
GGML_OP_RMS_NORM,
|
439
455
|
GGML_OP_RMS_NORM_BACK,
|
440
456
|
GGML_OP_GROUP_NORM,
|
457
|
+
GGML_OP_L2_NORM,
|
441
458
|
|
442
459
|
GGML_OP_MUL_MAT,
|
443
460
|
GGML_OP_MUL_MAT_ID,
|
@@ -460,22 +477,24 @@ extern "C" {
|
|
460
477
|
GGML_OP_SOFT_MAX_BACK,
|
461
478
|
GGML_OP_ROPE,
|
462
479
|
GGML_OP_ROPE_BACK,
|
463
|
-
GGML_OP_ALIBI,
|
464
480
|
GGML_OP_CLAMP,
|
465
481
|
GGML_OP_CONV_TRANSPOSE_1D,
|
466
482
|
GGML_OP_IM2COL,
|
483
|
+
GGML_OP_IM2COL_BACK,
|
484
|
+
GGML_OP_CONV_2D_DW,
|
467
485
|
GGML_OP_CONV_TRANSPOSE_2D,
|
468
486
|
GGML_OP_POOL_1D,
|
469
487
|
GGML_OP_POOL_2D,
|
488
|
+
GGML_OP_POOL_2D_BACK,
|
470
489
|
GGML_OP_UPSCALE, // nearest interpolate
|
471
490
|
GGML_OP_PAD,
|
491
|
+
GGML_OP_PAD_REFLECT_1D,
|
472
492
|
GGML_OP_ARANGE,
|
473
493
|
GGML_OP_TIMESTEP_EMBEDDING,
|
474
494
|
GGML_OP_ARGSORT,
|
475
495
|
GGML_OP_LEAKY_RELU,
|
476
496
|
|
477
|
-
|
478
|
-
GGML_OP_FLASH_FF,
|
497
|
+
GGML_OP_FLASH_ATTN_EXT,
|
479
498
|
GGML_OP_FLASH_ATTN_BACK,
|
480
499
|
GGML_OP_SSM_CONV,
|
481
500
|
GGML_OP_SSM_SCAN,
|
@@ -483,22 +502,21 @@ extern "C" {
|
|
483
502
|
GGML_OP_WIN_UNPART,
|
484
503
|
GGML_OP_GET_REL_POS,
|
485
504
|
GGML_OP_ADD_REL_POS,
|
505
|
+
GGML_OP_RWKV_WKV6,
|
506
|
+
GGML_OP_GATED_LINEAR_ATTN,
|
507
|
+
GGML_OP_RWKV_WKV7,
|
486
508
|
|
487
509
|
GGML_OP_UNARY,
|
488
510
|
|
489
|
-
GGML_OP_MAP_UNARY,
|
490
|
-
GGML_OP_MAP_BINARY,
|
491
|
-
|
492
|
-
GGML_OP_MAP_CUSTOM1_F32,
|
493
|
-
GGML_OP_MAP_CUSTOM2_F32,
|
494
|
-
GGML_OP_MAP_CUSTOM3_F32,
|
495
|
-
|
496
511
|
GGML_OP_MAP_CUSTOM1,
|
497
512
|
GGML_OP_MAP_CUSTOM2,
|
498
513
|
GGML_OP_MAP_CUSTOM3,
|
499
514
|
|
515
|
+
GGML_OP_CUSTOM,
|
516
|
+
|
500
517
|
GGML_OP_CROSS_ENTROPY_LOSS,
|
501
518
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
519
|
+
GGML_OP_OPT_STEP_ADAMW,
|
502
520
|
|
503
521
|
GGML_OP_COUNT,
|
504
522
|
};
|
@@ -511,11 +529,14 @@ extern "C" {
|
|
511
529
|
GGML_UNARY_OP_TANH,
|
512
530
|
GGML_UNARY_OP_ELU,
|
513
531
|
GGML_UNARY_OP_RELU,
|
532
|
+
GGML_UNARY_OP_SIGMOID,
|
514
533
|
GGML_UNARY_OP_GELU,
|
515
534
|
GGML_UNARY_OP_GELU_QUICK,
|
516
535
|
GGML_UNARY_OP_SILU,
|
517
536
|
GGML_UNARY_OP_HARDSWISH,
|
518
537
|
GGML_UNARY_OP_HARDSIGMOID,
|
538
|
+
GGML_UNARY_OP_EXP,
|
539
|
+
GGML_UNARY_OP_GELU_ERF,
|
519
540
|
|
520
541
|
GGML_UNARY_OP_COUNT,
|
521
542
|
};
|
@@ -527,36 +548,32 @@ extern "C" {
|
|
527
548
|
};
|
528
549
|
|
529
550
|
enum ggml_log_level {
|
530
|
-
|
551
|
+
GGML_LOG_LEVEL_NONE = 0,
|
552
|
+
GGML_LOG_LEVEL_DEBUG = 1,
|
553
|
+
GGML_LOG_LEVEL_INFO = 2,
|
531
554
|
GGML_LOG_LEVEL_WARN = 3,
|
532
|
-
|
533
|
-
|
555
|
+
GGML_LOG_LEVEL_ERROR = 4,
|
556
|
+
GGML_LOG_LEVEL_CONT = 5, // continue previous log
|
534
557
|
};
|
535
558
|
|
559
|
+
// this tensor...
|
536
560
|
enum ggml_tensor_flag {
|
537
|
-
GGML_TENSOR_FLAG_INPUT =
|
538
|
-
GGML_TENSOR_FLAG_OUTPUT =
|
539
|
-
GGML_TENSOR_FLAG_PARAM =
|
561
|
+
GGML_TENSOR_FLAG_INPUT = 1, // ...is an input for the GGML compute graph
|
562
|
+
GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
|
563
|
+
GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters
|
564
|
+
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
|
540
565
|
};
|
541
566
|
|
542
|
-
|
543
|
-
|
544
|
-
size_t
|
545
|
-
|
546
|
-
|
547
|
-
struct ggml_object * next;
|
548
|
-
|
549
|
-
enum ggml_object_type type;
|
550
|
-
|
551
|
-
char padding[4];
|
567
|
+
struct ggml_init_params {
|
568
|
+
// memory pool
|
569
|
+
size_t mem_size; // bytes
|
570
|
+
void * mem_buffer; // if NULL, memory will be allocated internally
|
571
|
+
bool no_alloc; // don't allocate memory for the tensor data
|
552
572
|
};
|
553
573
|
|
554
|
-
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
|
555
|
-
|
556
574
|
// n-dimensional tensor
|
557
575
|
struct ggml_tensor {
|
558
|
-
enum ggml_type
|
559
|
-
enum ggml_backend_type backend;
|
576
|
+
enum ggml_type type;
|
560
577
|
|
561
578
|
struct ggml_backend_buffer * buffer;
|
562
579
|
|
@@ -574,14 +591,9 @@ extern "C" {
|
|
574
591
|
|
575
592
|
int32_t flags;
|
576
593
|
|
577
|
-
struct ggml_tensor * grad;
|
578
594
|
struct ggml_tensor * src[GGML_MAX_SRC];
|
579
595
|
|
580
|
-
//
|
581
|
-
int perf_runs;
|
582
|
-
int64_t perf_cycles;
|
583
|
-
int64_t perf_time_us;
|
584
|
-
|
596
|
+
// source tensor and offset for views
|
585
597
|
struct ggml_tensor * view_src;
|
586
598
|
size_t view_offs;
|
587
599
|
|
@@ -601,95 +613,6 @@ extern "C" {
|
|
601
613
|
// If it returns true, the computation is aborted
|
602
614
|
typedef bool (*ggml_abort_callback)(void * data);
|
603
615
|
|
604
|
-
// the compute plan that needs to be prepared for ggml_graph_compute()
|
605
|
-
// since https://github.com/ggerganov/ggml/issues/287
|
606
|
-
struct ggml_cplan {
|
607
|
-
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
|
608
|
-
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
|
609
|
-
|
610
|
-
int n_threads;
|
611
|
-
|
612
|
-
// abort ggml_graph_compute when true
|
613
|
-
ggml_abort_callback abort_callback;
|
614
|
-
void * abort_callback_data;
|
615
|
-
};
|
616
|
-
|
617
|
-
enum ggml_cgraph_eval_order {
|
618
|
-
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
|
619
|
-
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
|
620
|
-
GGML_CGRAPH_EVAL_ORDER_COUNT
|
621
|
-
};
|
622
|
-
|
623
|
-
struct ggml_hash_set {
|
624
|
-
size_t size;
|
625
|
-
struct ggml_tensor ** keys;
|
626
|
-
};
|
627
|
-
|
628
|
-
// computation graph
|
629
|
-
struct ggml_cgraph {
|
630
|
-
int size;
|
631
|
-
int n_nodes;
|
632
|
-
int n_leafs;
|
633
|
-
|
634
|
-
struct ggml_tensor ** nodes;
|
635
|
-
struct ggml_tensor ** grads;
|
636
|
-
struct ggml_tensor ** leafs;
|
637
|
-
|
638
|
-
struct ggml_hash_set visited_hash_table;
|
639
|
-
|
640
|
-
enum ggml_cgraph_eval_order order;
|
641
|
-
|
642
|
-
// performance
|
643
|
-
int perf_runs;
|
644
|
-
int64_t perf_cycles;
|
645
|
-
int64_t perf_time_us;
|
646
|
-
};
|
647
|
-
|
648
|
-
// scratch buffer
|
649
|
-
struct ggml_scratch {
|
650
|
-
size_t offs;
|
651
|
-
size_t size;
|
652
|
-
void * data;
|
653
|
-
};
|
654
|
-
|
655
|
-
struct ggml_init_params {
|
656
|
-
// memory pool
|
657
|
-
size_t mem_size; // bytes
|
658
|
-
void * mem_buffer; // if NULL, memory will be allocated internally
|
659
|
-
bool no_alloc; // don't allocate memory for the tensor data
|
660
|
-
};
|
661
|
-
|
662
|
-
|
663
|
-
// compute types
|
664
|
-
|
665
|
-
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|
666
|
-
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|
667
|
-
enum ggml_task_type {
|
668
|
-
GGML_TASK_TYPE_INIT = 0,
|
669
|
-
GGML_TASK_TYPE_COMPUTE,
|
670
|
-
GGML_TASK_TYPE_FINALIZE,
|
671
|
-
};
|
672
|
-
|
673
|
-
struct ggml_compute_params {
|
674
|
-
enum ggml_task_type type;
|
675
|
-
|
676
|
-
// ith = thread index, nth = number of threads
|
677
|
-
int ith, nth;
|
678
|
-
|
679
|
-
// work buffer for all threads
|
680
|
-
size_t wsize;
|
681
|
-
void * wdata;
|
682
|
-
};
|
683
|
-
|
684
|
-
// numa strategies
|
685
|
-
enum ggml_numa_strategy {
|
686
|
-
GGML_NUMA_STRATEGY_DISABLED = 0,
|
687
|
-
GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
|
688
|
-
GGML_NUMA_STRATEGY_ISOLATE = 2,
|
689
|
-
GGML_NUMA_STRATEGY_NUMACTL = 3,
|
690
|
-
GGML_NUMA_STRATEGY_MIRROR = 4,
|
691
|
-
GGML_NUMA_STRATEGY_COUNT
|
692
|
-
};
|
693
616
|
|
694
617
|
//
|
695
618
|
// GUID
|
@@ -709,67 +632,78 @@ extern "C" {
|
|
709
632
|
GGML_API int64_t ggml_cycles(void);
|
710
633
|
GGML_API int64_t ggml_cycles_per_ms(void);
|
711
634
|
|
712
|
-
GGML_API void ggml_print_backtrace(void);
|
713
|
-
|
714
635
|
// accepts a UTF-8 path, even on Windows
|
715
636
|
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
|
716
637
|
|
717
|
-
GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
|
718
|
-
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
719
|
-
|
720
638
|
GGML_API void ggml_print_object (const struct ggml_object * obj);
|
721
639
|
GGML_API void ggml_print_objects(const struct ggml_context * ctx);
|
722
640
|
|
723
|
-
GGML_API
|
724
|
-
GGML_API
|
725
|
-
GGML_API
|
726
|
-
GGML_API
|
641
|
+
GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
|
642
|
+
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
|
643
|
+
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
644
|
+
GGML_API size_t ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
|
727
645
|
|
728
|
-
GGML_API
|
729
|
-
GGML_API
|
730
|
-
GGML_API
|
646
|
+
GGML_API int64_t ggml_blck_size(enum ggml_type type);
|
647
|
+
GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
|
648
|
+
GGML_API size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
|
731
649
|
|
732
650
|
GGML_DEPRECATED(
|
733
651
|
GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
|
734
652
|
"use ggml_row_size() instead");
|
735
653
|
|
736
|
-
GGML_API
|
737
|
-
GGML_API
|
738
|
-
GGML_API
|
654
|
+
GGML_API const char * ggml_type_name(enum ggml_type type);
|
655
|
+
GGML_API const char * ggml_op_name (enum ggml_op op);
|
656
|
+
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
739
657
|
|
740
|
-
GGML_API
|
741
|
-
GGML_API
|
658
|
+
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
659
|
+
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
742
660
|
|
743
|
-
GGML_API
|
661
|
+
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
744
662
|
|
745
|
-
GGML_API
|
663
|
+
GGML_API bool ggml_is_quantized(enum ggml_type type);
|
746
664
|
|
747
665
|
// TODO: temporary until model loading of ggml examples is refactored
|
748
666
|
GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
|
749
667
|
|
750
|
-
GGML_API
|
751
|
-
GGML_API
|
752
|
-
GGML_API
|
753
|
-
GGML_API
|
754
|
-
GGML_API
|
755
|
-
GGML_API
|
756
|
-
GGML_API
|
757
|
-
GGML_API
|
758
|
-
|
668
|
+
GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
669
|
+
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
670
|
+
GGML_API bool ggml_is_empty (const struct ggml_tensor * tensor);
|
671
|
+
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
672
|
+
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
|
673
|
+
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
|
674
|
+
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
675
|
+
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
676
|
+
|
677
|
+
// returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
|
678
|
+
GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor);
|
679
|
+
GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
|
680
|
+
GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
|
681
|
+
GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
|
682
|
+
|
683
|
+
// returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
|
684
|
+
GGML_API bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor);
|
685
|
+
|
686
|
+
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
687
|
+
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
759
688
|
|
760
|
-
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
689
|
+
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
690
|
+
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
691
|
+
|
692
|
+
GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
761
693
|
|
762
694
|
// use this to compute the memory overhead of a tensor
|
763
695
|
GGML_API size_t ggml_tensor_overhead(void);
|
764
696
|
|
697
|
+
GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes);
|
698
|
+
|
765
699
|
// main
|
766
700
|
|
767
|
-
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
|
768
|
-
GGML_API void
|
701
|
+
GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
|
702
|
+
GGML_API void ggml_reset(struct ggml_context * ctx);
|
703
|
+
GGML_API void ggml_free (struct ggml_context * ctx);
|
769
704
|
|
770
705
|
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
|
771
706
|
|
772
|
-
GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
|
773
707
|
GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
|
774
708
|
GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
|
775
709
|
|
@@ -809,8 +743,7 @@ extern "C" {
|
|
809
743
|
int64_t ne2,
|
810
744
|
int64_t ne3);
|
811
745
|
|
812
|
-
GGML_API
|
813
|
-
GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
746
|
+
GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes);
|
814
747
|
|
815
748
|
GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
|
816
749
|
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
|
@@ -820,35 +753,25 @@ extern "C" {
|
|
820
753
|
GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
|
821
754
|
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
|
822
755
|
|
823
|
-
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
824
|
-
GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
825
|
-
GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
826
|
-
|
827
756
|
// Converts a flat index into coordinates
|
828
|
-
GGML_API void
|
829
|
-
|
830
|
-
GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
|
831
|
-
GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
|
832
|
-
|
833
|
-
GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
834
|
-
GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
|
757
|
+
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
835
758
|
|
836
|
-
GGML_API
|
837
|
-
GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
|
838
|
-
|
839
|
-
GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
840
|
-
GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
|
759
|
+
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
841
760
|
|
842
761
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
843
762
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
844
763
|
|
845
|
-
GGML_API GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
846
|
-
|
847
764
|
GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
|
848
765
|
GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
|
849
766
|
GGML_ATTRIBUTE_FORMAT(2, 3)
|
850
767
|
GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
|
851
768
|
|
769
|
+
// Tensor flags
|
770
|
+
GGML_API void ggml_set_input(struct ggml_tensor * tensor);
|
771
|
+
GGML_API void ggml_set_output(struct ggml_tensor * tensor);
|
772
|
+
GGML_API void ggml_set_param(struct ggml_tensor * tensor);
|
773
|
+
GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
|
774
|
+
|
852
775
|
//
|
853
776
|
// operations on tensors with backpropagation
|
854
777
|
//
|
@@ -963,6 +886,22 @@ extern "C" {
|
|
963
886
|
struct ggml_context * ctx,
|
964
887
|
struct ggml_tensor * a);
|
965
888
|
|
889
|
+
GGML_API struct ggml_tensor * ggml_sin(
|
890
|
+
struct ggml_context * ctx,
|
891
|
+
struct ggml_tensor * a);
|
892
|
+
|
893
|
+
GGML_API struct ggml_tensor * ggml_sin_inplace(
|
894
|
+
struct ggml_context * ctx,
|
895
|
+
struct ggml_tensor * a);
|
896
|
+
|
897
|
+
GGML_API struct ggml_tensor * ggml_cos(
|
898
|
+
struct ggml_context * ctx,
|
899
|
+
struct ggml_tensor * a);
|
900
|
+
|
901
|
+
GGML_API struct ggml_tensor * ggml_cos_inplace(
|
902
|
+
struct ggml_context * ctx,
|
903
|
+
struct ggml_tensor * a);
|
904
|
+
|
966
905
|
// return scalar
|
967
906
|
GGML_API struct ggml_tensor * ggml_sum(
|
968
907
|
struct ggml_context * ctx,
|
@@ -983,6 +922,12 @@ extern "C" {
|
|
983
922
|
struct ggml_context * ctx,
|
984
923
|
struct ggml_tensor * a);
|
985
924
|
|
925
|
+
// count number of equal elements in a and b
|
926
|
+
GGML_API struct ggml_tensor * ggml_count_equal(
|
927
|
+
struct ggml_context * ctx,
|
928
|
+
struct ggml_tensor * a,
|
929
|
+
struct ggml_tensor * b);
|
930
|
+
|
986
931
|
// if a is the same shape as b, and a is not parameter, return a
|
987
932
|
// otherwise, return a new tensor: repeat(a) to fit in b
|
988
933
|
GGML_API struct ggml_tensor * ggml_repeat(
|
@@ -994,14 +939,15 @@ extern "C" {
|
|
994
939
|
GGML_API struct ggml_tensor * ggml_repeat_back(
|
995
940
|
struct ggml_context * ctx,
|
996
941
|
struct ggml_tensor * a,
|
997
|
-
struct ggml_tensor * b);
|
942
|
+
struct ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
|
998
943
|
|
999
|
-
// concat a and b
|
944
|
+
// concat a and b along dim
|
1000
945
|
// used in stable-diffusion
|
1001
946
|
GGML_API struct ggml_tensor * ggml_concat(
|
1002
947
|
struct ggml_context * ctx,
|
1003
948
|
struct ggml_tensor * a,
|
1004
|
-
struct ggml_tensor * b
|
949
|
+
struct ggml_tensor * b,
|
950
|
+
int dim);
|
1005
951
|
|
1006
952
|
GGML_API struct ggml_tensor * ggml_abs(
|
1007
953
|
struct ggml_context * ctx,
|
@@ -1063,6 +1009,14 @@ extern "C" {
|
|
1063
1009
|
struct ggml_context * ctx,
|
1064
1010
|
struct ggml_tensor * a);
|
1065
1011
|
|
1012
|
+
GGML_API struct ggml_tensor * ggml_sigmoid(
|
1013
|
+
struct ggml_context * ctx,
|
1014
|
+
struct ggml_tensor * a);
|
1015
|
+
|
1016
|
+
GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
|
1017
|
+
struct ggml_context * ctx,
|
1018
|
+
struct ggml_tensor * a);
|
1019
|
+
|
1066
1020
|
GGML_API struct ggml_tensor * ggml_gelu(
|
1067
1021
|
struct ggml_context * ctx,
|
1068
1022
|
struct ggml_tensor * a);
|
@@ -1071,6 +1025,16 @@ extern "C" {
|
|
1071
1025
|
struct ggml_context * ctx,
|
1072
1026
|
struct ggml_tensor * a);
|
1073
1027
|
|
1028
|
+
// GELU using erf (error function) when possible
|
1029
|
+
// some backends may fallback to approximation based on Abramowitz and Stegun formula
|
1030
|
+
GGML_API struct ggml_tensor * ggml_gelu_erf(
|
1031
|
+
struct ggml_context * ctx,
|
1032
|
+
struct ggml_tensor * a);
|
1033
|
+
|
1034
|
+
GGML_API struct ggml_tensor * ggml_gelu_erf_inplace(
|
1035
|
+
struct ggml_context * ctx,
|
1036
|
+
struct ggml_tensor * a);
|
1037
|
+
|
1074
1038
|
GGML_API struct ggml_tensor * ggml_gelu_quick(
|
1075
1039
|
struct ggml_context * ctx,
|
1076
1040
|
struct ggml_tensor * a);
|
@@ -1104,6 +1068,14 @@ extern "C" {
|
|
1104
1068
|
struct ggml_context * ctx,
|
1105
1069
|
struct ggml_tensor * a);
|
1106
1070
|
|
1071
|
+
GGML_API struct ggml_tensor * ggml_exp(
|
1072
|
+
struct ggml_context * ctx,
|
1073
|
+
struct ggml_tensor * a);
|
1074
|
+
|
1075
|
+
GGML_API struct ggml_tensor * ggml_exp_inplace(
|
1076
|
+
struct ggml_context * ctx,
|
1077
|
+
struct ggml_tensor * a);
|
1078
|
+
|
1107
1079
|
// normalize along rows
|
1108
1080
|
GGML_API struct ggml_tensor * ggml_norm(
|
1109
1081
|
struct ggml_context * ctx,
|
@@ -1127,16 +1099,29 @@ extern "C" {
|
|
1127
1099
|
|
1128
1100
|
// group normalize along ne0*ne1*n_groups
|
1129
1101
|
// used in stable-diffusion
|
1130
|
-
// TODO: eps is hardcoded to 1e-6 for now
|
1131
1102
|
GGML_API struct ggml_tensor * ggml_group_norm(
|
1132
1103
|
struct ggml_context * ctx,
|
1133
1104
|
struct ggml_tensor * a,
|
1134
|
-
int n_groups
|
1105
|
+
int n_groups,
|
1106
|
+
float eps);
|
1135
1107
|
|
1136
1108
|
GGML_API struct ggml_tensor * ggml_group_norm_inplace(
|
1137
1109
|
struct ggml_context * ctx,
|
1138
1110
|
struct ggml_tensor * a,
|
1139
|
-
int n_groups
|
1111
|
+
int n_groups,
|
1112
|
+
float eps);
|
1113
|
+
|
1114
|
+
// l2 normalize along rows
|
1115
|
+
// used in rwkv v7
|
1116
|
+
GGML_API struct ggml_tensor * ggml_l2_norm(
|
1117
|
+
struct ggml_context * ctx,
|
1118
|
+
struct ggml_tensor * a,
|
1119
|
+
float eps);
|
1120
|
+
|
1121
|
+
GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
|
1122
|
+
struct ggml_context * ctx,
|
1123
|
+
struct ggml_tensor * a,
|
1124
|
+
float eps);
|
1140
1125
|
|
1141
1126
|
// a - x
|
1142
1127
|
// b - dy
|
@@ -1161,13 +1146,11 @@ extern "C" {
|
|
1161
1146
|
enum ggml_prec prec);
|
1162
1147
|
|
1163
1148
|
// indirect matrix multiplication
|
1164
|
-
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
|
1165
1149
|
GGML_API struct ggml_tensor * ggml_mul_mat_id(
|
1166
1150
|
struct ggml_context * ctx,
|
1167
1151
|
struct ggml_tensor * as,
|
1168
|
-
struct ggml_tensor *
|
1169
|
-
|
1170
|
-
struct ggml_tensor * b);
|
1152
|
+
struct ggml_tensor * b,
|
1153
|
+
struct ggml_tensor * ids);
|
1171
1154
|
|
1172
1155
|
// A: m columns, n rows,
|
1173
1156
|
// B: p columns, n rows,
|
@@ -1200,7 +1183,7 @@ extern "C" {
|
|
1200
1183
|
size_t nb1,
|
1201
1184
|
size_t nb2,
|
1202
1185
|
size_t nb3,
|
1203
|
-
size_t offset);
|
1186
|
+
size_t offset); // in bytes
|
1204
1187
|
|
1205
1188
|
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
1206
1189
|
GGML_API struct ggml_tensor * ggml_set_inplace(
|
@@ -1210,19 +1193,19 @@ extern "C" {
|
|
1210
1193
|
size_t nb1,
|
1211
1194
|
size_t nb2,
|
1212
1195
|
size_t nb3,
|
1213
|
-
size_t offset);
|
1196
|
+
size_t offset); // in bytes
|
1214
1197
|
|
1215
1198
|
GGML_API struct ggml_tensor * ggml_set_1d(
|
1216
1199
|
struct ggml_context * ctx,
|
1217
1200
|
struct ggml_tensor * a,
|
1218
1201
|
struct ggml_tensor * b,
|
1219
|
-
size_t offset);
|
1202
|
+
size_t offset); // in bytes
|
1220
1203
|
|
1221
1204
|
GGML_API struct ggml_tensor * ggml_set_1d_inplace(
|
1222
1205
|
struct ggml_context * ctx,
|
1223
1206
|
struct ggml_tensor * a,
|
1224
1207
|
struct ggml_tensor * b,
|
1225
|
-
size_t offset);
|
1208
|
+
size_t offset); // in bytes
|
1226
1209
|
|
1227
1210
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
1228
1211
|
GGML_API struct ggml_tensor * ggml_set_2d(
|
@@ -1230,7 +1213,7 @@ extern "C" {
|
|
1230
1213
|
struct ggml_tensor * a,
|
1231
1214
|
struct ggml_tensor * b,
|
1232
1215
|
size_t nb1,
|
1233
|
-
size_t offset);
|
1216
|
+
size_t offset); // in bytes
|
1234
1217
|
|
1235
1218
|
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
1236
1219
|
GGML_API struct ggml_tensor * ggml_set_2d_inplace(
|
@@ -1238,7 +1221,7 @@ extern "C" {
|
|
1238
1221
|
struct ggml_tensor * a,
|
1239
1222
|
struct ggml_tensor * b,
|
1240
1223
|
size_t nb1,
|
1241
|
-
size_t offset);
|
1224
|
+
size_t offset); // in bytes
|
1242
1225
|
|
1243
1226
|
// a -> b, return view(b)
|
1244
1227
|
GGML_API struct ggml_tensor * ggml_cpy(
|
@@ -1373,14 +1356,14 @@ extern "C" {
|
|
1373
1356
|
// supports 3D: a->ne[2] == b->ne[1]
|
1374
1357
|
GGML_API struct ggml_tensor * ggml_get_rows(
|
1375
1358
|
struct ggml_context * ctx,
|
1376
|
-
struct ggml_tensor * a,
|
1377
|
-
struct ggml_tensor * b);
|
1359
|
+
struct ggml_tensor * a, // data
|
1360
|
+
struct ggml_tensor * b); // row indices
|
1378
1361
|
|
1379
1362
|
GGML_API struct ggml_tensor * ggml_get_rows_back(
|
1380
1363
|
struct ggml_context * ctx,
|
1381
|
-
struct ggml_tensor * a,
|
1382
|
-
struct ggml_tensor * b,
|
1383
|
-
struct ggml_tensor * c);
|
1364
|
+
struct ggml_tensor * a, // gradients of ggml_get_rows result
|
1365
|
+
struct ggml_tensor * b, // row indices
|
1366
|
+
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
1384
1367
|
|
1385
1368
|
GGML_API struct ggml_tensor * ggml_diag(
|
1386
1369
|
struct ggml_context * ctx,
|
@@ -1419,33 +1402,34 @@ extern "C" {
|
|
1419
1402
|
struct ggml_context * ctx,
|
1420
1403
|
struct ggml_tensor * a);
|
1421
1404
|
|
1422
|
-
// fused soft_max(a*scale + mask
|
1405
|
+
// fused soft_max(a*scale + mask*(ALiBi slope))
|
1423
1406
|
// mask is optional
|
1424
|
-
// pos is required when max_bias > 0.0f
|
1425
1407
|
// max_bias = 0.0f for no ALiBi
|
1426
1408
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
1427
1409
|
struct ggml_context * ctx,
|
1428
1410
|
struct ggml_tensor * a,
|
1429
1411
|
struct ggml_tensor * mask,
|
1430
|
-
struct ggml_tensor * pos,
|
1431
1412
|
float scale,
|
1432
1413
|
float max_bias);
|
1433
1414
|
|
1434
|
-
GGML_API struct ggml_tensor *
|
1415
|
+
GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
|
1435
1416
|
struct ggml_context * ctx,
|
1436
1417
|
struct ggml_tensor * a,
|
1437
|
-
struct ggml_tensor * b
|
1418
|
+
struct ggml_tensor * b,
|
1419
|
+
float scale,
|
1420
|
+
float max_bias);
|
1438
1421
|
|
1439
1422
|
// in-place, returns view(a)
|
1440
|
-
GGML_API struct ggml_tensor *
|
1423
|
+
GGML_API struct ggml_tensor * ggml_soft_max_ext_back_inplace(
|
1441
1424
|
struct ggml_context * ctx,
|
1442
1425
|
struct ggml_tensor * a,
|
1443
|
-
struct ggml_tensor * b
|
1426
|
+
struct ggml_tensor * b,
|
1427
|
+
float scale,
|
1428
|
+
float max_bias);
|
1444
1429
|
|
1445
1430
|
// rotary position embedding
|
1446
|
-
// if mode & 1
|
1447
|
-
// if mode &
|
1448
|
-
// if mode & 4 == 1, ChatGLM style
|
1431
|
+
// if (mode & 1) - skip n_past elements (NOT SUPPORTED)
|
1432
|
+
// if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
|
1449
1433
|
//
|
1450
1434
|
// b is an int32 vector with size a->ne[2], it contains the positions
|
1451
1435
|
GGML_API struct ggml_tensor * ggml_rope(
|
@@ -1453,8 +1437,7 @@ extern "C" {
|
|
1453
1437
|
struct ggml_tensor * a,
|
1454
1438
|
struct ggml_tensor * b,
|
1455
1439
|
int n_dims,
|
1456
|
-
int mode
|
1457
|
-
int n_ctx);
|
1440
|
+
int mode);
|
1458
1441
|
|
1459
1442
|
// in-place, returns view(a)
|
1460
1443
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
@@ -1462,18 +1445,34 @@ extern "C" {
|
|
1462
1445
|
struct ggml_tensor * a,
|
1463
1446
|
struct ggml_tensor * b,
|
1464
1447
|
int n_dims,
|
1465
|
-
int mode
|
1466
|
-
int n_ctx);
|
1448
|
+
int mode);
|
1467
1449
|
|
1468
1450
|
// custom RoPE
|
1469
|
-
|
1451
|
+
// c is freq factors (e.g. phi3-128k), (optional)
|
1452
|
+
GGML_API struct ggml_tensor * ggml_rope_ext(
|
1470
1453
|
struct ggml_context * ctx,
|
1471
1454
|
struct ggml_tensor * a,
|
1472
1455
|
struct ggml_tensor * b,
|
1456
|
+
struct ggml_tensor * c,
|
1473
1457
|
int n_dims,
|
1474
1458
|
int mode,
|
1475
|
-
int
|
1476
|
-
|
1459
|
+
int n_ctx_orig,
|
1460
|
+
float freq_base,
|
1461
|
+
float freq_scale,
|
1462
|
+
float ext_factor,
|
1463
|
+
float attn_factor,
|
1464
|
+
float beta_fast,
|
1465
|
+
float beta_slow);
|
1466
|
+
|
1467
|
+
GGML_API struct ggml_tensor * ggml_rope_multi(
|
1468
|
+
struct ggml_context * ctx,
|
1469
|
+
struct ggml_tensor * a,
|
1470
|
+
struct ggml_tensor * b,
|
1471
|
+
struct ggml_tensor * c,
|
1472
|
+
int n_dims,
|
1473
|
+
int sections[4],
|
1474
|
+
int mode,
|
1475
|
+
int n_ctx_orig,
|
1477
1476
|
float freq_base,
|
1478
1477
|
float freq_scale,
|
1479
1478
|
float ext_factor,
|
@@ -1482,14 +1481,14 @@ extern "C" {
|
|
1482
1481
|
float beta_slow);
|
1483
1482
|
|
1484
1483
|
// in-place, returns view(a)
|
1485
|
-
GGML_API struct ggml_tensor *
|
1484
|
+
GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
|
1486
1485
|
struct ggml_context * ctx,
|
1487
1486
|
struct ggml_tensor * a,
|
1488
1487
|
struct ggml_tensor * b,
|
1488
|
+
struct ggml_tensor * c,
|
1489
1489
|
int n_dims,
|
1490
1490
|
int mode,
|
1491
|
-
int
|
1492
|
-
int n_orig_ctx,
|
1491
|
+
int n_ctx_orig,
|
1493
1492
|
float freq_base,
|
1494
1493
|
float freq_scale,
|
1495
1494
|
float ext_factor,
|
@@ -1497,47 +1496,73 @@ extern "C" {
|
|
1497
1496
|
float beta_fast,
|
1498
1497
|
float beta_slow);
|
1499
1498
|
|
1500
|
-
|
1501
|
-
|
1502
|
-
|
1499
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
|
1500
|
+
struct ggml_context * ctx,
|
1501
|
+
struct ggml_tensor * a,
|
1502
|
+
struct ggml_tensor * b,
|
1503
|
+
int n_dims,
|
1504
|
+
int mode,
|
1505
|
+
int n_ctx_orig,
|
1506
|
+
float freq_base,
|
1507
|
+
float freq_scale,
|
1508
|
+
float ext_factor,
|
1509
|
+
float attn_factor,
|
1510
|
+
float beta_fast,
|
1511
|
+
float beta_slow),
|
1512
|
+
"use ggml_rope_ext instead");
|
1503
1513
|
|
1504
|
-
|
1505
|
-
GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
|
1514
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
1506
1515
|
struct ggml_context * ctx,
|
1507
1516
|
struct ggml_tensor * a,
|
1508
1517
|
struct ggml_tensor * b,
|
1509
1518
|
int n_dims,
|
1510
|
-
|
1511
|
-
|
1519
|
+
int mode,
|
1520
|
+
int n_ctx_orig,
|
1521
|
+
float freq_base,
|
1522
|
+
float freq_scale,
|
1523
|
+
float ext_factor,
|
1524
|
+
float attn_factor,
|
1525
|
+
float beta_fast,
|
1526
|
+
float beta_slow),
|
1527
|
+
"use ggml_rope_ext_inplace instead");
|
1528
|
+
|
1529
|
+
// compute correction dims for YaRN RoPE scaling
|
1530
|
+
GGML_API void ggml_rope_yarn_corr_dims(
|
1531
|
+
int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
|
1512
1532
|
|
1513
1533
|
// rotary position embedding backward, i.e compute dx from dy
|
1514
1534
|
// a - dy
|
1515
|
-
GGML_API struct ggml_tensor *
|
1535
|
+
GGML_API struct ggml_tensor * ggml_rope_ext_back(
|
1516
1536
|
struct ggml_context * ctx,
|
1517
|
-
struct ggml_tensor * a,
|
1518
|
-
struct ggml_tensor * b,
|
1537
|
+
struct ggml_tensor * a, // gradients of ggml_rope result
|
1538
|
+
struct ggml_tensor * b, // positions
|
1539
|
+
struct ggml_tensor * c, // freq factors
|
1519
1540
|
int n_dims,
|
1520
1541
|
int mode,
|
1521
|
-
int
|
1522
|
-
int n_orig_ctx,
|
1542
|
+
int n_ctx_orig,
|
1523
1543
|
float freq_base,
|
1524
1544
|
float freq_scale,
|
1525
1545
|
float ext_factor,
|
1526
1546
|
float attn_factor,
|
1527
1547
|
float beta_fast,
|
1528
|
-
float beta_slow
|
1529
|
-
float xpos_base,
|
1530
|
-
bool xpos_down);
|
1548
|
+
float beta_slow);
|
1531
1549
|
|
1532
|
-
|
1533
|
-
// in-place, returns view(a)
|
1534
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
|
1550
|
+
GGML_API struct ggml_tensor * ggml_rope_multi_back(
|
1535
1551
|
struct ggml_context * ctx,
|
1536
1552
|
struct ggml_tensor * a,
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1553
|
+
struct ggml_tensor * b,
|
1554
|
+
struct ggml_tensor * c,
|
1555
|
+
int n_dims,
|
1556
|
+
int sections[4],
|
1557
|
+
int mode,
|
1558
|
+
int n_ctx_orig,
|
1559
|
+
float freq_base,
|
1560
|
+
float freq_scale,
|
1561
|
+
float ext_factor,
|
1562
|
+
float attn_factor,
|
1563
|
+
float beta_fast,
|
1564
|
+
float beta_slow);
|
1565
|
+
|
1541
1566
|
|
1542
1567
|
// clamp
|
1543
1568
|
// in-place, returns view(a)
|
@@ -1547,34 +1572,38 @@ extern "C" {
|
|
1547
1572
|
float min,
|
1548
1573
|
float max);
|
1549
1574
|
|
1575
|
+
// im2col
|
1576
|
+
// converts data into a format that effectively results in a convolution when combined with matrix multiplication
|
1550
1577
|
GGML_API struct ggml_tensor * ggml_im2col(
|
1551
1578
|
struct ggml_context * ctx,
|
1552
|
-
struct ggml_tensor * a,
|
1553
|
-
struct ggml_tensor * b,
|
1554
|
-
int
|
1555
|
-
int
|
1556
|
-
int
|
1557
|
-
int
|
1558
|
-
int
|
1559
|
-
int
|
1560
|
-
bool
|
1561
|
-
enum ggml_type
|
1562
|
-
|
1563
|
-
GGML_API struct ggml_tensor *
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
|
1569
|
-
|
1570
|
-
|
1571
|
-
|
1572
|
-
|
1579
|
+
struct ggml_tensor * a, // convolution kernel
|
1580
|
+
struct ggml_tensor * b, // data
|
1581
|
+
int s0, // stride dimension 0
|
1582
|
+
int s1, // stride dimension 1
|
1583
|
+
int p0, // padding dimension 0
|
1584
|
+
int p1, // padding dimension 1
|
1585
|
+
int d0, // dilation dimension 0
|
1586
|
+
int d1, // dilation dimension 1
|
1587
|
+
bool is_2D,
|
1588
|
+
enum ggml_type dst_type);
|
1589
|
+
|
1590
|
+
GGML_API struct ggml_tensor * ggml_im2col_back(
|
1591
|
+
struct ggml_context * ctx,
|
1592
|
+
struct ggml_tensor * a, // convolution kernel
|
1593
|
+
struct ggml_tensor * b, // gradient of im2col output
|
1594
|
+
int64_t * ne, // shape of im2col input
|
1595
|
+
int s0, // stride dimension 0
|
1596
|
+
int s1, // stride dimension 1
|
1597
|
+
int p0, // padding dimension 0
|
1598
|
+
int p1, // padding dimension 1
|
1599
|
+
int d0, // dilation dimension 0
|
1600
|
+
int d1, // dilation dimension 1
|
1601
|
+
bool is_2D);
|
1573
1602
|
|
1574
1603
|
GGML_API struct ggml_tensor * ggml_conv_1d(
|
1575
1604
|
struct ggml_context * ctx,
|
1576
|
-
struct ggml_tensor * a,
|
1577
|
-
struct ggml_tensor * b,
|
1605
|
+
struct ggml_tensor * a, // convolution kernel
|
1606
|
+
struct ggml_tensor * b, // data
|
1578
1607
|
int s0, // stride
|
1579
1608
|
int p0, // padding
|
1580
1609
|
int d0); // dilation
|
@@ -1583,30 +1612,46 @@ extern "C" {
|
|
1583
1612
|
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
1584
1613
|
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
|
1585
1614
|
struct ggml_context * ctx,
|
1586
|
-
struct ggml_tensor * a,
|
1587
|
-
struct ggml_tensor * b,
|
1588
|
-
int s,
|
1589
|
-
int d);
|
1615
|
+
struct ggml_tensor * a, // convolution kernel
|
1616
|
+
struct ggml_tensor * b, // data
|
1617
|
+
int s, // stride
|
1618
|
+
int d); // dilation
|
1619
|
+
|
1620
|
+
// depthwise
|
1621
|
+
// TODO: this is very likely wrong for some cases! - needs more testing
|
1622
|
+
GGML_API struct ggml_tensor * ggml_conv_1d_dw(
|
1623
|
+
struct ggml_context * ctx,
|
1624
|
+
struct ggml_tensor * a, // convolution kernel
|
1625
|
+
struct ggml_tensor * b, // data
|
1626
|
+
int s0, // stride
|
1627
|
+
int p0, // padding
|
1628
|
+
int d0); // dilation
|
1629
|
+
|
1630
|
+
GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph(
|
1631
|
+
struct ggml_context * ctx,
|
1632
|
+
struct ggml_tensor * a, // convolution kernel
|
1633
|
+
struct ggml_tensor * b, // data
|
1634
|
+
int s0, // stride
|
1635
|
+
int d0); // dilation
|
1590
1636
|
|
1591
1637
|
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
1592
1638
|
struct ggml_context * ctx,
|
1593
|
-
struct ggml_tensor * a,
|
1594
|
-
struct ggml_tensor * b,
|
1595
|
-
int s0,
|
1596
|
-
int p0,
|
1597
|
-
int d0);
|
1639
|
+
struct ggml_tensor * a, // convolution kernel
|
1640
|
+
struct ggml_tensor * b, // data
|
1641
|
+
int s0, // stride
|
1642
|
+
int p0, // padding
|
1643
|
+
int d0); // dilation
|
1598
1644
|
|
1599
1645
|
GGML_API struct ggml_tensor * ggml_conv_2d(
|
1600
1646
|
struct ggml_context * ctx,
|
1601
|
-
struct ggml_tensor * a,
|
1602
|
-
struct ggml_tensor * b,
|
1603
|
-
int s0,
|
1604
|
-
int s1,
|
1605
|
-
int p0,
|
1606
|
-
int p1,
|
1607
|
-
int d0,
|
1608
|
-
int d1);
|
1609
|
-
|
1647
|
+
struct ggml_tensor * a, // convolution kernel
|
1648
|
+
struct ggml_tensor * b, // data
|
1649
|
+
int s0, // stride dimension 0
|
1650
|
+
int s1, // stride dimension 1
|
1651
|
+
int p0, // padding dimension 0
|
1652
|
+
int p1, // padding dimension 1
|
1653
|
+
int d0, // dilation dimension 0
|
1654
|
+
int d1); // dilation dimension 1
|
1610
1655
|
|
1611
1656
|
// kernel size is a->ne[0] x a->ne[1]
|
1612
1657
|
// stride is equal to kernel size
|
@@ -1634,6 +1679,34 @@ extern "C" {
|
|
1634
1679
|
struct ggml_tensor * a,
|
1635
1680
|
struct ggml_tensor * b);
|
1636
1681
|
|
1682
|
+
// depthwise (via im2col and mul_mat)
|
1683
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_dw(
|
1684
|
+
struct ggml_context * ctx,
|
1685
|
+
struct ggml_tensor * a, // convolution kernel
|
1686
|
+
struct ggml_tensor * b, // data
|
1687
|
+
int s0, // stride dimension 0
|
1688
|
+
int s1, // stride dimension 1
|
1689
|
+
int p0, // padding dimension 0
|
1690
|
+
int p1, // padding dimension 1
|
1691
|
+
int d0, // dilation dimension 0
|
1692
|
+
int d1); // dilation dimension 1
|
1693
|
+
|
1694
|
+
// Depthwise 2D convolution
|
1695
|
+
// may be faster than ggml_conv_2d_dw, but not available in all backends
|
1696
|
+
// a: KW KH 1 C convolution kernel
|
1697
|
+
// b: W H C N input data
|
1698
|
+
// res: W_out H_out C N
|
1699
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct(
|
1700
|
+
struct ggml_context * ctx,
|
1701
|
+
struct ggml_tensor * a,
|
1702
|
+
struct ggml_tensor * b,
|
1703
|
+
int stride0,
|
1704
|
+
int stride1,
|
1705
|
+
int pad0,
|
1706
|
+
int pad1,
|
1707
|
+
int dilation0,
|
1708
|
+
int dilation1);
|
1709
|
+
|
1637
1710
|
GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
|
1638
1711
|
struct ggml_context * ctx,
|
1639
1712
|
struct ggml_tensor * a,
|
@@ -1667,12 +1740,41 @@ extern "C" {
|
|
1667
1740
|
float p0,
|
1668
1741
|
float p1);
|
1669
1742
|
|
1670
|
-
|
1671
|
-
|
1743
|
+
GGML_API struct ggml_tensor * ggml_pool_2d_back(
|
1744
|
+
struct ggml_context * ctx,
|
1745
|
+
struct ggml_tensor * a,
|
1746
|
+
struct ggml_tensor * af, // "a"/input used in forward pass
|
1747
|
+
enum ggml_op_pool op,
|
1748
|
+
int k0,
|
1749
|
+
int k1,
|
1750
|
+
int s0,
|
1751
|
+
int s1,
|
1752
|
+
float p0,
|
1753
|
+
float p1);
|
1754
|
+
|
1755
|
+
enum ggml_scale_mode {
|
1756
|
+
GGML_SCALE_MODE_NEAREST = 0,
|
1757
|
+
GGML_SCALE_MODE_BILINEAR = 1,
|
1758
|
+
};
|
1759
|
+
|
1760
|
+
// interpolate
|
1761
|
+
// multiplies ne0 and ne1 by scale factor
|
1672
1762
|
GGML_API struct ggml_tensor * ggml_upscale(
|
1673
1763
|
struct ggml_context * ctx,
|
1674
1764
|
struct ggml_tensor * a,
|
1675
|
-
int scale_factor
|
1765
|
+
int scale_factor,
|
1766
|
+
enum ggml_scale_mode mode);
|
1767
|
+
|
1768
|
+
// interpolate
|
1769
|
+
// interpolate scale to specified dimensions
|
1770
|
+
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
1771
|
+
struct ggml_context * ctx,
|
1772
|
+
struct ggml_tensor * a,
|
1773
|
+
int ne0,
|
1774
|
+
int ne1,
|
1775
|
+
int ne2,
|
1776
|
+
int ne3,
|
1777
|
+
enum ggml_scale_mode mode);
|
1676
1778
|
|
1677
1779
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
1678
1780
|
GGML_API struct ggml_tensor * ggml_pad(
|
@@ -1683,6 +1785,13 @@ extern "C" {
|
|
1683
1785
|
int p2,
|
1684
1786
|
int p3);
|
1685
1787
|
|
1788
|
+
// pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
|
1789
|
+
GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
|
1790
|
+
struct ggml_context * ctx,
|
1791
|
+
struct ggml_tensor * a,
|
1792
|
+
int p0,
|
1793
|
+
int p1);
|
1794
|
+
|
1686
1795
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
1687
1796
|
// timesteps: [N,]
|
1688
1797
|
// return: [N, dim]
|
@@ -1715,13 +1824,31 @@ extern "C" {
|
|
1715
1824
|
struct ggml_tensor * a,
|
1716
1825
|
int k);
|
1717
1826
|
|
1718
|
-
|
1827
|
+
#define GGML_KQ_MASK_PAD 64
|
1828
|
+
|
1829
|
+
// q: [n_embd_k, n_batch, n_head, 1]
|
1830
|
+
// k: [n_embd_k, n_kv, n_head_kv, 1]
|
1831
|
+
// v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
|
1832
|
+
// mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
1833
|
+
// res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
|
1834
|
+
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
1719
1835
|
struct ggml_context * ctx,
|
1720
1836
|
struct ggml_tensor * q,
|
1721
1837
|
struct ggml_tensor * k,
|
1722
1838
|
struct ggml_tensor * v,
|
1723
|
-
|
1839
|
+
struct ggml_tensor * mask,
|
1840
|
+
float scale,
|
1841
|
+
float max_bias,
|
1842
|
+
float logit_softcap);
|
1843
|
+
|
1844
|
+
GGML_API void ggml_flash_attn_ext_set_prec(
|
1845
|
+
struct ggml_tensor * a,
|
1846
|
+
enum ggml_prec prec);
|
1847
|
+
|
1848
|
+
GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
|
1849
|
+
const struct ggml_tensor * a);
|
1724
1850
|
|
1851
|
+
// TODO: needs to be adapted to ggml_flash_attn_ext
|
1725
1852
|
GGML_API struct ggml_tensor * ggml_flash_attn_back(
|
1726
1853
|
struct ggml_context * ctx,
|
1727
1854
|
struct ggml_tensor * q,
|
@@ -1730,20 +1857,10 @@ extern "C" {
|
|
1730
1857
|
struct ggml_tensor * d,
|
1731
1858
|
bool masked);
|
1732
1859
|
|
1733
|
-
GGML_API struct ggml_tensor * ggml_flash_ff(
|
1734
|
-
struct ggml_context * ctx,
|
1735
|
-
struct ggml_tensor * a,
|
1736
|
-
struct ggml_tensor * b0,
|
1737
|
-
struct ggml_tensor * b1,
|
1738
|
-
struct ggml_tensor * c0,
|
1739
|
-
struct ggml_tensor * c1);
|
1740
|
-
|
1741
1860
|
GGML_API struct ggml_tensor * ggml_ssm_conv(
|
1742
1861
|
struct ggml_context * ctx,
|
1743
|
-
struct ggml_tensor *
|
1744
|
-
struct ggml_tensor *
|
1745
|
-
struct ggml_tensor * c,
|
1746
|
-
struct ggml_tensor * sq);
|
1862
|
+
struct ggml_tensor * sx,
|
1863
|
+
struct ggml_tensor * c);
|
1747
1864
|
|
1748
1865
|
GGML_API struct ggml_tensor * ggml_ssm_scan(
|
1749
1866
|
struct ggml_context * ctx,
|
@@ -1752,8 +1869,7 @@ extern "C" {
|
|
1752
1869
|
struct ggml_tensor * dt,
|
1753
1870
|
struct ggml_tensor * A,
|
1754
1871
|
struct ggml_tensor * B,
|
1755
|
-
struct ggml_tensor * C
|
1756
|
-
struct ggml_tensor * sq);
|
1872
|
+
struct ggml_tensor * C);
|
1757
1873
|
|
1758
1874
|
// partition into non-overlapping windows with padding if needed
|
1759
1875
|
// example:
|
@@ -1805,90 +1921,42 @@ extern "C" {
|
|
1805
1921
|
struct ggml_tensor * pw,
|
1806
1922
|
struct ggml_tensor * ph);
|
1807
1923
|
|
1808
|
-
|
1924
|
+
GGML_API struct ggml_tensor * ggml_rwkv_wkv6(
|
1925
|
+
struct ggml_context * ctx,
|
1926
|
+
struct ggml_tensor * k,
|
1927
|
+
struct ggml_tensor * v,
|
1928
|
+
struct ggml_tensor * r,
|
1929
|
+
struct ggml_tensor * tf,
|
1930
|
+
struct ggml_tensor * td,
|
1931
|
+
struct ggml_tensor * state);
|
1932
|
+
|
1933
|
+
GGML_API struct ggml_tensor * ggml_gated_linear_attn(
|
1934
|
+
struct ggml_context * ctx,
|
1935
|
+
struct ggml_tensor * k,
|
1936
|
+
struct ggml_tensor * v,
|
1937
|
+
struct ggml_tensor * q,
|
1938
|
+
struct ggml_tensor * g,
|
1939
|
+
struct ggml_tensor * state,
|
1940
|
+
float scale);
|
1941
|
+
|
1942
|
+
GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
|
1943
|
+
struct ggml_context * ctx,
|
1944
|
+
struct ggml_tensor * r,
|
1945
|
+
struct ggml_tensor * w,
|
1946
|
+
struct ggml_tensor * k,
|
1947
|
+
struct ggml_tensor * v,
|
1948
|
+
struct ggml_tensor * a,
|
1949
|
+
struct ggml_tensor * b,
|
1950
|
+
struct ggml_tensor * state);
|
1809
1951
|
|
1810
|
-
|
1811
|
-
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
1812
|
-
|
1813
|
-
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
1814
|
-
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1815
|
-
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1816
|
-
|
1817
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
1818
|
-
struct ggml_context * ctx,
|
1819
|
-
struct ggml_tensor * a,
|
1820
|
-
ggml_unary_op_f32_t fun),
|
1821
|
-
"use ggml_map_custom1 instead");
|
1822
|
-
|
1823
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
1824
|
-
struct ggml_context * ctx,
|
1825
|
-
struct ggml_tensor * a,
|
1826
|
-
ggml_unary_op_f32_t fun),
|
1827
|
-
"use ggml_map_custom1_inplace instead");
|
1828
|
-
|
1829
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
1830
|
-
struct ggml_context * ctx,
|
1831
|
-
struct ggml_tensor * a,
|
1832
|
-
struct ggml_tensor * b,
|
1833
|
-
ggml_binary_op_f32_t fun),
|
1834
|
-
"use ggml_map_custom2 instead");
|
1835
|
-
|
1836
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
1837
|
-
struct ggml_context * ctx,
|
1838
|
-
struct ggml_tensor * a,
|
1839
|
-
struct ggml_tensor * b,
|
1840
|
-
ggml_binary_op_f32_t fun),
|
1841
|
-
"use ggml_map_custom2_inplace instead");
|
1842
|
-
|
1843
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
1844
|
-
struct ggml_context * ctx,
|
1845
|
-
struct ggml_tensor * a,
|
1846
|
-
ggml_custom1_op_f32_t fun),
|
1847
|
-
"use ggml_map_custom1 instead");
|
1848
|
-
|
1849
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
1850
|
-
struct ggml_context * ctx,
|
1851
|
-
struct ggml_tensor * a,
|
1852
|
-
ggml_custom1_op_f32_t fun),
|
1853
|
-
"use ggml_map_custom1_inplace instead");
|
1854
|
-
|
1855
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
1856
|
-
struct ggml_context * ctx,
|
1857
|
-
struct ggml_tensor * a,
|
1858
|
-
struct ggml_tensor * b,
|
1859
|
-
ggml_custom2_op_f32_t fun),
|
1860
|
-
"use ggml_map_custom2 instead");
|
1861
|
-
|
1862
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
1863
|
-
struct ggml_context * ctx,
|
1864
|
-
struct ggml_tensor * a,
|
1865
|
-
struct ggml_tensor * b,
|
1866
|
-
ggml_custom2_op_f32_t fun),
|
1867
|
-
"use ggml_map_custom2_inplace instead");
|
1868
|
-
|
1869
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
1870
|
-
struct ggml_context * ctx,
|
1871
|
-
struct ggml_tensor * a,
|
1872
|
-
struct ggml_tensor * b,
|
1873
|
-
struct ggml_tensor * c,
|
1874
|
-
ggml_custom3_op_f32_t fun),
|
1875
|
-
"use ggml_map_custom3 instead");
|
1876
|
-
|
1877
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
1878
|
-
struct ggml_context * ctx,
|
1879
|
-
struct ggml_tensor * a,
|
1880
|
-
struct ggml_tensor * b,
|
1881
|
-
struct ggml_tensor * c,
|
1882
|
-
ggml_custom3_op_f32_t fun),
|
1883
|
-
"use ggml_map_custom3_inplace instead");
|
1884
|
-
|
1885
|
-
// custom operators v2
|
1952
|
+
// custom operators
|
1886
1953
|
|
1887
1954
|
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
1888
1955
|
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
|
1889
1956
|
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
|
1890
1957
|
|
1891
|
-
|
1958
|
+
#define GGML_N_TASKS_MAX (-1)
|
1959
|
+
// n_tasks == GGML_N_TASKS_MAX means to use max number of tasks
|
1892
1960
|
|
1893
1961
|
GGML_API struct ggml_tensor * ggml_map_custom1(
|
1894
1962
|
struct ggml_context * ctx,
|
@@ -1938,52 +2006,85 @@ extern "C" {
|
|
1938
2006
|
int n_tasks,
|
1939
2007
|
void * userdata);
|
1940
2008
|
|
2009
|
+
typedef void (*ggml_custom_op_t)(struct ggml_tensor * dst , int ith, int nth, void * userdata);
|
2010
|
+
|
2011
|
+
GGML_API struct ggml_tensor * ggml_custom_4d(
|
2012
|
+
struct ggml_context * ctx,
|
2013
|
+
enum ggml_type type,
|
2014
|
+
int64_t ne0,
|
2015
|
+
int64_t ne1,
|
2016
|
+
int64_t ne2,
|
2017
|
+
int64_t ne3,
|
2018
|
+
struct ggml_tensor ** args,
|
2019
|
+
int n_args,
|
2020
|
+
ggml_custom_op_t fun,
|
2021
|
+
int n_tasks,
|
2022
|
+
void * userdata);
|
2023
|
+
|
2024
|
+
GGML_API struct ggml_tensor * ggml_custom_inplace(
|
2025
|
+
struct ggml_context * ctx,
|
2026
|
+
struct ggml_tensor * a,
|
2027
|
+
struct ggml_tensor ** args,
|
2028
|
+
int n_args,
|
2029
|
+
ggml_custom_op_t fun,
|
2030
|
+
int n_tasks,
|
2031
|
+
void * userdata);
|
2032
|
+
|
1941
2033
|
// loss function
|
1942
2034
|
|
1943
2035
|
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
|
1944
|
-
struct ggml_context
|
1945
|
-
struct ggml_tensor
|
1946
|
-
struct ggml_tensor
|
2036
|
+
struct ggml_context * ctx,
|
2037
|
+
struct ggml_tensor * a, // logits
|
2038
|
+
struct ggml_tensor * b); // labels
|
1947
2039
|
|
1948
2040
|
GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
|
1949
|
-
struct ggml_context
|
1950
|
-
struct ggml_tensor
|
1951
|
-
struct ggml_tensor
|
1952
|
-
struct ggml_tensor
|
2041
|
+
struct ggml_context * ctx,
|
2042
|
+
struct ggml_tensor * a, // logits
|
2043
|
+
struct ggml_tensor * b, // labels
|
2044
|
+
struct ggml_tensor * c); // gradients of cross_entropy_loss result
|
2045
|
+
|
2046
|
+
// AdamW optimizer step
|
2047
|
+
// Paper: https://arxiv.org/pdf/1711.05101v3.pdf
|
2048
|
+
// PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
|
2049
|
+
GGML_API struct ggml_tensor * ggml_opt_step_adamw(
|
2050
|
+
struct ggml_context * ctx,
|
2051
|
+
struct ggml_tensor * a,
|
2052
|
+
struct ggml_tensor * grad,
|
2053
|
+
struct ggml_tensor * m,
|
2054
|
+
struct ggml_tensor * v,
|
2055
|
+
struct ggml_tensor * adamw_params); // parameters such as the learning rate
|
1953
2056
|
|
1954
2057
|
//
|
1955
2058
|
// automatic differentiation
|
1956
2059
|
//
|
1957
2060
|
|
1958
|
-
GGML_API void
|
1959
|
-
|
1960
|
-
|
2061
|
+
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
2062
|
+
GGML_API void ggml_build_backward_expand(
|
2063
|
+
struct ggml_context * ctx, // context for gradient computation
|
2064
|
+
struct ggml_cgraph * cgraph,
|
2065
|
+
struct ggml_tensor ** grad_accs);
|
1961
2066
|
|
2067
|
+
// graph allocation in a context
|
2068
|
+
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
2069
|
+
GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
|
2070
|
+
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads);
|
2071
|
+
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
2072
|
+
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
|
2073
|
+
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|
1962
2074
|
|
1963
|
-
GGML_API
|
1964
|
-
GGML_API
|
2075
|
+
GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
|
2076
|
+
GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
|
2077
|
+
GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
|
2078
|
+
GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
|
1965
2079
|
|
1966
|
-
|
1967
|
-
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
1968
|
-
GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
|
1969
|
-
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
1970
|
-
GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
|
1971
|
-
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
1972
|
-
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
|
1973
|
-
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|
2080
|
+
GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
1974
2081
|
|
1975
2082
|
GGML_API size_t ggml_graph_overhead(void);
|
1976
2083
|
GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
|
1977
2084
|
|
1978
|
-
|
1979
|
-
|
1980
|
-
GGML_API struct
|
1981
|
-
GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
|
1982
|
-
// same as ggml_graph_compute() but the work data is allocated as a part of the context
|
1983
|
-
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
|
1984
|
-
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
|
1985
|
-
|
1986
|
-
GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
|
2085
|
+
GGML_API struct ggml_tensor * ggml_graph_get_tensor (const struct ggml_cgraph * cgraph, const char * name);
|
2086
|
+
GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
|
2087
|
+
GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
|
1987
2088
|
|
1988
2089
|
GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
|
1989
2090
|
GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
|
@@ -1994,197 +2095,14 @@ extern "C" {
|
|
1994
2095
|
// dump the graph into a file using the dot format
|
1995
2096
|
GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
|
1996
2097
|
|
1997
|
-
//
|
1998
|
-
// gb_tmp will contain original backward graph with rewritten backward process nodes,
|
1999
|
-
// but without the second forward pass nodes.
|
2000
|
-
GGML_API void ggml_build_backward_gradient_checkpointing(
|
2001
|
-
struct ggml_context * ctx,
|
2002
|
-
struct ggml_cgraph * gf,
|
2003
|
-
struct ggml_cgraph * gb,
|
2004
|
-
struct ggml_cgraph * gb_tmp,
|
2005
|
-
struct ggml_tensor * * checkpoints,
|
2006
|
-
int n_checkpoints);
|
2007
|
-
//
|
2008
|
-
// optimization
|
2009
|
-
//
|
2010
|
-
|
2011
|
-
// optimization methods
|
2012
|
-
enum ggml_opt_type {
|
2013
|
-
GGML_OPT_TYPE_ADAM,
|
2014
|
-
GGML_OPT_TYPE_LBFGS,
|
2015
|
-
};
|
2016
|
-
|
2017
|
-
// linesearch methods
|
2018
|
-
enum ggml_linesearch {
|
2019
|
-
GGML_LINESEARCH_DEFAULT = 1,
|
2020
|
-
|
2021
|
-
GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
|
2022
|
-
GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
|
2023
|
-
GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
|
2024
|
-
};
|
2025
|
-
|
2026
|
-
// optimization return values
|
2027
|
-
enum ggml_opt_result {
|
2028
|
-
GGML_OPT_RESULT_OK = 0,
|
2029
|
-
GGML_OPT_RESULT_DID_NOT_CONVERGE,
|
2030
|
-
GGML_OPT_RESULT_NO_CONTEXT,
|
2031
|
-
GGML_OPT_RESULT_INVALID_WOLFE,
|
2032
|
-
GGML_OPT_RESULT_FAIL,
|
2033
|
-
GGML_OPT_RESULT_CANCEL,
|
2034
|
-
|
2035
|
-
GGML_LINESEARCH_FAIL = -128,
|
2036
|
-
GGML_LINESEARCH_MINIMUM_STEP,
|
2037
|
-
GGML_LINESEARCH_MAXIMUM_STEP,
|
2038
|
-
GGML_LINESEARCH_MAXIMUM_ITERATIONS,
|
2039
|
-
GGML_LINESEARCH_INVALID_PARAMETERS,
|
2040
|
-
};
|
2041
|
-
|
2042
|
-
typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
|
2098
|
+
// TODO these functions were sandwiched in the old optimization interface, is there a better place for them?
|
2043
2099
|
typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
|
2044
2100
|
|
2045
|
-
//
|
2046
|
-
//
|
2047
|
-
|
2048
|
-
//
|
2049
|
-
struct ggml_opt_params {
|
2050
|
-
enum ggml_opt_type type;
|
2051
|
-
|
2052
|
-
size_t graph_size;
|
2053
|
-
|
2054
|
-
int n_threads;
|
2055
|
-
|
2056
|
-
// delta-based convergence test
|
2057
|
-
//
|
2058
|
-
// if past == 0 - disabled
|
2059
|
-
// if past > 0:
|
2060
|
-
// stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
|
2061
|
-
//
|
2062
|
-
int past;
|
2063
|
-
float delta;
|
2064
|
-
|
2065
|
-
// maximum number of iterations without improvement
|
2066
|
-
//
|
2067
|
-
// if 0 - disabled
|
2068
|
-
// if > 0:
|
2069
|
-
// assume convergence if no cost improvement in this number of iterations
|
2070
|
-
//
|
2071
|
-
int max_no_improvement;
|
2072
|
-
|
2073
|
-
bool print_forward_graph;
|
2074
|
-
bool print_backward_graph;
|
2075
|
-
|
2076
|
-
int n_gradient_accumulation;
|
2077
|
-
|
2078
|
-
// ADAM parameters
|
2079
|
-
struct {
|
2080
|
-
int n_iter;
|
2081
|
-
|
2082
|
-
float sched; // schedule multiplier (fixed, decay or warmup)
|
2083
|
-
float decay; // weight decay for AdamW, use 0.0f to disable
|
2084
|
-
int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
|
2085
|
-
float alpha; // learning rate
|
2086
|
-
float beta1;
|
2087
|
-
float beta2;
|
2088
|
-
float eps; // epsilon for numerical stability
|
2089
|
-
float eps_f; // epsilon for convergence test
|
2090
|
-
float eps_g; // epsilon for convergence test
|
2091
|
-
float gclip; // gradient clipping
|
2092
|
-
} adam;
|
2093
|
-
|
2094
|
-
// LBFGS parameters
|
2095
|
-
struct {
|
2096
|
-
int m; // number of corrections to approximate the inv. Hessian
|
2097
|
-
int n_iter;
|
2098
|
-
int max_linesearch;
|
2099
|
-
|
2100
|
-
float eps; // convergence tolerance
|
2101
|
-
float ftol; // line search tolerance
|
2102
|
-
float wolfe;
|
2103
|
-
float min_step;
|
2104
|
-
float max_step;
|
2105
|
-
|
2106
|
-
enum ggml_linesearch linesearch;
|
2107
|
-
} lbfgs;
|
2108
|
-
};
|
2109
|
-
|
2110
|
-
struct ggml_opt_context {
|
2111
|
-
struct ggml_context * ctx;
|
2112
|
-
struct ggml_opt_params params;
|
2113
|
-
|
2114
|
-
int iter;
|
2115
|
-
int64_t nx; // number of parameter elements
|
2116
|
-
|
2117
|
-
bool just_initialized;
|
2118
|
-
|
2119
|
-
float loss_before;
|
2120
|
-
float loss_after;
|
2121
|
-
|
2122
|
-
struct {
|
2123
|
-
struct ggml_tensor * g; // current gradient
|
2124
|
-
struct ggml_tensor * m; // first moment
|
2125
|
-
struct ggml_tensor * v; // second moment
|
2126
|
-
struct ggml_tensor * pf; // past function values
|
2127
|
-
float fx_best;
|
2128
|
-
float fx_prev;
|
2129
|
-
int n_no_improvement;
|
2130
|
-
} adam;
|
2131
|
-
|
2132
|
-
struct {
|
2133
|
-
struct ggml_tensor * x; // current parameters
|
2134
|
-
struct ggml_tensor * xp; // previous parameters
|
2135
|
-
struct ggml_tensor * g; // current gradient
|
2136
|
-
struct ggml_tensor * gp; // previous gradient
|
2137
|
-
struct ggml_tensor * d; // search direction
|
2138
|
-
struct ggml_tensor * pf; // past function values
|
2139
|
-
struct ggml_tensor * lmal; // the L-BFGS memory alpha
|
2140
|
-
struct ggml_tensor * lmys; // the L-BFGS memory ys
|
2141
|
-
struct ggml_tensor * lms; // the L-BFGS memory s
|
2142
|
-
struct ggml_tensor * lmy; // the L-BFGS memory y
|
2143
|
-
float fx_best;
|
2144
|
-
float step;
|
2145
|
-
int j;
|
2146
|
-
int k;
|
2147
|
-
int end;
|
2148
|
-
int n_no_improvement;
|
2149
|
-
} lbfgs;
|
2150
|
-
};
|
2151
|
-
|
2152
|
-
GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
|
2153
|
-
|
2154
|
-
// optimize the function defined by the tensor f
|
2155
|
-
GGML_API enum ggml_opt_result ggml_opt(
|
2156
|
-
struct ggml_context * ctx,
|
2157
|
-
struct ggml_opt_params params,
|
2158
|
-
struct ggml_tensor * f);
|
2159
|
-
|
2160
|
-
// initialize optimizer context
|
2161
|
-
GGML_API void ggml_opt_init(
|
2162
|
-
struct ggml_context * ctx,
|
2163
|
-
struct ggml_opt_context * opt,
|
2164
|
-
struct ggml_opt_params params,
|
2165
|
-
int64_t nx);
|
2166
|
-
|
2167
|
-
// continue optimizing the function defined by the tensor f
|
2168
|
-
GGML_API enum ggml_opt_result ggml_opt_resume(
|
2169
|
-
struct ggml_context * ctx,
|
2170
|
-
struct ggml_opt_context * opt,
|
2171
|
-
struct ggml_tensor * f);
|
2172
|
-
|
2173
|
-
// continue optimizing the function defined by the tensor f
|
2174
|
-
GGML_API enum ggml_opt_result ggml_opt_resume_g(
|
2175
|
-
struct ggml_context * ctx,
|
2176
|
-
struct ggml_opt_context * opt,
|
2177
|
-
struct ggml_tensor * f,
|
2178
|
-
struct ggml_cgraph * gf,
|
2179
|
-
struct ggml_cgraph * gb,
|
2180
|
-
ggml_opt_callback callback,
|
2181
|
-
void * callback_data);
|
2101
|
+
// Set callback for all future logging events.
|
2102
|
+
// If this is not called, or NULL is supplied, everything is output on stderr.
|
2103
|
+
GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
|
2182
2104
|
|
2183
|
-
|
2184
|
-
// tensor flags
|
2185
|
-
//
|
2186
|
-
GGML_API void ggml_set_input(struct ggml_tensor * tensor);
|
2187
|
-
GGML_API void ggml_set_output(struct ggml_tensor * tensor);
|
2105
|
+
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
2188
2106
|
|
2189
2107
|
//
|
2190
2108
|
// quantization
|
@@ -2215,187 +2133,69 @@ extern "C" {
|
|
2215
2133
|
int64_t n_per_row,
|
2216
2134
|
const float * imatrix);
|
2217
2135
|
|
2218
|
-
|
2219
|
-
//
|
2220
|
-
|
2136
|
+
#ifdef __cplusplus
|
2137
|
+
// restrict not standard in C++
|
2138
|
+
# if defined(__GNUC__)
|
2139
|
+
# define GGML_RESTRICT __restrict__
|
2140
|
+
# elif defined(__clang__)
|
2141
|
+
# define GGML_RESTRICT __restrict
|
2142
|
+
# elif defined(_MSC_VER)
|
2143
|
+
# define GGML_RESTRICT __restrict
|
2144
|
+
# else
|
2145
|
+
# define GGML_RESTRICT
|
2146
|
+
# endif
|
2147
|
+
#else
|
2148
|
+
# if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
|
2149
|
+
# define GGML_RESTRICT __restrict
|
2150
|
+
# else
|
2151
|
+
# define GGML_RESTRICT restrict
|
2152
|
+
# endif
|
2153
|
+
#endif
|
2154
|
+
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
2155
|
+
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
2221
2156
|
|
2222
|
-
|
2223
|
-
|
2224
|
-
|
2225
|
-
|
2226
|
-
|
2227
|
-
|
2228
|
-
|
2229
|
-
|
2230
|
-
GGUF_TYPE_BOOL = 7,
|
2231
|
-
GGUF_TYPE_STRING = 8,
|
2232
|
-
GGUF_TYPE_ARRAY = 9,
|
2233
|
-
GGUF_TYPE_UINT64 = 10,
|
2234
|
-
GGUF_TYPE_INT64 = 11,
|
2235
|
-
GGUF_TYPE_FLOAT64 = 12,
|
2236
|
-
GGUF_TYPE_COUNT, // marks the end of the enum
|
2157
|
+
struct ggml_type_traits {
|
2158
|
+
const char * type_name;
|
2159
|
+
int64_t blck_size;
|
2160
|
+
int64_t blck_size_interleave; // interleave elements in blocks
|
2161
|
+
size_t type_size;
|
2162
|
+
bool is_quantized;
|
2163
|
+
ggml_to_float_t to_float;
|
2164
|
+
ggml_from_float_t from_float_ref;
|
2237
2165
|
};
|
2238
2166
|
|
2239
|
-
struct
|
2167
|
+
GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
|
2240
2168
|
|
2241
|
-
|
2242
|
-
|
2169
|
+
// ggml threadpool
|
2170
|
+
// TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
|
2171
|
+
// the goal should be to create an API that other backends can use, and move everything to the ggml base
|
2243
2172
|
|
2244
|
-
|
2245
|
-
|
2173
|
+
// scheduling priorities
|
2174
|
+
enum ggml_sched_priority {
|
2175
|
+
GGML_SCHED_PRIO_NORMAL,
|
2176
|
+
GGML_SCHED_PRIO_MEDIUM,
|
2177
|
+
GGML_SCHED_PRIO_HIGH,
|
2178
|
+
GGML_SCHED_PRIO_REALTIME
|
2246
2179
|
};
|
2247
2180
|
|
2248
|
-
|
2249
|
-
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
|
2254
|
-
|
2255
|
-
|
2256
|
-
|
2257
|
-
|
2258
|
-
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
|
2259
|
-
GGML_API void * gguf_get_data (const struct gguf_context * ctx);
|
2260
|
-
|
2261
|
-
GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
|
2262
|
-
GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
|
2263
|
-
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
|
2264
|
-
|
2265
|
-
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
|
2266
|
-
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
|
2267
|
-
|
2268
|
-
// will abort if the wrong type is used for the key
|
2269
|
-
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
|
2270
|
-
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
|
2271
|
-
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
|
2272
|
-
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
|
2273
|
-
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
|
2274
|
-
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
|
2275
|
-
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
|
2276
|
-
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
|
2277
|
-
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
|
2278
|
-
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
|
2279
|
-
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
|
2280
|
-
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
|
2281
|
-
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
|
2282
|
-
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
|
2283
|
-
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
|
2284
|
-
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
|
2285
|
-
|
2286
|
-
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
2287
|
-
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
2288
|
-
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
2289
|
-
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
2290
|
-
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
|
2291
|
-
|
2292
|
-
// overrides existing values or adds a new one
|
2293
|
-
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
2294
|
-
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|
2295
|
-
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
|
2296
|
-
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
|
2297
|
-
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
|
2298
|
-
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
|
2299
|
-
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
|
2300
|
-
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
|
2301
|
-
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
|
2302
|
-
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
|
2303
|
-
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
|
2304
|
-
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
|
2305
|
-
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
|
2306
|
-
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
|
2307
|
-
|
2308
|
-
// set or add KV pairs from another context
|
2309
|
-
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
|
2310
|
-
|
2311
|
-
// manage tensor info
|
2312
|
-
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
|
2313
|
-
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
|
2314
|
-
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
|
2315
|
-
|
2316
|
-
// writing gguf files can be done in 2 ways:
|
2317
|
-
//
|
2318
|
-
// - write the entire gguf_context to a binary file in a single pass:
|
2319
|
-
//
|
2320
|
-
// gguf_write_to_file(ctx, fname);
|
2321
|
-
//
|
2322
|
-
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
|
2323
|
-
//
|
2324
|
-
// FILE * f = fopen(fname, "wb");
|
2325
|
-
// fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
|
2326
|
-
// fwrite(f, ...);
|
2327
|
-
// void * data = malloc(gguf_get_meta_size(ctx)); gguf_get_meta_data(ctx, data);
|
2328
|
-
// fseek(f, 0, SEEK_SET);
|
2329
|
-
// fwrite(f, data, gguf_get_meta_size(ctx));
|
2330
|
-
// free(data);
|
2331
|
-
// fclose(f);
|
2332
|
-
//
|
2333
|
-
|
2334
|
-
// write the entire context to a binary file
|
2335
|
-
GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
2336
|
-
|
2337
|
-
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
2338
|
-
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
2339
|
-
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
2340
|
-
|
2341
|
-
//
|
2342
|
-
// system info
|
2343
|
-
//
|
2181
|
+
// threadpool params
|
2182
|
+
// Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
|
2183
|
+
struct ggml_threadpool_params {
|
2184
|
+
bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
|
2185
|
+
int n_threads; // number of threads
|
2186
|
+
enum ggml_sched_priority prio; // thread priority
|
2187
|
+
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
|
2188
|
+
bool strict_cpu; // strict cpu placement
|
2189
|
+
bool paused; // start in paused state
|
2190
|
+
};
|
2344
2191
|
|
2345
|
-
|
2346
|
-
GGML_API int ggml_cpu_has_avx_vnni (void);
|
2347
|
-
GGML_API int ggml_cpu_has_avx2 (void);
|
2348
|
-
GGML_API int ggml_cpu_has_avx512 (void);
|
2349
|
-
GGML_API int ggml_cpu_has_avx512_vbmi(void);
|
2350
|
-
GGML_API int ggml_cpu_has_avx512_vnni(void);
|
2351
|
-
GGML_API int ggml_cpu_has_fma (void);
|
2352
|
-
GGML_API int ggml_cpu_has_neon (void);
|
2353
|
-
GGML_API int ggml_cpu_has_arm_fma (void);
|
2354
|
-
GGML_API int ggml_cpu_has_metal (void);
|
2355
|
-
GGML_API int ggml_cpu_has_f16c (void);
|
2356
|
-
GGML_API int ggml_cpu_has_fp16_va (void);
|
2357
|
-
GGML_API int ggml_cpu_has_wasm_simd (void);
|
2358
|
-
GGML_API int ggml_cpu_has_blas (void);
|
2359
|
-
GGML_API int ggml_cpu_has_cuda (void);
|
2360
|
-
GGML_API int ggml_cpu_has_clblast (void);
|
2361
|
-
GGML_API int ggml_cpu_has_vulkan (void);
|
2362
|
-
GGML_API int ggml_cpu_has_kompute (void);
|
2363
|
-
GGML_API int ggml_cpu_has_gpublas (void);
|
2364
|
-
GGML_API int ggml_cpu_has_sse3 (void);
|
2365
|
-
GGML_API int ggml_cpu_has_ssse3 (void);
|
2366
|
-
GGML_API int ggml_cpu_has_sycl (void);
|
2367
|
-
GGML_API int ggml_cpu_has_vsx (void);
|
2368
|
-
GGML_API int ggml_cpu_has_matmul_int8(void);
|
2192
|
+
struct ggml_threadpool; // forward declaration, see ggml.c
|
2369
2193
|
|
2370
|
-
|
2371
|
-
// Internal types and functions exposed for tests and benchmarks
|
2372
|
-
//
|
2194
|
+
typedef struct ggml_threadpool * ggml_threadpool_t;
|
2373
2195
|
|
2374
|
-
|
2375
|
-
|
2376
|
-
|
2377
|
-
#else
|
2378
|
-
#define GGML_RESTRICT restrict
|
2379
|
-
#endif
|
2380
|
-
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
2381
|
-
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
2382
|
-
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
|
2383
|
-
const void * GGML_RESTRICT y, size_t by, int nrc);
|
2384
|
-
|
2385
|
-
typedef struct {
|
2386
|
-
const char * type_name;
|
2387
|
-
int blck_size;
|
2388
|
-
size_t type_size;
|
2389
|
-
bool is_quantized;
|
2390
|
-
ggml_to_float_t to_float;
|
2391
|
-
ggml_from_float_t from_float;
|
2392
|
-
ggml_from_float_t from_float_reference;
|
2393
|
-
ggml_vec_dot_t vec_dot;
|
2394
|
-
enum ggml_type vec_dot_type;
|
2395
|
-
int64_t nrows; // number of rows to process simultaneously;
|
2396
|
-
} ggml_type_traits_t;
|
2397
|
-
|
2398
|
-
GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
2196
|
+
GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
|
2197
|
+
GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
|
2198
|
+
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
|
2399
2199
|
|
2400
2200
|
#ifdef __cplusplus
|
2401
2201
|
}
|