whispercpp 1.3.1 → 1.3.2
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -3
- data/README.md +92 -31
- data/Rakefile +26 -7
- data/ext/.gitignore +5 -7
- data/ext/dependencies.rb +61 -0
- data/ext/extconf.rb +21 -198
- data/ext/options.rb +221 -0
- data/ext/ruby_whisper.c +159 -0
- data/ext/ruby_whisper.h +17 -2
- data/ext/ruby_whisper_context.c +641 -0
- data/ext/ruby_whisper_error.c +52 -0
- data/ext/ruby_whisper_model.c +232 -0
- data/ext/ruby_whisper_params.c +1301 -0
- data/ext/ruby_whisper_segment.c +143 -0
- data/ext/ruby_whisper_transcribe.cpp +87 -0
- data/ext/ruby_whisper_vad_params.c +288 -0
- data/ext/sources/.dockerignore +3 -0
- data/ext/sources/.github/workflows/bindings-ruby.yml +21 -0
- data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
- data/ext/sources/CMakeLists.txt +251 -0
- data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
- data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
- data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
- data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
- data/ext/sources/bindings/javascript/package.json +26 -0
- data/ext/sources/bindings/javascript/whisper.js +19 -0
- data/ext/sources/build-xcframework.sh +547 -0
- data/ext/sources/ci/run.sh +336 -0
- data/ext/sources/close-issue.yml +28 -0
- data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
- data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
- data/ext/sources/cmake/build-info.cmake +60 -0
- data/ext/sources/cmake/git-vars.cmake +22 -0
- data/ext/sources/cmake/whisper-config.cmake.in +65 -0
- data/ext/sources/cmake/whisper.pc.in +10 -0
- data/ext/sources/examples/CMakeLists.txt +124 -0
- data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +37 -0
- data/ext/sources/examples/addon.node/addon.cpp +438 -0
- data/ext/sources/examples/addon.node/index.js +54 -0
- data/ext/sources/examples/addon.node/package.json +16 -0
- data/ext/sources/examples/bench/CMakeLists.txt +8 -0
- data/ext/sources/examples/bench/bench.cpp +175 -0
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
- data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
- data/ext/sources/examples/cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/cli/cli.cpp +1294 -0
- data/ext/sources/examples/coi-serviceworker.js +146 -0
- data/ext/sources/examples/command/CMakeLists.txt +10 -0
- data/ext/sources/examples/command/command.cpp +776 -0
- data/ext/sources/examples/command/commands.txt +9 -0
- data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
- data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/common-ggml.cpp +238 -0
- data/ext/sources/examples/common-ggml.h +18 -0
- data/ext/sources/examples/common-sdl.cpp +227 -0
- data/ext/sources/examples/common-sdl.h +49 -0
- data/ext/sources/examples/common-whisper.cpp +168 -0
- data/ext/sources/examples/common-whisper.h +24 -0
- data/ext/sources/examples/common.cpp +675 -0
- data/ext/sources/examples/common.h +322 -0
- data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
- data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
- data/ext/sources/examples/generate-karaoke.sh +57 -0
- data/ext/sources/examples/grammar-parser.cpp +423 -0
- data/ext/sources/examples/grammar-parser.h +29 -0
- data/ext/sources/examples/helpers.js +191 -0
- data/ext/sources/examples/json.hpp +24596 -0
- data/ext/sources/examples/livestream.sh +112 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
- data/ext/sources/examples/lsp/lsp.cpp +467 -0
- data/ext/sources/examples/lsp/whisper.vim +362 -0
- data/ext/sources/examples/miniaudio.h +93468 -0
- data/ext/sources/examples/python/test_whisper_processor.py +7 -0
- data/ext/sources/examples/python/whisper_processor.py +54 -0
- data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
- data/ext/sources/examples/quantize/quantize.cpp +223 -0
- data/ext/sources/examples/server/CMakeLists.txt +12 -0
- data/ext/sources/examples/server/bench.js +29 -0
- data/ext/sources/examples/server/httplib.h +10497 -0
- data/ext/sources/examples/server/server.cpp +1091 -0
- data/ext/sources/examples/server.py +115 -0
- data/ext/sources/examples/stb_vorbis.c +5584 -0
- data/ext/sources/examples/stream/CMakeLists.txt +10 -0
- data/ext/sources/examples/stream/stream.cpp +429 -0
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
- data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
- data/ext/sources/examples/sycl/build.sh +22 -0
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
- data/ext/sources/examples/sycl/run-whisper.sh +17 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +40 -0
- data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
- data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +1746 -0
- data/ext/sources/examples/talk-llama/llama-arch.h +437 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +374 -0
- data/ext/sources/examples/talk-llama/llama-batch.h +89 -0
- data/ext/sources/examples/talk-llama/llama-chat.cpp +663 -0
- data/ext/sources/examples/talk-llama/llama-chat.h +58 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +2676 -0
- data/ext/sources/examples/talk-llama/llama-context.h +276 -0
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
- data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
- data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
- data/ext/sources/examples/talk-llama/llama-graph.cpp +1618 -0
- data/ext/sources/examples/talk-llama/llama-graph.h +640 -0
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +95 -0
- data/ext/sources/examples/talk-llama/llama-hparams.h +190 -0
- data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
- data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
- data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
- data/ext/sources/examples/talk-llama/llama-io.h +35 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2739 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +502 -0
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +379 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +32 -0
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
- data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1138 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +281 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +13814 -0
- data/ext/sources/examples/talk-llama/llama-model.h +425 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +966 -0
- data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
- data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +3340 -0
- data/ext/sources/examples/talk-llama/llama-vocab.h +131 -0
- data/ext/sources/examples/talk-llama/llama.cpp +354 -0
- data/ext/sources/examples/talk-llama/llama.h +1377 -0
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
- data/ext/sources/examples/talk-llama/speak +40 -0
- data/ext/sources/examples/talk-llama/speak.bat +1 -0
- data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +808 -0
- data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
- data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +849 -0
- data/ext/sources/examples/talk-llama/unicode.h +66 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +143 -0
- data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
- data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +249 -0
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
- data/ext/sources/ggml/CMakeLists.txt +390 -0
- data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
- data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
- data/ext/sources/ggml/cmake/common.cmake +26 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
- data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-backend.h +9 -7
- data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
- data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +9 -1
- data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
- data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
- data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
- data/ext/{ggml → sources/ggml}/include/ggml.h +182 -265
- data/ext/sources/ggml/include/gguf.h +202 -0
- data/ext/sources/ggml/src/CMakeLists.txt +346 -0
- data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
- data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +87 -53
- data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +26 -14
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +74 -0
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
- data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +135 -1
- data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +564 -146
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
- data/ext/{ggml → sources/ggml}/src/ggml-common.h +12 -8
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +504 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +2 -1
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +72 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/cpu-feats-x86.cpp +5 -1
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +163 -41
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-quants.c +4029 -1117
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3510 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +67 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +8903 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.h +110 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +252 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +818 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +828 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +730 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +26 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1471 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3505 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +336 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +289 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +59 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
- data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
- data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +131 -0
- data/ext/{ggml → sources/ggml}/src/ggml-impl.h +64 -19
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +120 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2178 -1064
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +1575 -1218
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +5124 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
- data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
- data/ext/{ggml → sources/ggml}/src/ggml-quants.c +114 -120
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +480 -73
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +345 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +589 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +32 -33
- data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +4 -2
- data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/convert.cpp +104 -28
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +700 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +791 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +156 -17
- data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +75 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +99 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1004 -1240
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
- data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +0 -1
- data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmvq.cpp +261 -166
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/norm.cpp +204 -81
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
- data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +83 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +361 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +35 -25
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1215 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +293 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +196 -0
- data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3130 -1087
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +193 -35
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- data/ext/{ggml → sources/ggml}/src/ggml.c +676 -1820
- data/ext/sources/ggml/src/gguf.cpp +1330 -0
- data/ext/{include → sources/include}/whisper.h +68 -2
- data/ext/sources/src/CMakeLists.txt +143 -0
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +35 -10
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +28 -3
- data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
- data/ext/sources/src/whisper-arch.h +197 -0
- data/ext/{src → sources/src}/whisper.cpp +1905 -374
- data/ext/sources/tests/CMakeLists.txt +105 -0
- data/ext/sources/tests/earnings21/eval.mk +58 -0
- data/ext/sources/tests/earnings21/eval.py +68 -0
- data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
- data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
- data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
- data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
- data/ext/sources/tests/earnings21/requirements.txt +6 -0
- data/ext/sources/tests/en-0-ref.txt +1 -0
- data/ext/sources/tests/en-1-ref.txt +1 -0
- data/ext/sources/tests/en-2-ref.txt +1 -0
- data/ext/sources/tests/es-0-ref.txt +1 -0
- data/ext/sources/tests/librispeech/eval.mk +39 -0
- data/ext/sources/tests/librispeech/eval.py +47 -0
- data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
- data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
- data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
- data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
- data/ext/sources/tests/librispeech/requirements.txt +6 -0
- data/ext/sources/tests/run-tests.sh +130 -0
- data/ext/sources/tests/test-c.c +3 -0
- data/ext/sources/tests/test-vad-full.cpp +54 -0
- data/ext/sources/tests/test-vad.cpp +83 -0
- data/ext/sources/tests/test-whisper.js +58 -0
- data/extsources.rb +33 -5
- data/lib/whisper/model/uri.rb +149 -128
- data/sig/whisper.rbs +480 -0
- data/tests/helper.rb +28 -0
- data/tests/test_callback.rb +45 -3
- data/tests/test_error.rb +2 -2
- data/tests/test_model.rb +38 -0
- data/tests/test_package.rb +18 -3
- data/tests/test_params.rb +145 -8
- data/tests/test_segment.rb +10 -19
- data/tests/test_vad.rb +19 -0
- data/tests/test_vad_params.rb +103 -0
- data/tests/test_whisper.rb +37 -37
- data/whispercpp.gemspec +5 -4
- metadata +766 -111
- data/ext/cpu.mk +0 -9
- data/ext/examples/dr_wav.h +0 -8815
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
- data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
- data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
- data/ext/metal-embed.mk +0 -17
- data/ext/metal.mk +0 -6
- data/ext/ruby_whisper.cpp +0 -1909
- data/ext/scripts/get-flags.mk +0 -38
- data/lib/whisper.rb +0 -2
- /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-aarch64.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-hbm.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-hbm.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-traits.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-traits.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
- /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
@@ -19,16 +19,10 @@
|
|
19
19
|
#define GROUP_MAX_EPS_IQ1_M 1e-7f
|
20
20
|
#define GROUP_MAX_EPS_IQ1_S 1e-12f
|
21
21
|
|
22
|
-
#if defined(_MSC_VER)
|
23
|
-
// disable "possible loss of data" to avoid warnings for hundreds of casts
|
24
|
-
// we should just be careful :)
|
25
|
-
#pragma warning(disable: 4244 4267)
|
26
|
-
#endif
|
27
|
-
|
28
22
|
#define UNUSED GGML_UNUSED
|
29
23
|
|
30
24
|
// reference implementation for deterministic creation of model files
|
31
|
-
void quantize_row_q4_0_ref(const float *
|
25
|
+
void quantize_row_q4_0_ref(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k) {
|
32
26
|
static const int qk = QK4_0;
|
33
27
|
|
34
28
|
assert(k % qk == 0);
|
@@ -65,7 +59,7 @@ void quantize_row_q4_0_ref(const float * restrict x, block_q4_0 * restrict y, in
|
|
65
59
|
}
|
66
60
|
}
|
67
61
|
|
68
|
-
void quantize_row_q4_1_ref(const float *
|
62
|
+
void quantize_row_q4_1_ref(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k) {
|
69
63
|
const int qk = QK4_1;
|
70
64
|
|
71
65
|
assert(k % qk == 0);
|
@@ -102,7 +96,7 @@ void quantize_row_q4_1_ref(const float * restrict x, block_q4_1 * restrict y, in
|
|
102
96
|
}
|
103
97
|
}
|
104
98
|
|
105
|
-
void quantize_row_q5_0_ref(const float *
|
99
|
+
void quantize_row_q5_0_ref(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k) {
|
106
100
|
static const int qk = QK5_0;
|
107
101
|
|
108
102
|
assert(k % qk == 0);
|
@@ -146,7 +140,7 @@ void quantize_row_q5_0_ref(const float * restrict x, block_q5_0 * restrict y, in
|
|
146
140
|
}
|
147
141
|
}
|
148
142
|
|
149
|
-
void quantize_row_q5_1_ref(const float *
|
143
|
+
void quantize_row_q5_1_ref(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k) {
|
150
144
|
const int qk = QK5_1;
|
151
145
|
|
152
146
|
assert(k % qk == 0);
|
@@ -191,7 +185,7 @@ void quantize_row_q5_1_ref(const float * restrict x, block_q5_1 * restrict y, in
|
|
191
185
|
}
|
192
186
|
|
193
187
|
// reference implementation for deterministic creation of model files
|
194
|
-
void quantize_row_q8_0_ref(const float *
|
188
|
+
void quantize_row_q8_0_ref(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k) {
|
195
189
|
assert(k % QK8_0 == 0);
|
196
190
|
const int nb = k / QK8_0;
|
197
191
|
|
@@ -217,7 +211,7 @@ void quantize_row_q8_0_ref(const float * restrict x, block_q8_0 * restrict y, in
|
|
217
211
|
}
|
218
212
|
|
219
213
|
// reference implementation for deterministic creation of model files
|
220
|
-
void quantize_row_q8_1_ref(const float *
|
214
|
+
void quantize_row_q8_1_ref(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k) {
|
221
215
|
assert(QK8_1 == 32);
|
222
216
|
assert(k % QK8_1 == 0);
|
223
217
|
const int nb = k / QK8_1;
|
@@ -252,7 +246,7 @@ void quantize_row_q8_1_ref(const float * restrict x, block_q8_1 * restrict y, in
|
|
252
246
|
}
|
253
247
|
}
|
254
248
|
|
255
|
-
void dequantize_row_q4_0(const block_q4_0 *
|
249
|
+
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
256
250
|
static const int qk = QK4_0;
|
257
251
|
|
258
252
|
assert(k % qk == 0);
|
@@ -272,7 +266,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int6
|
|
272
266
|
}
|
273
267
|
}
|
274
268
|
|
275
|
-
void dequantize_row_q4_1(const block_q4_1 *
|
269
|
+
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
276
270
|
static const int qk = QK4_1;
|
277
271
|
|
278
272
|
assert(k % qk == 0);
|
@@ -293,7 +287,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int6
|
|
293
287
|
}
|
294
288
|
}
|
295
289
|
|
296
|
-
void dequantize_row_q5_0(const block_q5_0 *
|
290
|
+
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
297
291
|
static const int qk = QK5_0;
|
298
292
|
|
299
293
|
assert(k % qk == 0);
|
@@ -319,7 +313,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int6
|
|
319
313
|
}
|
320
314
|
}
|
321
315
|
|
322
|
-
void dequantize_row_q5_1(const block_q5_1 *
|
316
|
+
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
323
317
|
static const int qk = QK5_1;
|
324
318
|
|
325
319
|
assert(k % qk == 0);
|
@@ -346,7 +340,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int6
|
|
346
340
|
}
|
347
341
|
}
|
348
342
|
|
349
|
-
void dequantize_row_q8_0(const block_q8_0 *
|
343
|
+
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
350
344
|
static const int qk = QK8_0;
|
351
345
|
|
352
346
|
assert(k % qk == 0);
|
@@ -376,8 +370,8 @@ static inline int nearest_int(float fval) {
|
|
376
370
|
return (i & 0x007fffff) - 0x00400000;
|
377
371
|
}
|
378
372
|
|
379
|
-
static float make_qx_quants(int n, int nmax, const float *
|
380
|
-
const float *
|
373
|
+
static float make_qx_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, int rmse_type,
|
374
|
+
const float * GGML_RESTRICT qw) {
|
381
375
|
float max = 0;
|
382
376
|
float amax = 0;
|
383
377
|
for (int i = 0; i < n; ++i) {
|
@@ -445,7 +439,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
|
|
445
439
|
return scale;
|
446
440
|
}
|
447
441
|
|
448
|
-
static float make_q3_quants(int n, int nmax, const float *
|
442
|
+
static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, bool do_rmse) {
|
449
443
|
float max = 0;
|
450
444
|
float amax = 0;
|
451
445
|
for (int i = 0; i < n; ++i) {
|
@@ -504,7 +498,7 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t *
|
|
504
498
|
return 1/iscale;
|
505
499
|
}
|
506
500
|
|
507
|
-
static float make_qkx1_quants(int n, int nmax, const float *
|
501
|
+
static float make_qkx1_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min,
|
508
502
|
int ntry, float alpha) {
|
509
503
|
float min = x[0];
|
510
504
|
float max = x[0];
|
@@ -547,8 +541,8 @@ static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t
|
|
547
541
|
return scale;
|
548
542
|
}
|
549
543
|
|
550
|
-
static float make_qkx2_quants(int n, int nmax, const float *
|
551
|
-
uint8_t *
|
544
|
+
static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
|
545
|
+
uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
|
552
546
|
float rmin, float rdelta, int nstep, bool use_mad) {
|
553
547
|
float min = x[0];
|
554
548
|
float max = x[0];
|
@@ -628,7 +622,7 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
|
|
628
622
|
return scale;
|
629
623
|
}
|
630
624
|
|
631
|
-
static inline void get_scale_min_k4(int j, const uint8_t *
|
625
|
+
static inline void get_scale_min_k4(int j, const uint8_t * GGML_RESTRICT q, uint8_t * GGML_RESTRICT d, uint8_t * GGML_RESTRICT m) {
|
632
626
|
if (j < 4) {
|
633
627
|
*d = q[j] & 63; *m = q[j + 4] & 63;
|
634
628
|
} else {
|
@@ -639,7 +633,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t *
|
|
639
633
|
|
640
634
|
//========================- 2-bit (de)-quantization
|
641
635
|
|
642
|
-
void quantize_row_q2_K_ref(const float *
|
636
|
+
void quantize_row_q2_K_ref(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k) {
|
643
637
|
assert(k % QK_K == 0);
|
644
638
|
const int nb = k / QK_K;
|
645
639
|
|
@@ -709,7 +703,7 @@ void quantize_row_q2_K_ref(const float * restrict x, block_q2_K * restrict y, in
|
|
709
703
|
}
|
710
704
|
}
|
711
705
|
|
712
|
-
void dequantize_row_q2_K(const block_q2_K *
|
706
|
+
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
713
707
|
assert(k % QK_K == 0);
|
714
708
|
const int nb = k / QK_K;
|
715
709
|
|
@@ -741,8 +735,8 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int6
|
|
741
735
|
}
|
742
736
|
}
|
743
737
|
|
744
|
-
static float make_qkx3_quants(int n, int nmax, const float *
|
745
|
-
uint8_t *
|
738
|
+
static float make_qkx3_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
|
739
|
+
uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
|
746
740
|
float rmin, float rdelta, int nstep, bool use_mad) {
|
747
741
|
float min = x[0];
|
748
742
|
float max = x[0];
|
@@ -824,7 +818,7 @@ static float make_qkx3_quants(int n, int nmax, const float * restrict x, const f
|
|
824
818
|
return scale;
|
825
819
|
}
|
826
820
|
|
827
|
-
static float make_qp_quants(int n, int nmax, const float *
|
821
|
+
static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, const float * quant_weights) {
|
828
822
|
float max = 0;
|
829
823
|
for (int i = 0; i < n; ++i) {
|
830
824
|
max = MAX(max, x[i]);
|
@@ -897,7 +891,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
|
|
897
891
|
return sumlx/suml2;
|
898
892
|
}
|
899
893
|
|
900
|
-
static void quantize_row_q2_K_impl(const float *
|
894
|
+
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
|
901
895
|
GGML_ASSERT(quant_weights);
|
902
896
|
assert(k % QK_K == 0);
|
903
897
|
const int nb = k / QK_K;
|
@@ -917,7 +911,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
|
|
917
911
|
for (int j = 0; j < QK_K; ++j) sumx2 += x[j]*x[j];
|
918
912
|
float sigma2 = sumx2/QK_K;
|
919
913
|
for (int j = 0; j < QK_K/16; ++j) {
|
920
|
-
const float *
|
914
|
+
const float * GGML_RESTRICT qw = quant_weights + QK_K * i + 16*j;
|
921
915
|
for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
|
922
916
|
for (int l = 0; l < QK_K/16; ++l) sw[j] += weight[l];
|
923
917
|
scales[j] = make_qkx3_quants(16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
|
@@ -959,7 +953,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
|
|
959
953
|
}
|
960
954
|
}
|
961
955
|
|
962
|
-
size_t quantize_q2_K(const float *
|
956
|
+
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
963
957
|
size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);
|
964
958
|
if (!quant_weights) {
|
965
959
|
quantize_row_q2_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -977,7 +971,7 @@ size_t quantize_q2_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
977
971
|
|
978
972
|
//========================= 3-bit (de)-quantization
|
979
973
|
|
980
|
-
void quantize_row_q3_K_ref(const float *
|
974
|
+
void quantize_row_q3_K_ref(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k) {
|
981
975
|
assert(k % QK_K == 0);
|
982
976
|
const int nb = k / QK_K;
|
983
977
|
|
@@ -1053,7 +1047,7 @@ void quantize_row_q3_K_ref(const float * restrict x, block_q3_K * restrict y, in
|
|
1053
1047
|
}
|
1054
1048
|
}
|
1055
1049
|
|
1056
|
-
void dequantize_row_q3_K(const block_q3_K *
|
1050
|
+
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
1057
1051
|
assert(k % QK_K == 0);
|
1058
1052
|
const int nb = k / QK_K;
|
1059
1053
|
|
@@ -1067,8 +1061,8 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int6
|
|
1067
1061
|
|
1068
1062
|
const float d_all = GGML_FP16_TO_FP32(x[i].d);
|
1069
1063
|
|
1070
|
-
const uint8_t *
|
1071
|
-
const uint8_t *
|
1064
|
+
const uint8_t * GGML_RESTRICT q = x[i].qs;
|
1065
|
+
const uint8_t * GGML_RESTRICT hm = x[i].hmask;
|
1072
1066
|
uint8_t m = 1;
|
1073
1067
|
|
1074
1068
|
memcpy(aux, x[i].scales, 12);
|
@@ -1103,7 +1097,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int6
|
|
1103
1097
|
}
|
1104
1098
|
}
|
1105
1099
|
|
1106
|
-
static void quantize_row_q3_K_impl(const float *
|
1100
|
+
static void quantize_row_q3_K_impl(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t n_per_row, const float * GGML_RESTRICT quant_weights) {
|
1107
1101
|
assert(n_per_row % QK_K == 0);
|
1108
1102
|
const int nb = n_per_row / QK_K;
|
1109
1103
|
|
@@ -1187,7 +1181,7 @@ static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restri
|
|
1187
1181
|
}
|
1188
1182
|
}
|
1189
1183
|
|
1190
|
-
size_t quantize_q3_K(const float *
|
1184
|
+
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1191
1185
|
size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);
|
1192
1186
|
if (!quant_weights) {
|
1193
1187
|
quantize_row_q3_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -1205,7 +1199,7 @@ size_t quantize_q3_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
1205
1199
|
|
1206
1200
|
// ====================== 4-bit (de)-quantization
|
1207
1201
|
|
1208
|
-
void quantize_row_q4_K_ref(const float *
|
1202
|
+
void quantize_row_q4_K_ref(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k) {
|
1209
1203
|
assert(k % QK_K == 0);
|
1210
1204
|
const int nb = k / QK_K;
|
1211
1205
|
|
@@ -1277,7 +1271,7 @@ void quantize_row_q4_K_ref(const float * restrict x, block_q4_K * restrict y, in
|
|
1277
1271
|
}
|
1278
1272
|
}
|
1279
1273
|
|
1280
|
-
void dequantize_row_q4_K(const block_q4_K *
|
1274
|
+
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
1281
1275
|
assert(k % QK_K == 0);
|
1282
1276
|
const int nb = k / QK_K;
|
1283
1277
|
|
@@ -1301,7 +1295,7 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int6
|
|
1301
1295
|
}
|
1302
1296
|
}
|
1303
1297
|
|
1304
|
-
static void quantize_row_q4_K_impl(const float *
|
1298
|
+
static void quantize_row_q4_K_impl(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1305
1299
|
assert(n_per_row % QK_K == 0);
|
1306
1300
|
const int64_t nb = n_per_row / QK_K;
|
1307
1301
|
|
@@ -1374,7 +1368,7 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri
|
|
1374
1368
|
}
|
1375
1369
|
}
|
1376
1370
|
|
1377
|
-
size_t quantize_q4_K(const float *
|
1371
|
+
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1378
1372
|
size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
|
1379
1373
|
if (!quant_weights) {
|
1380
1374
|
quantize_row_q4_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -1392,7 +1386,7 @@ size_t quantize_q4_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
1392
1386
|
|
1393
1387
|
// ====================== 5-bit (de)-quantization
|
1394
1388
|
|
1395
|
-
void quantize_row_q5_K_ref(const float *
|
1389
|
+
void quantize_row_q5_K_ref(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k) {
|
1396
1390
|
assert(k % QK_K == 0);
|
1397
1391
|
const int64_t nb = k / QK_K;
|
1398
1392
|
|
@@ -1454,8 +1448,8 @@ void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, in
|
|
1454
1448
|
}
|
1455
1449
|
}
|
1456
1450
|
|
1457
|
-
uint8_t *
|
1458
|
-
uint8_t *
|
1451
|
+
uint8_t * GGML_RESTRICT qh = y[i].qh;
|
1452
|
+
uint8_t * GGML_RESTRICT ql = y[i].qs;
|
1459
1453
|
memset(qh, 0, QK_K/8);
|
1460
1454
|
|
1461
1455
|
uint8_t m1 = 1, m2 = 2;
|
@@ -1479,7 +1473,7 @@ void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, in
|
|
1479
1473
|
}
|
1480
1474
|
}
|
1481
1475
|
|
1482
|
-
void dequantize_row_q5_K(const block_q5_K *
|
1476
|
+
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
1483
1477
|
assert(k % QK_K == 0);
|
1484
1478
|
const int64_t nb = k / QK_K;
|
1485
1479
|
|
@@ -1506,7 +1500,7 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int6
|
|
1506
1500
|
}
|
1507
1501
|
}
|
1508
1502
|
|
1509
|
-
static void quantize_row_q5_K_impl(const float *
|
1503
|
+
static void quantize_row_q5_K_impl(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1510
1504
|
assert(n_per_row % QK_K == 0);
|
1511
1505
|
const int64_t nb = n_per_row / QK_K;
|
1512
1506
|
|
@@ -1573,8 +1567,8 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
|
|
1573
1567
|
}
|
1574
1568
|
}
|
1575
1569
|
|
1576
|
-
uint8_t *
|
1577
|
-
uint8_t *
|
1570
|
+
uint8_t * GGML_RESTRICT qh = y[i].qh;
|
1571
|
+
uint8_t * GGML_RESTRICT ql = y[i].qs;
|
1578
1572
|
memset(qh, 0, QK_K/8);
|
1579
1573
|
|
1580
1574
|
uint8_t m1 = 1, m2 = 2;
|
@@ -1599,7 +1593,7 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
|
|
1599
1593
|
}
|
1600
1594
|
}
|
1601
1595
|
|
1602
|
-
size_t quantize_q5_K(const float *
|
1596
|
+
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1603
1597
|
size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);
|
1604
1598
|
if (!quant_weights) {
|
1605
1599
|
quantize_row_q5_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -1617,7 +1611,7 @@ size_t quantize_q5_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
1617
1611
|
|
1618
1612
|
// ====================== 6-bit (de)-quantization
|
1619
1613
|
|
1620
|
-
void quantize_row_q6_K_ref(const float *
|
1614
|
+
void quantize_row_q6_K_ref(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k) {
|
1621
1615
|
assert(k % QK_K == 0);
|
1622
1616
|
const int64_t nb = k / QK_K;
|
1623
1617
|
|
@@ -1667,8 +1661,8 @@ void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, in
|
|
1667
1661
|
}
|
1668
1662
|
}
|
1669
1663
|
|
1670
|
-
uint8_t *
|
1671
|
-
uint8_t *
|
1664
|
+
uint8_t * GGML_RESTRICT ql = y[i].ql;
|
1665
|
+
uint8_t * GGML_RESTRICT qh = y[i].qh;
|
1672
1666
|
for (int j = 0; j < QK_K; j += 128) {
|
1673
1667
|
for (int l = 0; l < 32; ++l) {
|
1674
1668
|
const uint8_t q1 = L[j + l + 0] & 0xF;
|
@@ -1687,16 +1681,16 @@ void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, in
|
|
1687
1681
|
}
|
1688
1682
|
}
|
1689
1683
|
|
1690
|
-
void dequantize_row_q6_K(const block_q6_K *
|
1684
|
+
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
1691
1685
|
assert(k % QK_K == 0);
|
1692
1686
|
const int64_t nb = k / QK_K;
|
1693
1687
|
|
1694
1688
|
for (int i = 0; i < nb; i++) {
|
1695
1689
|
const float d = GGML_FP16_TO_FP32(x[i].d);
|
1696
1690
|
|
1697
|
-
const uint8_t *
|
1698
|
-
const uint8_t *
|
1699
|
-
const int8_t *
|
1691
|
+
const uint8_t * GGML_RESTRICT ql = x[i].ql;
|
1692
|
+
const uint8_t * GGML_RESTRICT qh = x[i].qh;
|
1693
|
+
const int8_t * GGML_RESTRICT sc = x[i].scales;
|
1700
1694
|
|
1701
1695
|
for (int n = 0; n < QK_K; n += 128) {
|
1702
1696
|
for (int l = 0; l < 32; ++l) {
|
@@ -1718,7 +1712,7 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int6
|
|
1718
1712
|
}
|
1719
1713
|
}
|
1720
1714
|
|
1721
|
-
static void quantize_row_q6_K_impl(const float *
|
1715
|
+
static void quantize_row_q6_K_impl(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1722
1716
|
assert(n_per_row % QK_K == 0);
|
1723
1717
|
const int64_t nb = n_per_row / QK_K;
|
1724
1718
|
|
@@ -1781,8 +1775,8 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
|
|
1781
1775
|
}
|
1782
1776
|
}
|
1783
1777
|
|
1784
|
-
uint8_t *
|
1785
|
-
uint8_t *
|
1778
|
+
uint8_t * GGML_RESTRICT ql = y[i].ql;
|
1779
|
+
uint8_t * GGML_RESTRICT qh = y[i].qh;
|
1786
1780
|
for (int j = 0; j < QK_K; j += 128) {
|
1787
1781
|
for (int l = 0; l < 32; ++l) {
|
1788
1782
|
const uint8_t q1 = L[j + l + 0] & 0xF;
|
@@ -1802,7 +1796,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
|
|
1802
1796
|
}
|
1803
1797
|
}
|
1804
1798
|
|
1805
|
-
size_t quantize_q6_K(const float *
|
1799
|
+
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1806
1800
|
size_t row_size = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);
|
1807
1801
|
if (!quant_weights) {
|
1808
1802
|
quantize_row_q6_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -1818,7 +1812,7 @@ size_t quantize_q6_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
1818
1812
|
return nrow * row_size;
|
1819
1813
|
}
|
1820
1814
|
|
1821
|
-
static void quantize_row_q4_0_impl(const float *
|
1815
|
+
static void quantize_row_q4_0_impl(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1822
1816
|
static_assert(QK4_0 == 32, "QK4_0 must be 32");
|
1823
1817
|
|
1824
1818
|
if (!quant_weights) {
|
@@ -1846,7 +1840,7 @@ static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restri
|
|
1846
1840
|
}
|
1847
1841
|
}
|
1848
1842
|
|
1849
|
-
size_t quantize_q4_0(const float *
|
1843
|
+
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1850
1844
|
if (!quant_weights) {
|
1851
1845
|
quantize_row_q4_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
1852
1846
|
return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
|
@@ -1861,7 +1855,7 @@ size_t quantize_q4_0(const float * restrict src, void * restrict dst, int64_t nr
|
|
1861
1855
|
return nrow * row_size;
|
1862
1856
|
}
|
1863
1857
|
|
1864
|
-
static void quantize_row_q4_1_impl(const float *
|
1858
|
+
static void quantize_row_q4_1_impl(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1865
1859
|
static_assert(QK4_1 == 32, "QK4_1 must be 32");
|
1866
1860
|
|
1867
1861
|
if (!quant_weights) {
|
@@ -1891,7 +1885,7 @@ static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restri
|
|
1891
1885
|
}
|
1892
1886
|
}
|
1893
1887
|
|
1894
|
-
size_t quantize_q4_1(const float *
|
1888
|
+
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1895
1889
|
if (!quant_weights) {
|
1896
1890
|
quantize_row_q4_1_ref(src, dst, (int64_t)nrow*n_per_row);
|
1897
1891
|
return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
|
@@ -1906,7 +1900,7 @@ size_t quantize_q4_1(const float * restrict src, void * restrict dst, int64_t nr
|
|
1906
1900
|
return nrow * row_size;
|
1907
1901
|
}
|
1908
1902
|
|
1909
|
-
static void quantize_row_q5_0_impl(const float *
|
1903
|
+
static void quantize_row_q5_0_impl(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1910
1904
|
static_assert(QK5_0 == 32, "QK5_0 must be 32");
|
1911
1905
|
|
1912
1906
|
if (!quant_weights) {
|
@@ -1945,7 +1939,7 @@ static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restri
|
|
1945
1939
|
}
|
1946
1940
|
}
|
1947
1941
|
|
1948
|
-
size_t quantize_q5_0(const float *
|
1942
|
+
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1949
1943
|
if (!quant_weights) {
|
1950
1944
|
quantize_row_q5_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
1951
1945
|
return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
|
@@ -1960,7 +1954,7 @@ size_t quantize_q5_0(const float * restrict src, void * restrict dst, int64_t nr
|
|
1960
1954
|
return nrow * row_size;
|
1961
1955
|
}
|
1962
1956
|
|
1963
|
-
static void quantize_row_q5_1_impl(const float *
|
1957
|
+
static void quantize_row_q5_1_impl(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1964
1958
|
static_assert(QK5_1 == 32, "QK5_1 must be 32");
|
1965
1959
|
|
1966
1960
|
if (!quant_weights) {
|
@@ -1998,7 +1992,7 @@ static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restri
|
|
1998
1992
|
}
|
1999
1993
|
}
|
2000
1994
|
|
2001
|
-
size_t quantize_q5_1(const float *
|
1995
|
+
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2002
1996
|
if (!quant_weights) {
|
2003
1997
|
quantize_row_q5_1_ref(src, dst, (int64_t)nrow*n_per_row);
|
2004
1998
|
return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
|
@@ -2013,7 +2007,7 @@ size_t quantize_q5_1(const float * restrict src, void * restrict dst, int64_t nr
|
|
2013
2007
|
return nrow * row_size;
|
2014
2008
|
}
|
2015
2009
|
|
2016
|
-
size_t quantize_q8_0(const float *
|
2010
|
+
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2017
2011
|
(void)quant_weights; // not used
|
2018
2012
|
const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
|
2019
2013
|
quantize_row_q8_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -2022,7 +2016,7 @@ size_t quantize_q8_0(const float * restrict src, void * restrict dst, int64_t nr
|
|
2022
2016
|
|
2023
2017
|
// ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)
|
2024
2018
|
|
2025
|
-
void quantize_row_tq1_0_ref(const float *
|
2019
|
+
void quantize_row_tq1_0_ref(const float * GGML_RESTRICT x, block_tq1_0 * GGML_RESTRICT y, int64_t k) {
|
2026
2020
|
assert(k % QK_K == 0);
|
2027
2021
|
const int64_t nb = k / QK_K;
|
2028
2022
|
|
@@ -2088,7 +2082,7 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
|
|
2088
2082
|
}
|
2089
2083
|
}
|
2090
2084
|
|
2091
|
-
void quantize_row_tq2_0_ref(const float *
|
2085
|
+
void quantize_row_tq2_0_ref(const float * GGML_RESTRICT x, block_tq2_0 * GGML_RESTRICT y, int64_t k) {
|
2092
2086
|
assert(k % QK_K == 0);
|
2093
2087
|
const int64_t nb = k / QK_K;
|
2094
2088
|
|
@@ -2120,21 +2114,21 @@ void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y,
|
|
2120
2114
|
}
|
2121
2115
|
}
|
2122
2116
|
|
2123
|
-
size_t quantize_tq1_0(const float *
|
2117
|
+
size_t quantize_tq1_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2124
2118
|
(void)quant_weights; // not used
|
2125
2119
|
const size_t row_size = ggml_row_size(GGML_TYPE_TQ1_0, n_per_row);
|
2126
2120
|
quantize_row_tq1_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
2127
2121
|
return nrow * row_size;
|
2128
2122
|
}
|
2129
2123
|
|
2130
|
-
size_t quantize_tq2_0(const float *
|
2124
|
+
size_t quantize_tq2_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2131
2125
|
(void)quant_weights; // not used
|
2132
2126
|
const size_t row_size = ggml_row_size(GGML_TYPE_TQ2_0, n_per_row);
|
2133
2127
|
quantize_row_tq2_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
2134
2128
|
return nrow * row_size;
|
2135
2129
|
}
|
2136
2130
|
|
2137
|
-
void dequantize_row_tq1_0(const block_tq1_0 *
|
2131
|
+
void dequantize_row_tq1_0(const block_tq1_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2138
2132
|
assert(k % QK_K == 0);
|
2139
2133
|
const int64_t nb = k / QK_K;
|
2140
2134
|
|
@@ -2173,7 +2167,7 @@ void dequantize_row_tq1_0(const block_tq1_0 * restrict x, float * restrict y, in
|
|
2173
2167
|
}
|
2174
2168
|
}
|
2175
2169
|
|
2176
|
-
void dequantize_row_tq2_0(const block_tq2_0 *
|
2170
|
+
void dequantize_row_tq2_0(const block_tq2_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2177
2171
|
assert(k % QK_K == 0);
|
2178
2172
|
const int64_t nb = k / QK_K;
|
2179
2173
|
|
@@ -2194,7 +2188,7 @@ void dequantize_row_tq2_0(const block_tq2_0 * restrict x, float * restrict y, in
|
|
2194
2188
|
|
2195
2189
|
// ====================== "True" 2-bit (de)-quantization
|
2196
2190
|
|
2197
|
-
void dequantize_row_iq2_xxs(const block_iq2_xxs *
|
2191
|
+
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2198
2192
|
assert(k % QK_K == 0);
|
2199
2193
|
const int64_t nb = k / QK_K;
|
2200
2194
|
|
@@ -2222,7 +2216,7 @@ void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y
|
|
2222
2216
|
|
2223
2217
|
// ====================== 2.3125 bpw (de)-quantization
|
2224
2218
|
|
2225
|
-
void dequantize_row_iq2_xs(const block_iq2_xs *
|
2219
|
+
void dequantize_row_iq2_xs(const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2226
2220
|
assert(k % QK_K == 0);
|
2227
2221
|
const int64_t nb = k / QK_K;
|
2228
2222
|
|
@@ -2249,7 +2243,7 @@ void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y,
|
|
2249
2243
|
|
2250
2244
|
// ====================== 2.5625 bpw (de)-quantization
|
2251
2245
|
|
2252
|
-
void dequantize_row_iq2_s(const block_iq2_s *
|
2246
|
+
void dequantize_row_iq2_s(const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2253
2247
|
assert(k % QK_K == 0);
|
2254
2248
|
const int64_t nb = k / QK_K;
|
2255
2249
|
|
@@ -2281,7 +2275,7 @@ void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y, in
|
|
2281
2275
|
|
2282
2276
|
// ====================== 3.0625 bpw (de)-quantization
|
2283
2277
|
|
2284
|
-
void dequantize_row_iq3_xxs(const block_iq3_xxs *
|
2278
|
+
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2285
2279
|
assert(k % QK_K == 0);
|
2286
2280
|
const int64_t nb = k / QK_K;
|
2287
2281
|
|
@@ -2313,7 +2307,7 @@ void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y
|
|
2313
2307
|
|
2314
2308
|
// ====================== 3.3125 bpw (de)-quantization
|
2315
2309
|
|
2316
|
-
void dequantize_row_iq3_s(const block_iq3_s *
|
2310
|
+
void dequantize_row_iq3_s(const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2317
2311
|
assert(k % QK_K == 0);
|
2318
2312
|
const int64_t nb = k / QK_K;
|
2319
2313
|
|
@@ -2356,7 +2350,7 @@ void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y, in
|
|
2356
2350
|
|
2357
2351
|
// ====================== 1.5625 bpw (de)-quantization
|
2358
2352
|
|
2359
|
-
void dequantize_row_iq1_s(const block_iq1_s *
|
2353
|
+
void dequantize_row_iq1_s(const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2360
2354
|
assert(k % QK_K == 0);
|
2361
2355
|
const int64_t nb = k / QK_K;
|
2362
2356
|
|
@@ -2381,7 +2375,7 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in
|
|
2381
2375
|
}
|
2382
2376
|
}
|
2383
2377
|
|
2384
|
-
void dequantize_row_iq1_m(const block_iq1_m *
|
2378
|
+
void dequantize_row_iq1_m(const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2385
2379
|
assert(k % QK_K == 0);
|
2386
2380
|
const int64_t nb = k / QK_K;
|
2387
2381
|
|
@@ -2433,7 +2427,7 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in
|
|
2433
2427
|
|
2434
2428
|
static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
|
2435
2429
|
|
2436
|
-
void dequantize_row_iq4_nl(const block_iq4_nl *
|
2430
|
+
void dequantize_row_iq4_nl(const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2437
2431
|
assert(k % QK4_NL == 0);
|
2438
2432
|
const int64_t nb = k / QK4_NL;
|
2439
2433
|
|
@@ -2451,7 +2445,7 @@ void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y,
|
|
2451
2445
|
}
|
2452
2446
|
}
|
2453
2447
|
|
2454
|
-
void dequantize_row_iq4_xs(const block_iq4_xs *
|
2448
|
+
void dequantize_row_iq4_xs(const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2455
2449
|
assert(k % QK_K == 0);
|
2456
2450
|
const int64_t nb = k / QK_K;
|
2457
2451
|
|
@@ -2476,7 +2470,7 @@ void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y,
|
|
2476
2470
|
|
2477
2471
|
//===================================== Q8_K ==============================================
|
2478
2472
|
|
2479
|
-
void quantize_row_q8_K_ref(const float *
|
2473
|
+
void quantize_row_q8_K_ref(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k) {
|
2480
2474
|
assert(k % QK_K == 0);
|
2481
2475
|
const int64_t nb = k / QK_K;
|
2482
2476
|
|
@@ -2515,7 +2509,7 @@ void quantize_row_q8_K_ref(const float * restrict x, block_q8_K * restrict y, in
|
|
2515
2509
|
}
|
2516
2510
|
}
|
2517
2511
|
|
2518
|
-
void dequantize_row_q8_K(const block_q8_K *
|
2512
|
+
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2519
2513
|
assert(k % QK_K == 0);
|
2520
2514
|
const int64_t nb = k / QK_K;
|
2521
2515
|
|
@@ -2927,8 +2921,8 @@ void iq2xs_free_impl(enum ggml_type type) {
|
|
2927
2921
|
}
|
2928
2922
|
}
|
2929
2923
|
|
2930
|
-
static int iq2_find_best_neighbour(const uint16_t *
|
2931
|
-
const float *
|
2924
|
+
static int iq2_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
|
2925
|
+
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
|
2932
2926
|
int num_neighbors = neighbours[0];
|
2933
2927
|
GGML_ASSERT(num_neighbors > 0);
|
2934
2928
|
float best_d2 = FLT_MAX;
|
@@ -2951,7 +2945,7 @@ static int iq2_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
2951
2945
|
return grid_index;
|
2952
2946
|
}
|
2953
2947
|
|
2954
|
-
static void quantize_row_iq2_xxs_impl(const float *
|
2948
|
+
static void quantize_row_iq2_xxs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
|
2955
2949
|
|
2956
2950
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS);
|
2957
2951
|
|
@@ -3124,7 +3118,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
|
|
3124
3118
|
}
|
3125
3119
|
}
|
3126
3120
|
|
3127
|
-
static void quantize_row_iq2_xs_impl(const float *
|
3121
|
+
static void quantize_row_iq2_xs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
|
3128
3122
|
|
3129
3123
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);
|
3130
3124
|
|
@@ -3304,7 +3298,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
|
|
3304
3298
|
}
|
3305
3299
|
}
|
3306
3300
|
|
3307
|
-
size_t quantize_iq2_xxs(const float *
|
3301
|
+
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3308
3302
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
3309
3303
|
int64_t nblock = n_per_row/QK_K;
|
3310
3304
|
char * qrow = (char *)dst;
|
@@ -3316,7 +3310,7 @@ size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int64_t
|
|
3316
3310
|
return nrow * nblock * sizeof(block_iq2_xxs);
|
3317
3311
|
}
|
3318
3312
|
|
3319
|
-
size_t quantize_iq2_xs(const float *
|
3313
|
+
size_t quantize_iq2_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3320
3314
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
3321
3315
|
int64_t nblock = n_per_row/QK_K;
|
3322
3316
|
char * qrow = (char *)dst;
|
@@ -3521,8 +3515,8 @@ void iq3xs_free_impl(int grid_size) {
|
|
3521
3515
|
}
|
3522
3516
|
}
|
3523
3517
|
|
3524
|
-
static int iq3_find_best_neighbour(const uint16_t *
|
3525
|
-
const float *
|
3518
|
+
static int iq3_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint32_t * GGML_RESTRICT grid,
|
3519
|
+
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
|
3526
3520
|
int num_neighbors = neighbours[0];
|
3527
3521
|
GGML_ASSERT(num_neighbors > 0);
|
3528
3522
|
float best_d2 = FLT_MAX;
|
@@ -3545,8 +3539,8 @@ static int iq3_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
3545
3539
|
return grid_index;
|
3546
3540
|
}
|
3547
3541
|
|
3548
|
-
static void quantize_row_iq3_xxs_impl(int grid_size, const float *
|
3549
|
-
const float *
|
3542
|
+
static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n,
|
3543
|
+
const float * GGML_RESTRICT quant_weights) {
|
3550
3544
|
|
3551
3545
|
const int gindex = iq3_data_index(grid_size);
|
3552
3546
|
|
@@ -3758,7 +3752,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
|
|
3758
3752
|
}
|
3759
3753
|
}
|
3760
3754
|
|
3761
|
-
size_t quantize_iq3_xxs(const float *
|
3755
|
+
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3762
3756
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
3763
3757
|
int64_t nblock = n_per_row/QK_K;
|
3764
3758
|
char * qrow = (char *)dst;
|
@@ -3770,13 +3764,13 @@ size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int64_t
|
|
3770
3764
|
return nrow * nblock * sizeof(block_iq3_xxs);
|
3771
3765
|
}
|
3772
3766
|
|
3773
|
-
void quantize_row_iq3_xxs_ref(const float *
|
3767
|
+
void quantize_row_iq3_xxs_ref(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k) {
|
3774
3768
|
assert(k % QK_K == 0);
|
3775
3769
|
quantize_row_iq3_xxs_impl(256, x, y, k, NULL);
|
3776
3770
|
}
|
3777
3771
|
|
3778
|
-
static void quantize_row_iq3_s_impl(int block_size, const float *
|
3779
|
-
const float *
|
3772
|
+
static void quantize_row_iq3_s_impl(int block_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int n,
|
3773
|
+
const float * GGML_RESTRICT quant_weights,
|
3780
3774
|
float * scales,
|
3781
3775
|
float * weight,
|
3782
3776
|
float * xval,
|
@@ -3958,7 +3952,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
|
|
3958
3952
|
}
|
3959
3953
|
|
3960
3954
|
#define IQ3S_BLOCK_SIZE 32
|
3961
|
-
size_t quantize_iq3_s(const float *
|
3955
|
+
size_t quantize_iq3_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3962
3956
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
3963
3957
|
int64_t nblock = n_per_row/QK_K;
|
3964
3958
|
float scales[QK_K/IQ3S_BLOCK_SIZE];
|
@@ -3980,7 +3974,7 @@ size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int64_t n
|
|
3980
3974
|
return nrow * nblock * sizeof(block_iq3_s);
|
3981
3975
|
}
|
3982
3976
|
|
3983
|
-
void quantize_row_iq3_s_ref(const float *
|
3977
|
+
void quantize_row_iq3_s_ref(const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k) {
|
3984
3978
|
assert(k % QK_K == 0);
|
3985
3979
|
quantize_iq3_s(x, y, 1, k, NULL);
|
3986
3980
|
}
|
@@ -3988,8 +3982,8 @@ void quantize_row_iq3_s_ref(const float * restrict x, block_iq3_s * restrict y,
|
|
3988
3982
|
|
3989
3983
|
// =================================== 1.5 bpw ===================================================
|
3990
3984
|
|
3991
|
-
static int iq1_find_best_neighbour(const uint16_t *
|
3992
|
-
const float *
|
3985
|
+
static int iq1_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
|
3986
|
+
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float * scale, int8_t * GGML_RESTRICT L, int ngrid) {
|
3993
3987
|
int num_neighbors = neighbours[0];
|
3994
3988
|
GGML_ASSERT(num_neighbors > 0);
|
3995
3989
|
float best_score = -FLT_MAX;
|
@@ -4048,8 +4042,8 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
4048
4042
|
return grid_index;
|
4049
4043
|
}
|
4050
4044
|
|
4051
|
-
static int iq1_find_best_neighbour2(const uint16_t *
|
4052
|
-
const float *
|
4045
|
+
static int iq1_find_best_neighbour2(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
|
4046
|
+
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, const float * GGML_RESTRICT xg, int8_t * GGML_RESTRICT L, int ngrid) {
|
4053
4047
|
int num_neighbors = neighbours[0];
|
4054
4048
|
GGML_ASSERT(num_neighbors > 0);
|
4055
4049
|
float best_score = FLT_MAX;
|
@@ -4113,7 +4107,7 @@ static int iq1_sort_helper(const void * left, const void * right) {
|
|
4113
4107
|
|
4114
4108
|
#define IQ1S_BLOCK_SIZE 32
|
4115
4109
|
#define IQ1M_BLOCK_SIZE 16
|
4116
|
-
static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
|
4110
|
+
static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
|
4117
4111
|
float * scales,
|
4118
4112
|
float * weight,
|
4119
4113
|
float * sumx,
|
@@ -4271,7 +4265,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
|
4271
4265
|
}
|
4272
4266
|
}
|
4273
4267
|
|
4274
|
-
size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4268
|
+
size_t quantize_iq1_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4275
4269
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
4276
4270
|
float scales[QK_K/IQ1S_BLOCK_SIZE];
|
4277
4271
|
float weight[IQ1S_BLOCK_SIZE];
|
@@ -4291,7 +4285,7 @@ size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t n
|
|
4291
4285
|
return nrow * nblock * sizeof(block_iq1_s);
|
4292
4286
|
}
|
4293
4287
|
|
4294
|
-
static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
|
4288
|
+
static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
|
4295
4289
|
float * scales,
|
4296
4290
|
float * weight,
|
4297
4291
|
float * pairs,
|
@@ -4539,7 +4533,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
|
|
4539
4533
|
}
|
4540
4534
|
}
|
4541
4535
|
|
4542
|
-
size_t quantize_iq1_m(const float *
|
4536
|
+
size_t quantize_iq1_m(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4543
4537
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
4544
4538
|
float scales[QK_K/IQ1M_BLOCK_SIZE];
|
4545
4539
|
float weight[IQ1M_BLOCK_SIZE];
|
@@ -4570,7 +4564,7 @@ static inline int best_index_int8(int n, const int8_t * val, float x) {
|
|
4570
4564
|
return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
|
4571
4565
|
}
|
4572
4566
|
|
4573
|
-
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float *
|
4567
|
+
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * GGML_RESTRICT x,
|
4574
4568
|
ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
|
4575
4569
|
float * scales, float * weight, uint8_t * L,
|
4576
4570
|
const int8_t * values,
|
@@ -4681,7 +4675,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
|
4681
4675
|
}
|
4682
4676
|
}
|
4683
4677
|
|
4684
|
-
size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4678
|
+
size_t quantize_iq4_nl(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4685
4679
|
GGML_ASSERT(n_per_row%QK4_NL == 0);
|
4686
4680
|
int64_t nblock = n_per_row/QK4_NL;
|
4687
4681
|
char * qrow = (char *)dst;
|
@@ -4703,8 +4697,8 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int64_t
|
|
4703
4697
|
return nrow * nblock * sizeof(block_iq4_nl);
|
4704
4698
|
}
|
4705
4699
|
|
4706
|
-
//void quantize_row_iq4_nl_ref(const float *
|
4707
|
-
void quantize_row_iq4_nl_ref(const float * restrict x, block_iq4_nl * restrict y, int64_t k) {
|
4700
|
+
//void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
|
4701
|
+
void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k) {
|
4708
4702
|
GGML_ASSERT(k%QK4_NL == 0);
|
4709
4703
|
int64_t nblock = k/QK4_NL;
|
4710
4704
|
uint8_t L[QK4_NL];
|
@@ -4719,7 +4713,7 @@ void quantize_row_iq4_nl_ref(const float * restrict x, block_iq4_nl * restrict y
|
|
4719
4713
|
}
|
4720
4714
|
}
|
4721
4715
|
|
4722
|
-
size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4716
|
+
size_t quantize_iq4_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4723
4717
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
4724
4718
|
int64_t nblock = n_per_row/QK_K;
|
4725
4719
|
char * qrow = (char *)dst;
|
@@ -4739,14 +4733,14 @@ size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int64_t
|
|
4739
4733
|
return nrow * nblock * sizeof(block_iq4_xs);
|
4740
4734
|
}
|
4741
4735
|
|
4742
|
-
void quantize_row_iq4_xs_ref(const float *
|
4736
|
+
void quantize_row_iq4_xs_ref(const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k) {
|
4743
4737
|
assert(k % QK_K == 0);
|
4744
4738
|
quantize_iq4_xs(x, y, 1, k, NULL);
|
4745
4739
|
}
|
4746
4740
|
|
4747
4741
|
// =============================== 2.5625 bpw
|
4748
4742
|
|
4749
|
-
static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
|
4743
|
+
static void quantize_row_iq2_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
|
4750
4744
|
|
4751
4745
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_S);
|
4752
4746
|
|
@@ -4914,7 +4908,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
|
|
4914
4908
|
}
|
4915
4909
|
}
|
4916
4910
|
|
4917
|
-
size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4911
|
+
size_t quantize_iq2_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4918
4912
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
4919
4913
|
int64_t nblock = n_per_row/QK_K;
|
4920
4914
|
char * qrow = (char *)dst;
|
@@ -4926,7 +4920,7 @@ size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int64_t n
|
|
4926
4920
|
return nrow * nblock * sizeof(block_iq2_s);
|
4927
4921
|
}
|
4928
4922
|
|
4929
|
-
void quantize_row_iq2_s_ref(const float *
|
4923
|
+
void quantize_row_iq2_s_ref(const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k) {
|
4930
4924
|
assert(k % QK_K == 0);
|
4931
4925
|
quantize_iq2_s(x, y, 1, k, NULL);
|
4932
4926
|
}
|