whispercpp 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +7 -3
- data/README.md +161 -43
- data/Rakefile +45 -13
- data/ext/.gitignore +4 -8
- data/ext/dependencies.rb +73 -0
- data/ext/extconf.rb +21 -198
- data/ext/options.rb +85 -0
- data/ext/ruby_whisper.c +177 -0
- data/ext/ruby_whisper.h +17 -2
- data/ext/ruby_whisper_context.c +672 -0
- data/ext/ruby_whisper_error.c +52 -0
- data/ext/ruby_whisper_model.c +232 -0
- data/ext/ruby_whisper_params.c +1303 -0
- data/ext/ruby_whisper_segment.c +220 -0
- data/ext/ruby_whisper_transcribe.cpp +93 -0
- data/ext/ruby_whisper_vad_params.c +288 -0
- data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
- data/ext/sources/CMakeLists.txt +255 -0
- data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
- data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
- data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
- data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
- data/ext/sources/bindings/javascript/package.json +26 -0
- data/ext/sources/bindings/javascript/whisper.js +19 -0
- data/ext/sources/build-xcframework.sh +547 -0
- data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
- data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
- data/ext/sources/cmake/build-info.cmake +60 -0
- data/ext/sources/cmake/git-vars.cmake +22 -0
- data/ext/sources/cmake/whisper-config.cmake.in +65 -0
- data/ext/sources/cmake/whisper.pc.in +10 -0
- data/ext/sources/examples/CMakeLists.txt +124 -0
- data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +133 -0
- data/ext/sources/examples/addon.node/addon.cpp +557 -0
- data/ext/sources/examples/addon.node/index.js +57 -0
- data/ext/sources/examples/addon.node/package.json +16 -0
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/CMakeLists.txt +8 -0
- data/ext/sources/examples/bench/bench.cpp +176 -0
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
- data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
- data/ext/sources/examples/cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/cli/cli.cpp +1295 -0
- data/ext/sources/examples/coi-serviceworker.js +146 -0
- data/ext/sources/examples/command/CMakeLists.txt +10 -0
- data/ext/sources/examples/command/command.cpp +800 -0
- data/ext/sources/examples/command/commands.txt +9 -0
- data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
- data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/common-ggml.cpp +238 -0
- data/ext/sources/examples/common-ggml.h +18 -0
- data/ext/sources/examples/common-sdl.cpp +227 -0
- data/ext/sources/examples/common-sdl.h +49 -0
- data/ext/sources/examples/common-whisper.cpp +175 -0
- data/ext/sources/examples/common-whisper.h +24 -0
- data/ext/sources/examples/common.cpp +675 -0
- data/ext/sources/examples/common.h +322 -0
- data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
- data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
- data/ext/sources/examples/generate-karaoke.sh +57 -0
- data/ext/sources/examples/grammar-parser.cpp +423 -0
- data/ext/sources/examples/grammar-parser.h +29 -0
- data/ext/sources/examples/helpers.js +191 -0
- data/ext/sources/examples/json.hpp +24596 -0
- data/ext/sources/examples/livestream.sh +112 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
- data/ext/sources/examples/lsp/lsp.cpp +469 -0
- data/ext/sources/examples/lsp/whisper.vim +362 -0
- data/ext/sources/examples/miniaudio.h +93468 -0
- data/ext/sources/examples/python/test_whisper_processor.py +7 -0
- data/ext/sources/examples/python/whisper_processor.py +54 -0
- data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
- data/ext/sources/examples/quantize/quantize.cpp +226 -0
- data/ext/sources/examples/server/CMakeLists.txt +15 -0
- data/ext/sources/examples/server/bench.js +29 -0
- data/ext/sources/examples/server/httplib.h +10497 -0
- data/ext/sources/examples/server/server.cpp +1238 -0
- data/ext/sources/examples/server.py +115 -0
- data/ext/sources/examples/stb_vorbis.c +5584 -0
- data/ext/sources/examples/stream/CMakeLists.txt +10 -0
- data/ext/sources/examples/stream/stream.cpp +435 -0
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
- data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
- data/ext/sources/examples/sycl/build.sh +22 -0
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
- data/ext/sources/examples/sycl/run-whisper.sh +17 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +43 -0
- data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
- data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +1914 -0
- data/ext/sources/examples/talk-llama/llama-arch.h +464 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +843 -0
- data/ext/sources/examples/talk-llama/llama-batch.h +147 -0
- data/ext/sources/examples/talk-llama/llama-chat.cpp +685 -0
- data/ext/sources/examples/talk-llama/llama-chat.h +59 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +2845 -0
- data/ext/sources/examples/talk-llama/llama-context.h +297 -0
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
- data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
- data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
- data/ext/sources/examples/talk-llama/llama-graph.cpp +1693 -0
- data/ext/sources/examples/talk-llama/llama-graph.h +710 -0
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +103 -0
- data/ext/sources/examples/talk-llama/llama-hparams.h +207 -0
- data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
- data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
- data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
- data/ext/sources/examples/talk-llama/llama-io.h +35 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +44 -0
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +439 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +59 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +116 -0
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
- data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1163 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +282 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +15114 -0
- data/ext/sources/examples/talk-llama/llama-model.h +452 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +1049 -0
- data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
- data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +3377 -0
- data/ext/sources/examples/talk-llama/llama-vocab.h +132 -0
- data/ext/sources/examples/talk-llama/llama.cpp +358 -0
- data/ext/sources/examples/talk-llama/llama.h +1484 -0
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
- data/ext/sources/examples/talk-llama/speak +40 -0
- data/ext/sources/examples/talk-llama/speak.bat +1 -0
- data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +810 -0
- data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
- data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +854 -0
- data/ext/sources/examples/talk-llama/unicode.h +66 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +149 -0
- data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
- data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +251 -0
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
- data/ext/sources/ggml/CMakeLists.txt +435 -0
- data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
- data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
- data/ext/sources/ggml/cmake/common.cmake +50 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
- data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-backend.h +10 -8
- data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
- data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +11 -1
- data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
- data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
- data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
- data/ext/{ggml → sources/ggml}/include/ggml.h +325 -269
- data/ext/sources/ggml/include/gguf.h +202 -0
- data/ext/sources/ggml/src/CMakeLists.txt +404 -0
- data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
- data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +92 -53
- data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +69 -34
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +75 -0
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
- data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +140 -1
- data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +588 -146
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
- data/ext/{ggml → sources/ggml}/src/ggml-common.h +16 -8
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +597 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +3 -2
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- data/ext/{ggml/src/ggml-cpu/cpu-feats-x86.cpp → sources/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp} +5 -1
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +3285 -0
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +73 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +172 -41
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3551 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +78 -25
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.cpp → sources/ggml/src/ggml-cpu/hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3594 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +19 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +9786 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.h +118 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-quants.h → sources/ggml/src/ggml-cpu/quants.h} +26 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +1184 -0
- data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.cpp → sources/ggml/src/ggml-cpu/traits.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +345 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +1027 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +851 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +752 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +31 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1474 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +638 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3647 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +506 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +155 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +26 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +378 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +66 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
- data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
- data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-impl.h +147 -158
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +121 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +649 -0
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2504 -1108
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +2102 -1463
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +110 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +6494 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
- data/ext/{ggml → sources/ggml}/src/ggml-quants.c +120 -128
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +494 -84
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +344 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +561 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +56 -70
- data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +8 -12
- data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +575 -0
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +839 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +823 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +188 -67
- data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2987 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1120 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +84 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +102 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +212 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1197 -1295
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
- data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +60 -81
- data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1065 -0
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +482 -0
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
- data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +111 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +472 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +38 -28
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +15 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +6 -11
- data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1307 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +289 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +200 -0
- data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3822 -1335
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +61 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +203 -36
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- data/ext/{ggml → sources/ggml}/src/ggml.c +918 -1782
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +1351 -0
- data/ext/{include → sources/include}/whisper.h +70 -2
- data/ext/sources/src/CMakeLists.txt +145 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +36 -10
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +29 -3
- data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
- data/ext/sources/src/whisper-arch.h +197 -0
- data/ext/{src → sources/src}/whisper.cpp +1966 -386
- data/ext/sources/tests/CMakeLists.txt +105 -0
- data/ext/sources/tests/earnings21/eval.mk +58 -0
- data/ext/sources/tests/earnings21/eval.py +68 -0
- data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
- data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
- data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
- data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
- data/ext/sources/tests/earnings21/requirements.txt +6 -0
- data/ext/sources/tests/en-0-ref.txt +1 -0
- data/ext/sources/tests/en-1-ref.txt +1 -0
- data/ext/sources/tests/en-2-ref.txt +1 -0
- data/ext/sources/tests/es-0-ref.txt +1 -0
- data/ext/sources/tests/librispeech/eval.mk +39 -0
- data/ext/sources/tests/librispeech/eval.py +47 -0
- data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
- data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
- data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
- data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
- data/ext/sources/tests/librispeech/requirements.txt +6 -0
- data/ext/sources/tests/run-tests.sh +130 -0
- data/ext/sources/tests/test-c.c +3 -0
- data/ext/sources/tests/test-vad-full.cpp +54 -0
- data/ext/sources/tests/test-vad.cpp +83 -0
- data/ext/sources/tests/test-whisper.js +58 -0
- data/extsources.rb +39 -5
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +202 -126
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +510 -0
- data/test/helper.rb +24 -0
- data/{tests → test}/test_callback.rb +45 -3
- data/{tests → test}/test_error.rb +2 -2
- data/{tests → test}/test_model.rb +47 -0
- data/test/test_package.rb +51 -0
- data/test/test_params.rb +297 -0
- data/test/test_segment.rb +146 -0
- data/test/test_vad.rb +19 -0
- data/test/test_vad_params.rb +103 -0
- data/{tests → test}/test_whisper.rb +106 -36
- data/whispercpp.gemspec +5 -5
- metadata +837 -134
- data/ext/cpu.mk +0 -9
- data/ext/examples/dr_wav.h +0 -8815
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -10835
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
- data/ext/ggml/src/ggml-sycl/convert.cpp +0 -547
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
- data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +0 -1015
- data/ext/ggml/src/ggml-sycl/norm.cpp +0 -378
- data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
- data/ext/metal-embed.mk +0 -17
- data/ext/metal.mk +0 -6
- data/ext/ruby_whisper.cpp +0 -1909
- data/ext/scripts/get-flags.mk +0 -38
- data/lib/whisper.rb +0 -2
- data/tests/helper.rb +0 -7
- data/tests/test_package.rb +0 -31
- data/tests/test_params.rb +0 -160
- data/tests/test_segment.rb +0 -83
- /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
- /data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.h → sources/ggml/src/ggml-cpu/hbm.h} +0 -0
- /data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.h → sources/ggml/src/ggml-cpu/traits.h} +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
- /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
@@ -19,16 +19,10 @@
|
|
19
19
|
#define GROUP_MAX_EPS_IQ1_M 1e-7f
|
20
20
|
#define GROUP_MAX_EPS_IQ1_S 1e-12f
|
21
21
|
|
22
|
-
#if defined(_MSC_VER)
|
23
|
-
// disable "possible loss of data" to avoid warnings for hundreds of casts
|
24
|
-
// we should just be careful :)
|
25
|
-
#pragma warning(disable: 4244 4267)
|
26
|
-
#endif
|
27
|
-
|
28
22
|
#define UNUSED GGML_UNUSED
|
29
23
|
|
30
24
|
// reference implementation for deterministic creation of model files
|
31
|
-
void quantize_row_q4_0_ref(const float *
|
25
|
+
void quantize_row_q4_0_ref(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k) {
|
32
26
|
static const int qk = QK4_0;
|
33
27
|
|
34
28
|
assert(k % qk == 0);
|
@@ -65,7 +59,7 @@ void quantize_row_q4_0_ref(const float * restrict x, block_q4_0 * restrict y, in
|
|
65
59
|
}
|
66
60
|
}
|
67
61
|
|
68
|
-
void quantize_row_q4_1_ref(const float *
|
62
|
+
void quantize_row_q4_1_ref(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k) {
|
69
63
|
const int qk = QK4_1;
|
70
64
|
|
71
65
|
assert(k % qk == 0);
|
@@ -102,7 +96,7 @@ void quantize_row_q4_1_ref(const float * restrict x, block_q4_1 * restrict y, in
|
|
102
96
|
}
|
103
97
|
}
|
104
98
|
|
105
|
-
void quantize_row_q5_0_ref(const float *
|
99
|
+
void quantize_row_q5_0_ref(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k) {
|
106
100
|
static const int qk = QK5_0;
|
107
101
|
|
108
102
|
assert(k % qk == 0);
|
@@ -146,7 +140,7 @@ void quantize_row_q5_0_ref(const float * restrict x, block_q5_0 * restrict y, in
|
|
146
140
|
}
|
147
141
|
}
|
148
142
|
|
149
|
-
void quantize_row_q5_1_ref(const float *
|
143
|
+
void quantize_row_q5_1_ref(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k) {
|
150
144
|
const int qk = QK5_1;
|
151
145
|
|
152
146
|
assert(k % qk == 0);
|
@@ -191,7 +185,7 @@ void quantize_row_q5_1_ref(const float * restrict x, block_q5_1 * restrict y, in
|
|
191
185
|
}
|
192
186
|
|
193
187
|
// reference implementation for deterministic creation of model files
|
194
|
-
void quantize_row_q8_0_ref(const float *
|
188
|
+
void quantize_row_q8_0_ref(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k) {
|
195
189
|
assert(k % QK8_0 == 0);
|
196
190
|
const int nb = k / QK8_0;
|
197
191
|
|
@@ -217,7 +211,7 @@ void quantize_row_q8_0_ref(const float * restrict x, block_q8_0 * restrict y, in
|
|
217
211
|
}
|
218
212
|
|
219
213
|
// reference implementation for deterministic creation of model files
|
220
|
-
void quantize_row_q8_1_ref(const float *
|
214
|
+
void quantize_row_q8_1_ref(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k) {
|
221
215
|
assert(QK8_1 == 32);
|
222
216
|
assert(k % QK8_1 == 0);
|
223
217
|
const int nb = k / QK8_1;
|
@@ -252,7 +246,7 @@ void quantize_row_q8_1_ref(const float * restrict x, block_q8_1 * restrict y, in
|
|
252
246
|
}
|
253
247
|
}
|
254
248
|
|
255
|
-
void dequantize_row_q4_0(const block_q4_0 *
|
249
|
+
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
256
250
|
static const int qk = QK4_0;
|
257
251
|
|
258
252
|
assert(k % qk == 0);
|
@@ -272,7 +266,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int6
|
|
272
266
|
}
|
273
267
|
}
|
274
268
|
|
275
|
-
void dequantize_row_q4_1(const block_q4_1 *
|
269
|
+
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
276
270
|
static const int qk = QK4_1;
|
277
271
|
|
278
272
|
assert(k % qk == 0);
|
@@ -293,7 +287,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int6
|
|
293
287
|
}
|
294
288
|
}
|
295
289
|
|
296
|
-
void dequantize_row_q5_0(const block_q5_0 *
|
290
|
+
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
297
291
|
static const int qk = QK5_0;
|
298
292
|
|
299
293
|
assert(k % qk == 0);
|
@@ -319,7 +313,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int6
|
|
319
313
|
}
|
320
314
|
}
|
321
315
|
|
322
|
-
void dequantize_row_q5_1(const block_q5_1 *
|
316
|
+
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
323
317
|
static const int qk = QK5_1;
|
324
318
|
|
325
319
|
assert(k % qk == 0);
|
@@ -346,7 +340,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int6
|
|
346
340
|
}
|
347
341
|
}
|
348
342
|
|
349
|
-
void dequantize_row_q8_0(const block_q8_0 *
|
343
|
+
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
350
344
|
static const int qk = QK8_0;
|
351
345
|
|
352
346
|
assert(k % qk == 0);
|
@@ -376,8 +370,8 @@ static inline int nearest_int(float fval) {
|
|
376
370
|
return (i & 0x007fffff) - 0x00400000;
|
377
371
|
}
|
378
372
|
|
379
|
-
static float make_qx_quants(int n, int nmax, const float *
|
380
|
-
const float *
|
373
|
+
static float make_qx_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, int rmse_type,
|
374
|
+
const float * GGML_RESTRICT qw) {
|
381
375
|
float max = 0;
|
382
376
|
float amax = 0;
|
383
377
|
for (int i = 0; i < n; ++i) {
|
@@ -445,7 +439,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
|
|
445
439
|
return scale;
|
446
440
|
}
|
447
441
|
|
448
|
-
static float make_q3_quants(int n, int nmax, const float *
|
442
|
+
static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, bool do_rmse) {
|
449
443
|
float max = 0;
|
450
444
|
float amax = 0;
|
451
445
|
for (int i = 0; i < n; ++i) {
|
@@ -504,7 +498,7 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t *
|
|
504
498
|
return 1/iscale;
|
505
499
|
}
|
506
500
|
|
507
|
-
static float make_qkx1_quants(int n, int nmax, const float *
|
501
|
+
static float make_qkx1_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min,
|
508
502
|
int ntry, float alpha) {
|
509
503
|
float min = x[0];
|
510
504
|
float max = x[0];
|
@@ -547,8 +541,8 @@ static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t
|
|
547
541
|
return scale;
|
548
542
|
}
|
549
543
|
|
550
|
-
static float make_qkx2_quants(int n, int nmax, const float *
|
551
|
-
uint8_t *
|
544
|
+
static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
|
545
|
+
uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
|
552
546
|
float rmin, float rdelta, int nstep, bool use_mad) {
|
553
547
|
float min = x[0];
|
554
548
|
float max = x[0];
|
@@ -574,14 +568,14 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
|
|
574
568
|
}
|
575
569
|
float iscale = nmax/(max - min);
|
576
570
|
float scale = 1/iscale;
|
577
|
-
float
|
571
|
+
float best_error = 0;
|
578
572
|
for (int i = 0; i < n; ++i) {
|
579
573
|
int l = nearest_int(iscale*(x[i] - min));
|
580
574
|
L[i] = MAX(0, MIN(nmax, l));
|
581
575
|
float diff = scale * L[i] + min - x[i];
|
582
576
|
diff = use_mad ? fabsf(diff) : diff * diff;
|
583
577
|
float w = weights[i];
|
584
|
-
|
578
|
+
best_error += w * diff;
|
585
579
|
}
|
586
580
|
if (nstep < 1) {
|
587
581
|
*the_min = -min;
|
@@ -607,18 +601,18 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
|
|
607
601
|
this_min = 0;
|
608
602
|
this_scale = sum_xl / sum_l2;
|
609
603
|
}
|
610
|
-
float
|
604
|
+
float cur_error = 0;
|
611
605
|
for (int i = 0; i < n; ++i) {
|
612
606
|
float diff = this_scale * Laux[i] + this_min - x[i];
|
613
607
|
diff = use_mad ? fabsf(diff) : diff * diff;
|
614
608
|
float w = weights[i];
|
615
|
-
|
609
|
+
cur_error += w * diff;
|
616
610
|
}
|
617
|
-
if (
|
611
|
+
if (cur_error < best_error) {
|
618
612
|
for (int i = 0; i < n; ++i) {
|
619
613
|
L[i] = Laux[i];
|
620
614
|
}
|
621
|
-
|
615
|
+
best_error = cur_error;
|
622
616
|
scale = this_scale;
|
623
617
|
min = this_min;
|
624
618
|
}
|
@@ -628,7 +622,7 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
|
|
628
622
|
return scale;
|
629
623
|
}
|
630
624
|
|
631
|
-
static inline void get_scale_min_k4(int j, const uint8_t *
|
625
|
+
static inline void get_scale_min_k4(int j, const uint8_t * GGML_RESTRICT q, uint8_t * GGML_RESTRICT d, uint8_t * GGML_RESTRICT m) {
|
632
626
|
if (j < 4) {
|
633
627
|
*d = q[j] & 63; *m = q[j + 4] & 63;
|
634
628
|
} else {
|
@@ -639,7 +633,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t *
|
|
639
633
|
|
640
634
|
//========================- 2-bit (de)-quantization
|
641
635
|
|
642
|
-
void quantize_row_q2_K_ref(const float *
|
636
|
+
void quantize_row_q2_K_ref(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k) {
|
643
637
|
assert(k % QK_K == 0);
|
644
638
|
const int nb = k / QK_K;
|
645
639
|
|
@@ -709,7 +703,7 @@ void quantize_row_q2_K_ref(const float * restrict x, block_q2_K * restrict y, in
|
|
709
703
|
}
|
710
704
|
}
|
711
705
|
|
712
|
-
void dequantize_row_q2_K(const block_q2_K *
|
706
|
+
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
713
707
|
assert(k % QK_K == 0);
|
714
708
|
const int nb = k / QK_K;
|
715
709
|
|
@@ -741,8 +735,8 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int6
|
|
741
735
|
}
|
742
736
|
}
|
743
737
|
|
744
|
-
static float make_qkx3_quants(int n, int nmax, const float *
|
745
|
-
uint8_t *
|
738
|
+
static float make_qkx3_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
|
739
|
+
uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
|
746
740
|
float rmin, float rdelta, int nstep, bool use_mad) {
|
747
741
|
float min = x[0];
|
748
742
|
float max = x[0];
|
@@ -824,7 +818,7 @@ static float make_qkx3_quants(int n, int nmax, const float * restrict x, const f
|
|
824
818
|
return scale;
|
825
819
|
}
|
826
820
|
|
827
|
-
static float make_qp_quants(int n, int nmax, const float *
|
821
|
+
static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, const float * quant_weights) {
|
828
822
|
float max = 0;
|
829
823
|
for (int i = 0; i < n; ++i) {
|
830
824
|
max = MAX(max, x[i]);
|
@@ -897,7 +891,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
|
|
897
891
|
return sumlx/suml2;
|
898
892
|
}
|
899
893
|
|
900
|
-
static void quantize_row_q2_K_impl(const float *
|
894
|
+
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
|
901
895
|
GGML_ASSERT(quant_weights);
|
902
896
|
assert(k % QK_K == 0);
|
903
897
|
const int nb = k / QK_K;
|
@@ -917,7 +911,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
|
|
917
911
|
for (int j = 0; j < QK_K; ++j) sumx2 += x[j]*x[j];
|
918
912
|
float sigma2 = sumx2/QK_K;
|
919
913
|
for (int j = 0; j < QK_K/16; ++j) {
|
920
|
-
const float *
|
914
|
+
const float * GGML_RESTRICT qw = quant_weights + QK_K * i + 16*j;
|
921
915
|
for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
|
922
916
|
for (int l = 0; l < QK_K/16; ++l) sw[j] += weight[l];
|
923
917
|
scales[j] = make_qkx3_quants(16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
|
@@ -959,7 +953,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
|
|
959
953
|
}
|
960
954
|
}
|
961
955
|
|
962
|
-
size_t quantize_q2_K(const float *
|
956
|
+
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
963
957
|
size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);
|
964
958
|
if (!quant_weights) {
|
965
959
|
quantize_row_q2_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -977,7 +971,7 @@ size_t quantize_q2_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
977
971
|
|
978
972
|
//========================= 3-bit (de)-quantization
|
979
973
|
|
980
|
-
void quantize_row_q3_K_ref(const float *
|
974
|
+
void quantize_row_q3_K_ref(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k) {
|
981
975
|
assert(k % QK_K == 0);
|
982
976
|
const int nb = k / QK_K;
|
983
977
|
|
@@ -1053,7 +1047,7 @@ void quantize_row_q3_K_ref(const float * restrict x, block_q3_K * restrict y, in
|
|
1053
1047
|
}
|
1054
1048
|
}
|
1055
1049
|
|
1056
|
-
void dequantize_row_q3_K(const block_q3_K *
|
1050
|
+
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
1057
1051
|
assert(k % QK_K == 0);
|
1058
1052
|
const int nb = k / QK_K;
|
1059
1053
|
|
@@ -1067,8 +1061,8 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int6
|
|
1067
1061
|
|
1068
1062
|
const float d_all = GGML_FP16_TO_FP32(x[i].d);
|
1069
1063
|
|
1070
|
-
const uint8_t *
|
1071
|
-
const uint8_t *
|
1064
|
+
const uint8_t * GGML_RESTRICT q = x[i].qs;
|
1065
|
+
const uint8_t * GGML_RESTRICT hm = x[i].hmask;
|
1072
1066
|
uint8_t m = 1;
|
1073
1067
|
|
1074
1068
|
memcpy(aux, x[i].scales, 12);
|
@@ -1103,7 +1097,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int6
|
|
1103
1097
|
}
|
1104
1098
|
}
|
1105
1099
|
|
1106
|
-
static void quantize_row_q3_K_impl(const float *
|
1100
|
+
static void quantize_row_q3_K_impl(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t n_per_row, const float * GGML_RESTRICT quant_weights) {
|
1107
1101
|
assert(n_per_row % QK_K == 0);
|
1108
1102
|
const int nb = n_per_row / QK_K;
|
1109
1103
|
|
@@ -1187,7 +1181,7 @@ static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restri
|
|
1187
1181
|
}
|
1188
1182
|
}
|
1189
1183
|
|
1190
|
-
size_t quantize_q3_K(const float *
|
1184
|
+
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1191
1185
|
size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);
|
1192
1186
|
if (!quant_weights) {
|
1193
1187
|
quantize_row_q3_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -1205,7 +1199,7 @@ size_t quantize_q3_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
1205
1199
|
|
1206
1200
|
// ====================== 4-bit (de)-quantization
|
1207
1201
|
|
1208
|
-
void quantize_row_q4_K_ref(const float *
|
1202
|
+
void quantize_row_q4_K_ref(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k) {
|
1209
1203
|
assert(k % QK_K == 0);
|
1210
1204
|
const int nb = k / QK_K;
|
1211
1205
|
|
@@ -1277,7 +1271,7 @@ void quantize_row_q4_K_ref(const float * restrict x, block_q4_K * restrict y, in
|
|
1277
1271
|
}
|
1278
1272
|
}
|
1279
1273
|
|
1280
|
-
void dequantize_row_q4_K(const block_q4_K *
|
1274
|
+
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
1281
1275
|
assert(k % QK_K == 0);
|
1282
1276
|
const int nb = k / QK_K;
|
1283
1277
|
|
@@ -1301,7 +1295,7 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int6
|
|
1301
1295
|
}
|
1302
1296
|
}
|
1303
1297
|
|
1304
|
-
static void quantize_row_q4_K_impl(const float *
|
1298
|
+
static void quantize_row_q4_K_impl(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1305
1299
|
assert(n_per_row % QK_K == 0);
|
1306
1300
|
const int64_t nb = n_per_row / QK_K;
|
1307
1301
|
|
@@ -1374,7 +1368,7 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri
|
|
1374
1368
|
}
|
1375
1369
|
}
|
1376
1370
|
|
1377
|
-
size_t quantize_q4_K(const float *
|
1371
|
+
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1378
1372
|
size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
|
1379
1373
|
if (!quant_weights) {
|
1380
1374
|
quantize_row_q4_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -1392,7 +1386,7 @@ size_t quantize_q4_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
1392
1386
|
|
1393
1387
|
// ====================== 5-bit (de)-quantization
|
1394
1388
|
|
1395
|
-
void quantize_row_q5_K_ref(const float *
|
1389
|
+
void quantize_row_q5_K_ref(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k) {
|
1396
1390
|
assert(k % QK_K == 0);
|
1397
1391
|
const int64_t nb = k / QK_K;
|
1398
1392
|
|
@@ -1454,8 +1448,8 @@ void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, in
|
|
1454
1448
|
}
|
1455
1449
|
}
|
1456
1450
|
|
1457
|
-
uint8_t *
|
1458
|
-
uint8_t *
|
1451
|
+
uint8_t * GGML_RESTRICT qh = y[i].qh;
|
1452
|
+
uint8_t * GGML_RESTRICT ql = y[i].qs;
|
1459
1453
|
memset(qh, 0, QK_K/8);
|
1460
1454
|
|
1461
1455
|
uint8_t m1 = 1, m2 = 2;
|
@@ -1479,7 +1473,7 @@ void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, in
|
|
1479
1473
|
}
|
1480
1474
|
}
|
1481
1475
|
|
1482
|
-
void dequantize_row_q5_K(const block_q5_K *
|
1476
|
+
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
1483
1477
|
assert(k % QK_K == 0);
|
1484
1478
|
const int64_t nb = k / QK_K;
|
1485
1479
|
|
@@ -1506,7 +1500,7 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int6
|
|
1506
1500
|
}
|
1507
1501
|
}
|
1508
1502
|
|
1509
|
-
static void quantize_row_q5_K_impl(const float *
|
1503
|
+
static void quantize_row_q5_K_impl(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1510
1504
|
assert(n_per_row % QK_K == 0);
|
1511
1505
|
const int64_t nb = n_per_row / QK_K;
|
1512
1506
|
|
@@ -1573,8 +1567,8 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
|
|
1573
1567
|
}
|
1574
1568
|
}
|
1575
1569
|
|
1576
|
-
uint8_t *
|
1577
|
-
uint8_t *
|
1570
|
+
uint8_t * GGML_RESTRICT qh = y[i].qh;
|
1571
|
+
uint8_t * GGML_RESTRICT ql = y[i].qs;
|
1578
1572
|
memset(qh, 0, QK_K/8);
|
1579
1573
|
|
1580
1574
|
uint8_t m1 = 1, m2 = 2;
|
@@ -1599,7 +1593,7 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
|
|
1599
1593
|
}
|
1600
1594
|
}
|
1601
1595
|
|
1602
|
-
size_t quantize_q5_K(const float *
|
1596
|
+
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1603
1597
|
size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);
|
1604
1598
|
if (!quant_weights) {
|
1605
1599
|
quantize_row_q5_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -1617,7 +1611,7 @@ size_t quantize_q5_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
1617
1611
|
|
1618
1612
|
// ====================== 6-bit (de)-quantization
|
1619
1613
|
|
1620
|
-
void quantize_row_q6_K_ref(const float *
|
1614
|
+
void quantize_row_q6_K_ref(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k) {
|
1621
1615
|
assert(k % QK_K == 0);
|
1622
1616
|
const int64_t nb = k / QK_K;
|
1623
1617
|
|
@@ -1667,8 +1661,8 @@ void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, in
|
|
1667
1661
|
}
|
1668
1662
|
}
|
1669
1663
|
|
1670
|
-
uint8_t *
|
1671
|
-
uint8_t *
|
1664
|
+
uint8_t * GGML_RESTRICT ql = y[i].ql;
|
1665
|
+
uint8_t * GGML_RESTRICT qh = y[i].qh;
|
1672
1666
|
for (int j = 0; j < QK_K; j += 128) {
|
1673
1667
|
for (int l = 0; l < 32; ++l) {
|
1674
1668
|
const uint8_t q1 = L[j + l + 0] & 0xF;
|
@@ -1687,16 +1681,16 @@ void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, in
|
|
1687
1681
|
}
|
1688
1682
|
}
|
1689
1683
|
|
1690
|
-
void dequantize_row_q6_K(const block_q6_K *
|
1684
|
+
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
1691
1685
|
assert(k % QK_K == 0);
|
1692
1686
|
const int64_t nb = k / QK_K;
|
1693
1687
|
|
1694
1688
|
for (int i = 0; i < nb; i++) {
|
1695
1689
|
const float d = GGML_FP16_TO_FP32(x[i].d);
|
1696
1690
|
|
1697
|
-
const uint8_t *
|
1698
|
-
const uint8_t *
|
1699
|
-
const int8_t *
|
1691
|
+
const uint8_t * GGML_RESTRICT ql = x[i].ql;
|
1692
|
+
const uint8_t * GGML_RESTRICT qh = x[i].qh;
|
1693
|
+
const int8_t * GGML_RESTRICT sc = x[i].scales;
|
1700
1694
|
|
1701
1695
|
for (int n = 0; n < QK_K; n += 128) {
|
1702
1696
|
for (int l = 0; l < 32; ++l) {
|
@@ -1718,7 +1712,7 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int6
|
|
1718
1712
|
}
|
1719
1713
|
}
|
1720
1714
|
|
1721
|
-
static void quantize_row_q6_K_impl(const float *
|
1715
|
+
static void quantize_row_q6_K_impl(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1722
1716
|
assert(n_per_row % QK_K == 0);
|
1723
1717
|
const int64_t nb = n_per_row / QK_K;
|
1724
1718
|
|
@@ -1781,8 +1775,8 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
|
|
1781
1775
|
}
|
1782
1776
|
}
|
1783
1777
|
|
1784
|
-
uint8_t *
|
1785
|
-
uint8_t *
|
1778
|
+
uint8_t * GGML_RESTRICT ql = y[i].ql;
|
1779
|
+
uint8_t * GGML_RESTRICT qh = y[i].qh;
|
1786
1780
|
for (int j = 0; j < QK_K; j += 128) {
|
1787
1781
|
for (int l = 0; l < 32; ++l) {
|
1788
1782
|
const uint8_t q1 = L[j + l + 0] & 0xF;
|
@@ -1802,7 +1796,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
|
|
1802
1796
|
}
|
1803
1797
|
}
|
1804
1798
|
|
1805
|
-
size_t quantize_q6_K(const float *
|
1799
|
+
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1806
1800
|
size_t row_size = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);
|
1807
1801
|
if (!quant_weights) {
|
1808
1802
|
quantize_row_q6_K_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -1818,7 +1812,7 @@ size_t quantize_q6_K(const float * restrict src, void * restrict dst, int64_t nr
|
|
1818
1812
|
return nrow * row_size;
|
1819
1813
|
}
|
1820
1814
|
|
1821
|
-
static void quantize_row_q4_0_impl(const float *
|
1815
|
+
static void quantize_row_q4_0_impl(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1822
1816
|
static_assert(QK4_0 == 32, "QK4_0 must be 32");
|
1823
1817
|
|
1824
1818
|
if (!quant_weights) {
|
@@ -1846,7 +1840,7 @@ static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restri
|
|
1846
1840
|
}
|
1847
1841
|
}
|
1848
1842
|
|
1849
|
-
size_t quantize_q4_0(const float *
|
1843
|
+
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1850
1844
|
if (!quant_weights) {
|
1851
1845
|
quantize_row_q4_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
1852
1846
|
return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
|
@@ -1861,7 +1855,7 @@ size_t quantize_q4_0(const float * restrict src, void * restrict dst, int64_t nr
|
|
1861
1855
|
return nrow * row_size;
|
1862
1856
|
}
|
1863
1857
|
|
1864
|
-
static void quantize_row_q4_1_impl(const float *
|
1858
|
+
static void quantize_row_q4_1_impl(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1865
1859
|
static_assert(QK4_1 == 32, "QK4_1 must be 32");
|
1866
1860
|
|
1867
1861
|
if (!quant_weights) {
|
@@ -1891,7 +1885,7 @@ static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restri
|
|
1891
1885
|
}
|
1892
1886
|
}
|
1893
1887
|
|
1894
|
-
size_t quantize_q4_1(const float *
|
1888
|
+
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1895
1889
|
if (!quant_weights) {
|
1896
1890
|
quantize_row_q4_1_ref(src, dst, (int64_t)nrow*n_per_row);
|
1897
1891
|
return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
|
@@ -1906,7 +1900,7 @@ size_t quantize_q4_1(const float * restrict src, void * restrict dst, int64_t nr
|
|
1906
1900
|
return nrow * row_size;
|
1907
1901
|
}
|
1908
1902
|
|
1909
|
-
static void quantize_row_q5_0_impl(const float *
|
1903
|
+
static void quantize_row_q5_0_impl(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1910
1904
|
static_assert(QK5_0 == 32, "QK5_0 must be 32");
|
1911
1905
|
|
1912
1906
|
if (!quant_weights) {
|
@@ -1945,7 +1939,7 @@ static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restri
|
|
1945
1939
|
}
|
1946
1940
|
}
|
1947
1941
|
|
1948
|
-
size_t quantize_q5_0(const float *
|
1942
|
+
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1949
1943
|
if (!quant_weights) {
|
1950
1944
|
quantize_row_q5_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
1951
1945
|
return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
|
@@ -1960,7 +1954,7 @@ size_t quantize_q5_0(const float * restrict src, void * restrict dst, int64_t nr
|
|
1960
1954
|
return nrow * row_size;
|
1961
1955
|
}
|
1962
1956
|
|
1963
|
-
static void quantize_row_q5_1_impl(const float *
|
1957
|
+
static void quantize_row_q5_1_impl(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
|
1964
1958
|
static_assert(QK5_1 == 32, "QK5_1 must be 32");
|
1965
1959
|
|
1966
1960
|
if (!quant_weights) {
|
@@ -1998,7 +1992,7 @@ static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restri
|
|
1998
1992
|
}
|
1999
1993
|
}
|
2000
1994
|
|
2001
|
-
size_t quantize_q5_1(const float *
|
1995
|
+
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2002
1996
|
if (!quant_weights) {
|
2003
1997
|
quantize_row_q5_1_ref(src, dst, (int64_t)nrow*n_per_row);
|
2004
1998
|
return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
|
@@ -2013,7 +2007,7 @@ size_t quantize_q5_1(const float * restrict src, void * restrict dst, int64_t nr
|
|
2013
2007
|
return nrow * row_size;
|
2014
2008
|
}
|
2015
2009
|
|
2016
|
-
size_t quantize_q8_0(const float *
|
2010
|
+
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2017
2011
|
(void)quant_weights; // not used
|
2018
2012
|
const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
|
2019
2013
|
quantize_row_q8_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
@@ -2022,7 +2016,7 @@ size_t quantize_q8_0(const float * restrict src, void * restrict dst, int64_t nr
|
|
2022
2016
|
|
2023
2017
|
// ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)
|
2024
2018
|
|
2025
|
-
void quantize_row_tq1_0_ref(const float *
|
2019
|
+
void quantize_row_tq1_0_ref(const float * GGML_RESTRICT x, block_tq1_0 * GGML_RESTRICT y, int64_t k) {
|
2026
2020
|
assert(k % QK_K == 0);
|
2027
2021
|
const int64_t nb = k / QK_K;
|
2028
2022
|
|
@@ -2088,7 +2082,7 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
|
|
2088
2082
|
}
|
2089
2083
|
}
|
2090
2084
|
|
2091
|
-
void quantize_row_tq2_0_ref(const float *
|
2085
|
+
void quantize_row_tq2_0_ref(const float * GGML_RESTRICT x, block_tq2_0 * GGML_RESTRICT y, int64_t k) {
|
2092
2086
|
assert(k % QK_K == 0);
|
2093
2087
|
const int64_t nb = k / QK_K;
|
2094
2088
|
|
@@ -2120,21 +2114,21 @@ void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y,
|
|
2120
2114
|
}
|
2121
2115
|
}
|
2122
2116
|
|
2123
|
-
size_t quantize_tq1_0(const float *
|
2117
|
+
size_t quantize_tq1_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2124
2118
|
(void)quant_weights; // not used
|
2125
2119
|
const size_t row_size = ggml_row_size(GGML_TYPE_TQ1_0, n_per_row);
|
2126
2120
|
quantize_row_tq1_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
2127
2121
|
return nrow * row_size;
|
2128
2122
|
}
|
2129
2123
|
|
2130
|
-
size_t quantize_tq2_0(const float *
|
2124
|
+
size_t quantize_tq2_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2131
2125
|
(void)quant_weights; // not used
|
2132
2126
|
const size_t row_size = ggml_row_size(GGML_TYPE_TQ2_0, n_per_row);
|
2133
2127
|
quantize_row_tq2_0_ref(src, dst, (int64_t)nrow*n_per_row);
|
2134
2128
|
return nrow * row_size;
|
2135
2129
|
}
|
2136
2130
|
|
2137
|
-
void dequantize_row_tq1_0(const block_tq1_0 *
|
2131
|
+
void dequantize_row_tq1_0(const block_tq1_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2138
2132
|
assert(k % QK_K == 0);
|
2139
2133
|
const int64_t nb = k / QK_K;
|
2140
2134
|
|
@@ -2173,7 +2167,7 @@ void dequantize_row_tq1_0(const block_tq1_0 * restrict x, float * restrict y, in
|
|
2173
2167
|
}
|
2174
2168
|
}
|
2175
2169
|
|
2176
|
-
void dequantize_row_tq2_0(const block_tq2_0 *
|
2170
|
+
void dequantize_row_tq2_0(const block_tq2_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2177
2171
|
assert(k % QK_K == 0);
|
2178
2172
|
const int64_t nb = k / QK_K;
|
2179
2173
|
|
@@ -2194,7 +2188,7 @@ void dequantize_row_tq2_0(const block_tq2_0 * restrict x, float * restrict y, in
|
|
2194
2188
|
|
2195
2189
|
// ====================== "True" 2-bit (de)-quantization
|
2196
2190
|
|
2197
|
-
void dequantize_row_iq2_xxs(const block_iq2_xxs *
|
2191
|
+
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2198
2192
|
assert(k % QK_K == 0);
|
2199
2193
|
const int64_t nb = k / QK_K;
|
2200
2194
|
|
@@ -2222,7 +2216,7 @@ void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y
|
|
2222
2216
|
|
2223
2217
|
// ====================== 2.3125 bpw (de)-quantization
|
2224
2218
|
|
2225
|
-
void dequantize_row_iq2_xs(const block_iq2_xs *
|
2219
|
+
void dequantize_row_iq2_xs(const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2226
2220
|
assert(k % QK_K == 0);
|
2227
2221
|
const int64_t nb = k / QK_K;
|
2228
2222
|
|
@@ -2249,7 +2243,7 @@ void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y,
|
|
2249
2243
|
|
2250
2244
|
// ====================== 2.5625 bpw (de)-quantization
|
2251
2245
|
|
2252
|
-
void dequantize_row_iq2_s(const block_iq2_s *
|
2246
|
+
void dequantize_row_iq2_s(const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2253
2247
|
assert(k % QK_K == 0);
|
2254
2248
|
const int64_t nb = k / QK_K;
|
2255
2249
|
|
@@ -2281,7 +2275,7 @@ void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y, in
|
|
2281
2275
|
|
2282
2276
|
// ====================== 3.0625 bpw (de)-quantization
|
2283
2277
|
|
2284
|
-
void dequantize_row_iq3_xxs(const block_iq3_xxs *
|
2278
|
+
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2285
2279
|
assert(k % QK_K == 0);
|
2286
2280
|
const int64_t nb = k / QK_K;
|
2287
2281
|
|
@@ -2313,7 +2307,7 @@ void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y
|
|
2313
2307
|
|
2314
2308
|
// ====================== 3.3125 bpw (de)-quantization
|
2315
2309
|
|
2316
|
-
void dequantize_row_iq3_s(const block_iq3_s *
|
2310
|
+
void dequantize_row_iq3_s(const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2317
2311
|
assert(k % QK_K == 0);
|
2318
2312
|
const int64_t nb = k / QK_K;
|
2319
2313
|
|
@@ -2356,7 +2350,7 @@ void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y, in
|
|
2356
2350
|
|
2357
2351
|
// ====================== 1.5625 bpw (de)-quantization
|
2358
2352
|
|
2359
|
-
void dequantize_row_iq1_s(const block_iq1_s *
|
2353
|
+
void dequantize_row_iq1_s(const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2360
2354
|
assert(k % QK_K == 0);
|
2361
2355
|
const int64_t nb = k / QK_K;
|
2362
2356
|
|
@@ -2381,7 +2375,7 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in
|
|
2381
2375
|
}
|
2382
2376
|
}
|
2383
2377
|
|
2384
|
-
void dequantize_row_iq1_m(const block_iq1_m *
|
2378
|
+
void dequantize_row_iq1_m(const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2385
2379
|
assert(k % QK_K == 0);
|
2386
2380
|
const int64_t nb = k / QK_K;
|
2387
2381
|
|
@@ -2431,9 +2425,7 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in
|
|
2431
2425
|
}
|
2432
2426
|
}
|
2433
2427
|
|
2434
|
-
|
2435
|
-
|
2436
|
-
void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y, int64_t k) {
|
2428
|
+
void dequantize_row_iq4_nl(const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2437
2429
|
assert(k % QK4_NL == 0);
|
2438
2430
|
const int64_t nb = k / QK4_NL;
|
2439
2431
|
|
@@ -2451,7 +2443,7 @@ void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y,
|
|
2451
2443
|
}
|
2452
2444
|
}
|
2453
2445
|
|
2454
|
-
void dequantize_row_iq4_xs(const block_iq4_xs *
|
2446
|
+
void dequantize_row_iq4_xs(const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2455
2447
|
assert(k % QK_K == 0);
|
2456
2448
|
const int64_t nb = k / QK_K;
|
2457
2449
|
|
@@ -2476,7 +2468,7 @@ void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y,
|
|
2476
2468
|
|
2477
2469
|
//===================================== Q8_K ==============================================
|
2478
2470
|
|
2479
|
-
void quantize_row_q8_K_ref(const float *
|
2471
|
+
void quantize_row_q8_K_ref(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k) {
|
2480
2472
|
assert(k % QK_K == 0);
|
2481
2473
|
const int64_t nb = k / QK_K;
|
2482
2474
|
|
@@ -2515,7 +2507,7 @@ void quantize_row_q8_K_ref(const float * restrict x, block_q8_K * restrict y, in
|
|
2515
2507
|
}
|
2516
2508
|
}
|
2517
2509
|
|
2518
|
-
void dequantize_row_q8_K(const block_q8_K *
|
2510
|
+
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
2519
2511
|
assert(k % QK_K == 0);
|
2520
2512
|
const int64_t nb = k / QK_K;
|
2521
2513
|
|
@@ -2927,8 +2919,8 @@ void iq2xs_free_impl(enum ggml_type type) {
|
|
2927
2919
|
}
|
2928
2920
|
}
|
2929
2921
|
|
2930
|
-
static int iq2_find_best_neighbour(const uint16_t *
|
2931
|
-
const float *
|
2922
|
+
static int iq2_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
|
2923
|
+
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
|
2932
2924
|
int num_neighbors = neighbours[0];
|
2933
2925
|
GGML_ASSERT(num_neighbors > 0);
|
2934
2926
|
float best_d2 = FLT_MAX;
|
@@ -2951,7 +2943,7 @@ static int iq2_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
2951
2943
|
return grid_index;
|
2952
2944
|
}
|
2953
2945
|
|
2954
|
-
static void quantize_row_iq2_xxs_impl(const float *
|
2946
|
+
static void quantize_row_iq2_xxs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
|
2955
2947
|
|
2956
2948
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS);
|
2957
2949
|
|
@@ -3124,7 +3116,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
|
|
3124
3116
|
}
|
3125
3117
|
}
|
3126
3118
|
|
3127
|
-
static void quantize_row_iq2_xs_impl(const float *
|
3119
|
+
static void quantize_row_iq2_xs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
|
3128
3120
|
|
3129
3121
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);
|
3130
3122
|
|
@@ -3304,7 +3296,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
|
|
3304
3296
|
}
|
3305
3297
|
}
|
3306
3298
|
|
3307
|
-
size_t quantize_iq2_xxs(const float *
|
3299
|
+
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3308
3300
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
3309
3301
|
int64_t nblock = n_per_row/QK_K;
|
3310
3302
|
char * qrow = (char *)dst;
|
@@ -3316,7 +3308,7 @@ size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int64_t
|
|
3316
3308
|
return nrow * nblock * sizeof(block_iq2_xxs);
|
3317
3309
|
}
|
3318
3310
|
|
3319
|
-
size_t quantize_iq2_xs(const float *
|
3311
|
+
size_t quantize_iq2_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3320
3312
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
3321
3313
|
int64_t nblock = n_per_row/QK_K;
|
3322
3314
|
char * qrow = (char *)dst;
|
@@ -3521,8 +3513,8 @@ void iq3xs_free_impl(int grid_size) {
|
|
3521
3513
|
}
|
3522
3514
|
}
|
3523
3515
|
|
3524
|
-
static int iq3_find_best_neighbour(const uint16_t *
|
3525
|
-
const float *
|
3516
|
+
static int iq3_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint32_t * GGML_RESTRICT grid,
|
3517
|
+
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
|
3526
3518
|
int num_neighbors = neighbours[0];
|
3527
3519
|
GGML_ASSERT(num_neighbors > 0);
|
3528
3520
|
float best_d2 = FLT_MAX;
|
@@ -3545,8 +3537,8 @@ static int iq3_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
3545
3537
|
return grid_index;
|
3546
3538
|
}
|
3547
3539
|
|
3548
|
-
static void quantize_row_iq3_xxs_impl(int grid_size, const float *
|
3549
|
-
const float *
|
3540
|
+
static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n,
|
3541
|
+
const float * GGML_RESTRICT quant_weights) {
|
3550
3542
|
|
3551
3543
|
const int gindex = iq3_data_index(grid_size);
|
3552
3544
|
|
@@ -3758,7 +3750,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
|
|
3758
3750
|
}
|
3759
3751
|
}
|
3760
3752
|
|
3761
|
-
size_t quantize_iq3_xxs(const float *
|
3753
|
+
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3762
3754
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
3763
3755
|
int64_t nblock = n_per_row/QK_K;
|
3764
3756
|
char * qrow = (char *)dst;
|
@@ -3770,13 +3762,13 @@ size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int64_t
|
|
3770
3762
|
return nrow * nblock * sizeof(block_iq3_xxs);
|
3771
3763
|
}
|
3772
3764
|
|
3773
|
-
void quantize_row_iq3_xxs_ref(const float *
|
3765
|
+
void quantize_row_iq3_xxs_ref(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k) {
|
3774
3766
|
assert(k % QK_K == 0);
|
3775
3767
|
quantize_row_iq3_xxs_impl(256, x, y, k, NULL);
|
3776
3768
|
}
|
3777
3769
|
|
3778
|
-
static void quantize_row_iq3_s_impl(int block_size, const float *
|
3779
|
-
const float *
|
3770
|
+
static void quantize_row_iq3_s_impl(int block_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int n,
|
3771
|
+
const float * GGML_RESTRICT quant_weights,
|
3780
3772
|
float * scales,
|
3781
3773
|
float * weight,
|
3782
3774
|
float * xval,
|
@@ -3958,7 +3950,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
|
|
3958
3950
|
}
|
3959
3951
|
|
3960
3952
|
#define IQ3S_BLOCK_SIZE 32
|
3961
|
-
size_t quantize_iq3_s(const float *
|
3953
|
+
size_t quantize_iq3_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3962
3954
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
3963
3955
|
int64_t nblock = n_per_row/QK_K;
|
3964
3956
|
float scales[QK_K/IQ3S_BLOCK_SIZE];
|
@@ -3980,7 +3972,7 @@ size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int64_t n
|
|
3980
3972
|
return nrow * nblock * sizeof(block_iq3_s);
|
3981
3973
|
}
|
3982
3974
|
|
3983
|
-
void quantize_row_iq3_s_ref(const float *
|
3975
|
+
void quantize_row_iq3_s_ref(const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k) {
|
3984
3976
|
assert(k % QK_K == 0);
|
3985
3977
|
quantize_iq3_s(x, y, 1, k, NULL);
|
3986
3978
|
}
|
@@ -3988,8 +3980,8 @@ void quantize_row_iq3_s_ref(const float * restrict x, block_iq3_s * restrict y,
|
|
3988
3980
|
|
3989
3981
|
// =================================== 1.5 bpw ===================================================
|
3990
3982
|
|
3991
|
-
static int iq1_find_best_neighbour(const uint16_t *
|
3992
|
-
const float *
|
3983
|
+
static int iq1_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
|
3984
|
+
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float * scale, int8_t * GGML_RESTRICT L, int ngrid) {
|
3993
3985
|
int num_neighbors = neighbours[0];
|
3994
3986
|
GGML_ASSERT(num_neighbors > 0);
|
3995
3987
|
float best_score = -FLT_MAX;
|
@@ -4048,8 +4040,8 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
4048
4040
|
return grid_index;
|
4049
4041
|
}
|
4050
4042
|
|
4051
|
-
static int iq1_find_best_neighbour2(const uint16_t *
|
4052
|
-
const float *
|
4043
|
+
static int iq1_find_best_neighbour2(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
|
4044
|
+
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, const float * GGML_RESTRICT xg, int8_t * GGML_RESTRICT L, int ngrid) {
|
4053
4045
|
int num_neighbors = neighbours[0];
|
4054
4046
|
GGML_ASSERT(num_neighbors > 0);
|
4055
4047
|
float best_score = FLT_MAX;
|
@@ -4113,7 +4105,7 @@ static int iq1_sort_helper(const void * left, const void * right) {
|
|
4113
4105
|
|
4114
4106
|
#define IQ1S_BLOCK_SIZE 32
|
4115
4107
|
#define IQ1M_BLOCK_SIZE 16
|
4116
|
-
static void quantize_row_iq1_s_impl(const float *
|
4108
|
+
static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
|
4117
4109
|
float * scales,
|
4118
4110
|
float * weight,
|
4119
4111
|
float * sumx,
|
@@ -4271,7 +4263,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
|
4271
4263
|
}
|
4272
4264
|
}
|
4273
4265
|
|
4274
|
-
size_t quantize_iq1_s(const float *
|
4266
|
+
size_t quantize_iq1_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4275
4267
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
4276
4268
|
float scales[QK_K/IQ1S_BLOCK_SIZE];
|
4277
4269
|
float weight[IQ1S_BLOCK_SIZE];
|
@@ -4291,7 +4283,7 @@ size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t n
|
|
4291
4283
|
return nrow * nblock * sizeof(block_iq1_s);
|
4292
4284
|
}
|
4293
4285
|
|
4294
|
-
static void quantize_row_iq1_m_impl(const float *
|
4286
|
+
static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
|
4295
4287
|
float * scales,
|
4296
4288
|
float * weight,
|
4297
4289
|
float * pairs,
|
@@ -4539,7 +4531,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
|
|
4539
4531
|
}
|
4540
4532
|
}
|
4541
4533
|
|
4542
|
-
size_t quantize_iq1_m(const float *
|
4534
|
+
size_t quantize_iq1_m(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4543
4535
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
4544
4536
|
float scales[QK_K/IQ1M_BLOCK_SIZE];
|
4545
4537
|
float weight[IQ1M_BLOCK_SIZE];
|
@@ -4570,7 +4562,7 @@ static inline int best_index_int8(int n, const int8_t * val, float x) {
|
|
4570
4562
|
return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
|
4571
4563
|
}
|
4572
4564
|
|
4573
|
-
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float *
|
4565
|
+
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * GGML_RESTRICT x,
|
4574
4566
|
ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
|
4575
4567
|
float * scales, float * weight, uint8_t * L,
|
4576
4568
|
const int8_t * values,
|
@@ -4681,7 +4673,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
|
4681
4673
|
}
|
4682
4674
|
}
|
4683
4675
|
|
4684
|
-
size_t quantize_iq4_nl(const float *
|
4676
|
+
size_t quantize_iq4_nl(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4685
4677
|
GGML_ASSERT(n_per_row%QK4_NL == 0);
|
4686
4678
|
int64_t nblock = n_per_row/QK4_NL;
|
4687
4679
|
char * qrow = (char *)dst;
|
@@ -4703,8 +4695,8 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int64_t
|
|
4703
4695
|
return nrow * nblock * sizeof(block_iq4_nl);
|
4704
4696
|
}
|
4705
4697
|
|
4706
|
-
//void quantize_row_iq4_nl_ref(const float *
|
4707
|
-
void quantize_row_iq4_nl_ref(const float *
|
4698
|
+
//void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
|
4699
|
+
void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k) {
|
4708
4700
|
GGML_ASSERT(k%QK4_NL == 0);
|
4709
4701
|
int64_t nblock = k/QK4_NL;
|
4710
4702
|
uint8_t L[QK4_NL];
|
@@ -4719,7 +4711,7 @@ void quantize_row_iq4_nl_ref(const float * restrict x, block_iq4_nl * restrict y
|
|
4719
4711
|
}
|
4720
4712
|
}
|
4721
4713
|
|
4722
|
-
size_t quantize_iq4_xs(const float *
|
4714
|
+
size_t quantize_iq4_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4723
4715
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
4724
4716
|
int64_t nblock = n_per_row/QK_K;
|
4725
4717
|
char * qrow = (char *)dst;
|
@@ -4739,14 +4731,14 @@ size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int64_t
|
|
4739
4731
|
return nrow * nblock * sizeof(block_iq4_xs);
|
4740
4732
|
}
|
4741
4733
|
|
4742
|
-
void quantize_row_iq4_xs_ref(const float *
|
4734
|
+
void quantize_row_iq4_xs_ref(const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k) {
|
4743
4735
|
assert(k % QK_K == 0);
|
4744
4736
|
quantize_iq4_xs(x, y, 1, k, NULL);
|
4745
4737
|
}
|
4746
4738
|
|
4747
4739
|
// =============================== 2.5625 bpw
|
4748
4740
|
|
4749
|
-
static void quantize_row_iq2_s_impl(const float *
|
4741
|
+
static void quantize_row_iq2_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
|
4750
4742
|
|
4751
4743
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_S);
|
4752
4744
|
|
@@ -4914,7 +4906,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
|
|
4914
4906
|
}
|
4915
4907
|
}
|
4916
4908
|
|
4917
|
-
size_t quantize_iq2_s(const float *
|
4909
|
+
size_t quantize_iq2_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
4918
4910
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
4919
4911
|
int64_t nblock = n_per_row/QK_K;
|
4920
4912
|
char * qrow = (char *)dst;
|
@@ -4926,7 +4918,7 @@ size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int64_t n
|
|
4926
4918
|
return nrow * nblock * sizeof(block_iq2_s);
|
4927
4919
|
}
|
4928
4920
|
|
4929
|
-
void quantize_row_iq2_s_ref(const float *
|
4921
|
+
void quantize_row_iq2_s_ref(const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k) {
|
4930
4922
|
assert(k % QK_K == 0);
|
4931
4923
|
quantize_iq2_s(x, y, 1, k, NULL);
|
4932
4924
|
}
|