whispercpp 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -3
- data/README.md +92 -31
- data/Rakefile +26 -7
- data/ext/.gitignore +5 -7
- data/ext/dependencies.rb +61 -0
- data/ext/extconf.rb +21 -198
- data/ext/options.rb +221 -0
- data/ext/ruby_whisper.c +159 -0
- data/ext/ruby_whisper.h +17 -2
- data/ext/ruby_whisper_context.c +641 -0
- data/ext/ruby_whisper_error.c +52 -0
- data/ext/ruby_whisper_model.c +232 -0
- data/ext/ruby_whisper_params.c +1301 -0
- data/ext/ruby_whisper_segment.c +143 -0
- data/ext/ruby_whisper_transcribe.cpp +87 -0
- data/ext/ruby_whisper_vad_params.c +288 -0
- data/ext/sources/.dockerignore +3 -0
- data/ext/sources/.github/workflows/bindings-ruby.yml +21 -0
- data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
- data/ext/sources/CMakeLists.txt +251 -0
- data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
- data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
- data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
- data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
- data/ext/sources/bindings/javascript/package.json +26 -0
- data/ext/sources/bindings/javascript/whisper.js +19 -0
- data/ext/sources/build-xcframework.sh +547 -0
- data/ext/sources/ci/run.sh +336 -0
- data/ext/sources/close-issue.yml +28 -0
- data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
- data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
- data/ext/sources/cmake/build-info.cmake +60 -0
- data/ext/sources/cmake/git-vars.cmake +22 -0
- data/ext/sources/cmake/whisper-config.cmake.in +65 -0
- data/ext/sources/cmake/whisper.pc.in +10 -0
- data/ext/sources/examples/CMakeLists.txt +124 -0
- data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +37 -0
- data/ext/sources/examples/addon.node/addon.cpp +438 -0
- data/ext/sources/examples/addon.node/index.js +54 -0
- data/ext/sources/examples/addon.node/package.json +16 -0
- data/ext/sources/examples/bench/CMakeLists.txt +8 -0
- data/ext/sources/examples/bench/bench.cpp +175 -0
- data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
- data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
- data/ext/sources/examples/cli/CMakeLists.txt +8 -0
- data/ext/sources/examples/cli/cli.cpp +1294 -0
- data/ext/sources/examples/coi-serviceworker.js +146 -0
- data/ext/sources/examples/command/CMakeLists.txt +10 -0
- data/ext/sources/examples/command/command.cpp +776 -0
- data/ext/sources/examples/command/commands.txt +9 -0
- data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
- data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/common-ggml.cpp +238 -0
- data/ext/sources/examples/common-ggml.h +18 -0
- data/ext/sources/examples/common-sdl.cpp +227 -0
- data/ext/sources/examples/common-sdl.h +49 -0
- data/ext/sources/examples/common-whisper.cpp +168 -0
- data/ext/sources/examples/common-whisper.h +24 -0
- data/ext/sources/examples/common.cpp +675 -0
- data/ext/sources/examples/common.h +322 -0
- data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
- data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
- data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
- data/ext/sources/examples/generate-karaoke.sh +57 -0
- data/ext/sources/examples/grammar-parser.cpp +423 -0
- data/ext/sources/examples/grammar-parser.h +29 -0
- data/ext/sources/examples/helpers.js +191 -0
- data/ext/sources/examples/json.hpp +24596 -0
- data/ext/sources/examples/livestream.sh +112 -0
- data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
- data/ext/sources/examples/lsp/lsp.cpp +467 -0
- data/ext/sources/examples/lsp/whisper.vim +362 -0
- data/ext/sources/examples/miniaudio.h +93468 -0
- data/ext/sources/examples/python/test_whisper_processor.py +7 -0
- data/ext/sources/examples/python/whisper_processor.py +54 -0
- data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
- data/ext/sources/examples/quantize/quantize.cpp +223 -0
- data/ext/sources/examples/server/CMakeLists.txt +12 -0
- data/ext/sources/examples/server/bench.js +29 -0
- data/ext/sources/examples/server/httplib.h +10497 -0
- data/ext/sources/examples/server/server.cpp +1091 -0
- data/ext/sources/examples/server.py +115 -0
- data/ext/sources/examples/stb_vorbis.c +5584 -0
- data/ext/sources/examples/stream/CMakeLists.txt +10 -0
- data/ext/sources/examples/stream/stream.cpp +429 -0
- data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
- data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
- data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
- data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
- data/ext/sources/examples/sycl/build.sh +22 -0
- data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
- data/ext/sources/examples/sycl/run-whisper.sh +17 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +40 -0
- data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
- data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
- data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
- data/ext/sources/examples/talk-llama/llama-arch.cpp +1746 -0
- data/ext/sources/examples/talk-llama/llama-arch.h +437 -0
- data/ext/sources/examples/talk-llama/llama-batch.cpp +374 -0
- data/ext/sources/examples/talk-llama/llama-batch.h +89 -0
- data/ext/sources/examples/talk-llama/llama-chat.cpp +663 -0
- data/ext/sources/examples/talk-llama/llama-chat.h +58 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +2676 -0
- data/ext/sources/examples/talk-llama/llama-context.h +276 -0
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
- data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
- data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
- data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
- data/ext/sources/examples/talk-llama/llama-graph.cpp +1618 -0
- data/ext/sources/examples/talk-llama/llama-graph.h +640 -0
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +95 -0
- data/ext/sources/examples/talk-llama/llama-hparams.h +190 -0
- data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
- data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
- data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
- data/ext/sources/examples/talk-llama/llama-io.h +35 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2739 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +502 -0
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +379 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +32 -0
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
- data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1138 -0
- data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +281 -0
- data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +13814 -0
- data/ext/sources/examples/talk-llama/llama-model.h +425 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +966 -0
- data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
- data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
- data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +3340 -0
- data/ext/sources/examples/talk-llama/llama-vocab.h +131 -0
- data/ext/sources/examples/talk-llama/llama.cpp +354 -0
- data/ext/sources/examples/talk-llama/llama.h +1377 -0
- data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
- data/ext/sources/examples/talk-llama/speak +40 -0
- data/ext/sources/examples/talk-llama/speak.bat +1 -0
- data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
- data/ext/sources/examples/talk-llama/talk-llama.cpp +808 -0
- data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
- data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +849 -0
- data/ext/sources/examples/talk-llama/unicode.h +66 -0
- data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +143 -0
- data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
- data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
- data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
- data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
- data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
- data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
- data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +249 -0
- data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
- data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
- data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
- data/ext/sources/ggml/CMakeLists.txt +390 -0
- data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
- data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
- data/ext/sources/ggml/cmake/common.cmake +26 -0
- data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
- data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-backend.h +9 -7
- data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
- data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +9 -1
- data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
- data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
- data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
- data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
- data/ext/{ggml → sources/ggml}/include/ggml.h +182 -265
- data/ext/sources/ggml/include/gguf.h +202 -0
- data/ext/sources/ggml/src/CMakeLists.txt +346 -0
- data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
- data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
- data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +87 -53
- data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +26 -14
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +74 -0
- data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
- data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
- data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
- data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +135 -1
- data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +564 -146
- data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
- data/ext/{ggml → sources/ggml}/src/ggml-common.h +12 -8
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +504 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +2 -1
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
- data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +72 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/cpu-feats-x86.cpp +5 -1
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +163 -41
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-quants.c +4029 -1117
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3510 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +67 -18
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +8903 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.h +110 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +252 -0
- data/ext/sources/ggml/src/ggml-cpu/vec.h +818 -0
- data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
- data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
- data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
- data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
- data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +828 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
- data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +730 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +26 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
- data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
- data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
- data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
- data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1471 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
- data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3505 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
- data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
- data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
- data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +336 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
- data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
- data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
- data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
- data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
- data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
- data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
- data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
- data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
- data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
- data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
- data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +39 -0
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
- data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +289 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +59 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
- data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
- data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
- data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
- data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +131 -0
- data/ext/{ggml → sources/ggml}/src/ggml-impl.h +64 -19
- data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
- data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +120 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2178 -1064
- data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +1575 -1218
- data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +5124 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
- data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
- data/ext/{ggml → sources/ggml}/src/ggml-quants.c +114 -120
- data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +480 -73
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
- data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +345 -0
- data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +589 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +32 -33
- data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +4 -2
- data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/convert.cpp +104 -28
- data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +700 -0
- data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +791 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +156 -17
- data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +75 -0
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +99 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +309 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1004 -1240
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
- data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
- data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +0 -1
- data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmvq.cpp +261 -166
- data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/norm.cpp +204 -81
- data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
- data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
- data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +83 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +361 -0
- data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +35 -25
- data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1215 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +293 -0
- data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +196 -0
- data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3130 -1087
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
- data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +193 -35
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- data/ext/{ggml → sources/ggml}/src/ggml.c +676 -1820
- data/ext/sources/ggml/src/gguf.cpp +1330 -0
- data/ext/{include → sources/include}/whisper.h +68 -2
- data/ext/sources/src/CMakeLists.txt +143 -0
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
- data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +35 -10
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
- data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +28 -3
- data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
- data/ext/sources/src/whisper-arch.h +197 -0
- data/ext/{src → sources/src}/whisper.cpp +1905 -374
- data/ext/sources/tests/CMakeLists.txt +105 -0
- data/ext/sources/tests/earnings21/eval.mk +58 -0
- data/ext/sources/tests/earnings21/eval.py +68 -0
- data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
- data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
- data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
- data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
- data/ext/sources/tests/earnings21/requirements.txt +6 -0
- data/ext/sources/tests/en-0-ref.txt +1 -0
- data/ext/sources/tests/en-1-ref.txt +1 -0
- data/ext/sources/tests/en-2-ref.txt +1 -0
- data/ext/sources/tests/es-0-ref.txt +1 -0
- data/ext/sources/tests/librispeech/eval.mk +39 -0
- data/ext/sources/tests/librispeech/eval.py +47 -0
- data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
- data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
- data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
- data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
- data/ext/sources/tests/librispeech/requirements.txt +6 -0
- data/ext/sources/tests/run-tests.sh +130 -0
- data/ext/sources/tests/test-c.c +3 -0
- data/ext/sources/tests/test-vad-full.cpp +54 -0
- data/ext/sources/tests/test-vad.cpp +83 -0
- data/ext/sources/tests/test-whisper.js +58 -0
- data/extsources.rb +33 -5
- data/lib/whisper/model/uri.rb +149 -128
- data/sig/whisper.rbs +480 -0
- data/tests/helper.rb +28 -0
- data/tests/test_callback.rb +45 -3
- data/tests/test_error.rb +2 -2
- data/tests/test_model.rb +38 -0
- data/tests/test_package.rb +18 -3
- data/tests/test_params.rb +145 -8
- data/tests/test_segment.rb +10 -19
- data/tests/test_vad.rb +19 -0
- data/tests/test_vad_params.rb +103 -0
- data/tests/test_whisper.rb +37 -37
- data/whispercpp.gemspec +5 -4
- metadata +766 -111
- data/ext/cpu.mk +0 -9
- data/ext/examples/dr_wav.h +0 -8815
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
- data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
- data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
- data/ext/metal-embed.mk +0 -17
- data/ext/metal.mk +0 -6
- data/ext/ruby_whisper.cpp +0 -1909
- data/ext/scripts/get-flags.mk +0 -38
- data/lib/whisper.rb +0 -2
- /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
- /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-aarch64.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-hbm.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-hbm.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-traits.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-traits.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
- /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
- /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
- /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
data/ext/ruby_whisper.cpp
DELETED
@@ -1,1909 +0,0 @@
|
|
1
|
-
#include <ruby.h>
|
2
|
-
#include <ruby/memory_view.h>
|
3
|
-
#include "ruby_whisper.h"
|
4
|
-
#define DR_WAV_IMPLEMENTATION
|
5
|
-
#include "dr_wav.h"
|
6
|
-
#include <cmath>
|
7
|
-
#include <fstream>
|
8
|
-
#include <cstdio>
|
9
|
-
#include <string>
|
10
|
-
#include <thread>
|
11
|
-
#include <vector>
|
12
|
-
|
13
|
-
#ifdef __cplusplus
|
14
|
-
extern "C" {
|
15
|
-
#endif
|
16
|
-
|
17
|
-
#define BOOL_PARAMS_SETTER(self, prop, value) \
|
18
|
-
ruby_whisper_params *rwp; \
|
19
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp); \
|
20
|
-
if (value == Qfalse || value == Qnil) { \
|
21
|
-
rwp->params.prop = false; \
|
22
|
-
} else { \
|
23
|
-
rwp->params.prop = true; \
|
24
|
-
} \
|
25
|
-
return value; \
|
26
|
-
|
27
|
-
#define BOOL_PARAMS_GETTER(self, prop) \
|
28
|
-
ruby_whisper_params *rwp; \
|
29
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp); \
|
30
|
-
if (rwp->params.prop) { \
|
31
|
-
return Qtrue; \
|
32
|
-
} else { \
|
33
|
-
return Qfalse; \
|
34
|
-
}
|
35
|
-
|
36
|
-
VALUE mWhisper;
|
37
|
-
VALUE cContext;
|
38
|
-
VALUE cParams;
|
39
|
-
VALUE eError;
|
40
|
-
|
41
|
-
VALUE cSegment;
|
42
|
-
VALUE cModel;
|
43
|
-
|
44
|
-
static ID id_to_s;
|
45
|
-
static ID id_call;
|
46
|
-
static ID id___method__;
|
47
|
-
static ID id_to_enum;
|
48
|
-
static ID id_length;
|
49
|
-
static ID id_next;
|
50
|
-
static ID id_new;
|
51
|
-
static ID id_to_path;
|
52
|
-
static ID id_pre_converted_models;
|
53
|
-
|
54
|
-
static bool is_log_callback_finalized = false;
|
55
|
-
|
56
|
-
/*
|
57
|
-
* call-seq:
|
58
|
-
* lang_max_id -> Integer
|
59
|
-
*/
|
60
|
-
static VALUE ruby_whisper_s_lang_max_id(VALUE self) {
|
61
|
-
return INT2NUM(whisper_lang_max_id());
|
62
|
-
}
|
63
|
-
|
64
|
-
/*
|
65
|
-
* call-seq:
|
66
|
-
* lang_id(lang_name) -> Integer
|
67
|
-
*/
|
68
|
-
static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) {
|
69
|
-
const char * lang_str = StringValueCStr(lang);
|
70
|
-
const int id = whisper_lang_id(lang_str);
|
71
|
-
if (-1 == id) {
|
72
|
-
rb_raise(rb_eArgError, "language not found: %s", lang_str);
|
73
|
-
}
|
74
|
-
return INT2NUM(id);
|
75
|
-
}
|
76
|
-
|
77
|
-
/*
|
78
|
-
* call-seq:
|
79
|
-
* lang_str(lang_id) -> String
|
80
|
-
*/
|
81
|
-
static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) {
|
82
|
-
const int lang_id = NUM2INT(id);
|
83
|
-
const char * str = whisper_lang_str(lang_id);
|
84
|
-
if (nullptr == str) {
|
85
|
-
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
86
|
-
}
|
87
|
-
return rb_str_new2(str);
|
88
|
-
}
|
89
|
-
|
90
|
-
/*
|
91
|
-
* call-seq:
|
92
|
-
* lang_str(lang_id) -> String
|
93
|
-
*/
|
94
|
-
static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
95
|
-
const int lang_id = NUM2INT(id);
|
96
|
-
const char * str_full = whisper_lang_str_full(lang_id);
|
97
|
-
if (nullptr == str_full) {
|
98
|
-
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
99
|
-
}
|
100
|
-
return rb_str_new2(str_full);
|
101
|
-
}
|
102
|
-
|
103
|
-
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
104
|
-
is_log_callback_finalized = true;
|
105
|
-
return Qnil;
|
106
|
-
}
|
107
|
-
|
108
|
-
/*
|
109
|
-
* call-seq:
|
110
|
-
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
|
111
|
-
*/
|
112
|
-
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
|
113
|
-
VALUE old_callback = rb_iv_get(self, "log_callback");
|
114
|
-
if (!NIL_P(old_callback)) {
|
115
|
-
rb_undefine_finalizer(old_callback);
|
116
|
-
}
|
117
|
-
|
118
|
-
rb_iv_set(self, "log_callback", log_callback);
|
119
|
-
rb_iv_set(self, "user_data", user_data);
|
120
|
-
|
121
|
-
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
|
122
|
-
rb_define_finalizer(log_callback, finalize_log_callback);
|
123
|
-
|
124
|
-
whisper_log_set([](ggml_log_level level, const char * buffer, void * user_data) {
|
125
|
-
if (is_log_callback_finalized) {
|
126
|
-
return;
|
127
|
-
}
|
128
|
-
VALUE log_callback = rb_iv_get(mWhisper, "log_callback");
|
129
|
-
VALUE udata = rb_iv_get(mWhisper, "user_data");
|
130
|
-
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
|
131
|
-
}, nullptr);
|
132
|
-
|
133
|
-
return Qnil;
|
134
|
-
}
|
135
|
-
|
136
|
-
static void ruby_whisper_free(ruby_whisper *rw) {
|
137
|
-
if (rw->context) {
|
138
|
-
whisper_free(rw->context);
|
139
|
-
rw->context = NULL;
|
140
|
-
}
|
141
|
-
}
|
142
|
-
|
143
|
-
static void ruby_whisper_params_free(ruby_whisper_params *rwp) {
|
144
|
-
}
|
145
|
-
|
146
|
-
void rb_whisper_mark(ruby_whisper *rw) {
|
147
|
-
// call rb_gc_mark on any ruby references in rw
|
148
|
-
}
|
149
|
-
|
150
|
-
void rb_whisper_free(ruby_whisper *rw) {
|
151
|
-
ruby_whisper_free(rw);
|
152
|
-
free(rw);
|
153
|
-
}
|
154
|
-
|
155
|
-
void rb_whisper_callbcack_container_mark(ruby_whisper_callback_container *rwc) {
|
156
|
-
rb_gc_mark(rwc->user_data);
|
157
|
-
rb_gc_mark(rwc->callback);
|
158
|
-
rb_gc_mark(rwc->callbacks);
|
159
|
-
}
|
160
|
-
|
161
|
-
void rb_whisper_params_mark(ruby_whisper_params *rwp) {
|
162
|
-
rb_whisper_callbcack_container_mark(rwp->new_segment_callback_container);
|
163
|
-
rb_whisper_callbcack_container_mark(rwp->progress_callback_container);
|
164
|
-
rb_whisper_callbcack_container_mark(rwp->abort_callback_container);
|
165
|
-
}
|
166
|
-
|
167
|
-
void rb_whisper_params_free(ruby_whisper_params *rwp) {
|
168
|
-
// How to free user_data and callback only when not referred to by others?
|
169
|
-
ruby_whisper_params_free(rwp);
|
170
|
-
free(rwp);
|
171
|
-
}
|
172
|
-
|
173
|
-
static VALUE ruby_whisper_allocate(VALUE klass) {
|
174
|
-
ruby_whisper *rw;
|
175
|
-
rw = ALLOC(ruby_whisper);
|
176
|
-
rw->context = NULL;
|
177
|
-
return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw);
|
178
|
-
}
|
179
|
-
|
180
|
-
static ruby_whisper_callback_container * rb_whisper_callback_container_allocate() {
|
181
|
-
ruby_whisper_callback_container *container;
|
182
|
-
container = ALLOC(ruby_whisper_callback_container);
|
183
|
-
container->context = nullptr;
|
184
|
-
container->user_data = Qnil;
|
185
|
-
container->callback = Qnil;
|
186
|
-
container->callbacks = rb_ary_new();
|
187
|
-
return container;
|
188
|
-
}
|
189
|
-
|
190
|
-
static VALUE ruby_whisper_params_allocate(VALUE klass) {
|
191
|
-
ruby_whisper_params *rwp;
|
192
|
-
rwp = ALLOC(ruby_whisper_params);
|
193
|
-
rwp->params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
194
|
-
rwp->diarize = false;
|
195
|
-
rwp->new_segment_callback_container = rb_whisper_callback_container_allocate();
|
196
|
-
rwp->progress_callback_container = rb_whisper_callback_container_allocate();
|
197
|
-
rwp->abort_callback_container = rb_whisper_callback_container_allocate();
|
198
|
-
return Data_Wrap_Struct(klass, rb_whisper_params_mark, rb_whisper_params_free, rwp);
|
199
|
-
}
|
200
|
-
|
201
|
-
/*
|
202
|
-
* call-seq:
|
203
|
-
* new("base.en") -> Whisper::Context
|
204
|
-
* new("path/to/model.bin") -> Whisper::Context
|
205
|
-
* new(Whisper::Model::URI.new("https://example.net/uri/of/model.bin")) -> Whisper::Context
|
206
|
-
*/
|
207
|
-
static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
|
208
|
-
ruby_whisper *rw;
|
209
|
-
VALUE whisper_model_file_path;
|
210
|
-
|
211
|
-
// TODO: we can support init from buffer here too maybe another ruby object to expose
|
212
|
-
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
|
213
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
214
|
-
|
215
|
-
VALUE pre_converted_models = rb_funcall(cModel, id_pre_converted_models, 0);
|
216
|
-
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, whisper_model_file_path);
|
217
|
-
if (!NIL_P(pre_converted_model)) {
|
218
|
-
whisper_model_file_path = pre_converted_model;
|
219
|
-
}
|
220
|
-
if (rb_respond_to(whisper_model_file_path, id_to_path)) {
|
221
|
-
whisper_model_file_path = rb_funcall(whisper_model_file_path, id_to_path, 0);
|
222
|
-
}
|
223
|
-
if (!rb_respond_to(whisper_model_file_path, id_to_s)) {
|
224
|
-
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
225
|
-
}
|
226
|
-
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
|
227
|
-
if (rw->context == nullptr) {
|
228
|
-
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
229
|
-
}
|
230
|
-
return self;
|
231
|
-
}
|
232
|
-
|
233
|
-
// High level API
|
234
|
-
static VALUE rb_whisper_segment_initialize(VALUE context, int index);
|
235
|
-
|
236
|
-
/*
|
237
|
-
* transcribe a single file
|
238
|
-
* can emit to a block results
|
239
|
-
*
|
240
|
-
* params = Whisper::Params.new
|
241
|
-
* params.duration = 60_000
|
242
|
-
* whisper.transcribe "path/to/audio.wav", params do |text|
|
243
|
-
* puts text
|
244
|
-
* end
|
245
|
-
*
|
246
|
-
* call-seq:
|
247
|
-
* transcribe(path_to_audio, params) {|text| ...}
|
248
|
-
**/
|
249
|
-
static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
250
|
-
ruby_whisper *rw;
|
251
|
-
ruby_whisper_params *rwp;
|
252
|
-
VALUE wave_file_path, blk, params;
|
253
|
-
|
254
|
-
rb_scan_args(argc, argv, "02&", &wave_file_path, ¶ms, &blk);
|
255
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
256
|
-
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
257
|
-
|
258
|
-
if (!rb_respond_to(wave_file_path, id_to_s)) {
|
259
|
-
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
|
260
|
-
}
|
261
|
-
|
262
|
-
std::string fname_inp = StringValueCStr(wave_file_path);
|
263
|
-
|
264
|
-
std::vector<float> pcmf32; // mono-channel F32 PCM
|
265
|
-
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
266
|
-
|
267
|
-
// WAV input - this is directly from main.cpp example
|
268
|
-
{
|
269
|
-
drwav wav;
|
270
|
-
std::vector<uint8_t> wav_data; // used for pipe input from stdin
|
271
|
-
|
272
|
-
if (fname_inp == "-") {
|
273
|
-
{
|
274
|
-
uint8_t buf[1024];
|
275
|
-
while (true) {
|
276
|
-
const size_t n = fread(buf, 1, sizeof(buf), stdin);
|
277
|
-
if (n == 0) {
|
278
|
-
break;
|
279
|
-
}
|
280
|
-
wav_data.insert(wav_data.end(), buf, buf + n);
|
281
|
-
}
|
282
|
-
}
|
283
|
-
|
284
|
-
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
|
285
|
-
fprintf(stderr, "error: failed to open WAV file from stdin\n");
|
286
|
-
return self;
|
287
|
-
}
|
288
|
-
|
289
|
-
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
|
290
|
-
} else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
|
291
|
-
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
292
|
-
return self;
|
293
|
-
}
|
294
|
-
|
295
|
-
if (wav.channels != 1 && wav.channels != 2) {
|
296
|
-
fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
|
297
|
-
return self;
|
298
|
-
}
|
299
|
-
|
300
|
-
if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
|
301
|
-
fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
|
302
|
-
return self;
|
303
|
-
}
|
304
|
-
|
305
|
-
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
|
306
|
-
fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
|
307
|
-
return self;
|
308
|
-
}
|
309
|
-
|
310
|
-
if (wav.bitsPerSample != 16) {
|
311
|
-
fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
|
312
|
-
return self;
|
313
|
-
}
|
314
|
-
|
315
|
-
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
|
316
|
-
|
317
|
-
std::vector<int16_t> pcm16;
|
318
|
-
pcm16.resize(n*wav.channels);
|
319
|
-
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
|
320
|
-
drwav_uninit(&wav);
|
321
|
-
|
322
|
-
// convert to mono, float
|
323
|
-
pcmf32.resize(n);
|
324
|
-
if (wav.channels == 1) {
|
325
|
-
for (uint64_t i = 0; i < n; i++) {
|
326
|
-
pcmf32[i] = float(pcm16[i])/32768.0f;
|
327
|
-
}
|
328
|
-
} else {
|
329
|
-
for (uint64_t i = 0; i < n; i++) {
|
330
|
-
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
|
331
|
-
}
|
332
|
-
}
|
333
|
-
|
334
|
-
if (rwp->diarize) {
|
335
|
-
// convert to stereo, float
|
336
|
-
pcmf32s.resize(2);
|
337
|
-
|
338
|
-
pcmf32s[0].resize(n);
|
339
|
-
pcmf32s[1].resize(n);
|
340
|
-
for (uint64_t i = 0; i < n; i++) {
|
341
|
-
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
|
342
|
-
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
|
343
|
-
}
|
344
|
-
}
|
345
|
-
}
|
346
|
-
{
|
347
|
-
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
348
|
-
|
349
|
-
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
350
|
-
bool is_aborted = *(bool*)user_data;
|
351
|
-
return !is_aborted;
|
352
|
-
};
|
353
|
-
rwp->params.encoder_begin_callback_user_data = &is_aborted;
|
354
|
-
}
|
355
|
-
|
356
|
-
if (!NIL_P(rwp->new_segment_callback_container->callback) || 0 != RARRAY_LEN(rwp->new_segment_callback_container->callbacks)) {
|
357
|
-
rwp->params.new_segment_callback = [](struct whisper_context * ctx, struct whisper_state * state, int n_new, void * user_data) {
|
358
|
-
const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data;
|
359
|
-
|
360
|
-
// Currently, doesn't support state because
|
361
|
-
// those require to resolve GC-related problems.
|
362
|
-
if (!NIL_P(container->callback)) {
|
363
|
-
rb_funcall(container->callback, id_call, 4, *container->context, Qnil, INT2NUM(n_new), container->user_data);
|
364
|
-
}
|
365
|
-
const long callbacks_len = RARRAY_LEN(container->callbacks);
|
366
|
-
if (0 == callbacks_len) {
|
367
|
-
return;
|
368
|
-
}
|
369
|
-
const int n_segments = whisper_full_n_segments_from_state(state);
|
370
|
-
for (int i = n_new; i > 0; i--) {
|
371
|
-
int i_segment = n_segments - i;
|
372
|
-
VALUE segment = rb_whisper_segment_initialize(*container->context, i_segment);
|
373
|
-
for (int j = 0; j < callbacks_len; j++) {
|
374
|
-
VALUE cb = rb_ary_entry(container->callbacks, j);
|
375
|
-
rb_funcall(cb, id_call, 1, segment);
|
376
|
-
}
|
377
|
-
}
|
378
|
-
};
|
379
|
-
rwp->new_segment_callback_container->context = &self;
|
380
|
-
rwp->params.new_segment_callback_user_data = rwp->new_segment_callback_container;
|
381
|
-
}
|
382
|
-
|
383
|
-
if (!NIL_P(rwp->progress_callback_container->callback) || 0 != RARRAY_LEN(rwp->progress_callback_container->callbacks)) {
|
384
|
-
rwp->params.progress_callback = [](struct whisper_context *ctx, struct whisper_state * /*state*/, int progress_cur, void *user_data) {
|
385
|
-
const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data;
|
386
|
-
const VALUE progress = INT2NUM(progress_cur);
|
387
|
-
// Currently, doesn't support state because
|
388
|
-
// those require to resolve GC-related problems.
|
389
|
-
if (!NIL_P(container->callback)) {
|
390
|
-
rb_funcall(container->callback, id_call, 4, *container->context, Qnil, progress, container->user_data);
|
391
|
-
}
|
392
|
-
const long callbacks_len = RARRAY_LEN(container->callbacks);
|
393
|
-
if (0 == callbacks_len) {
|
394
|
-
return;
|
395
|
-
}
|
396
|
-
for (int j = 0; j < callbacks_len; j++) {
|
397
|
-
VALUE cb = rb_ary_entry(container->callbacks, j);
|
398
|
-
rb_funcall(cb, id_call, 1, progress);
|
399
|
-
}
|
400
|
-
};
|
401
|
-
rwp->progress_callback_container->context = &self;
|
402
|
-
rwp->params.progress_callback_user_data = rwp->progress_callback_container;
|
403
|
-
}
|
404
|
-
|
405
|
-
if (!NIL_P(rwp->abort_callback_container->callback) || 0 != RARRAY_LEN(rwp->abort_callback_container->callbacks)) {
|
406
|
-
rwp->params.abort_callback = [](void * user_data) {
|
407
|
-
const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data;
|
408
|
-
if (!NIL_P(container->callback)) {
|
409
|
-
VALUE result = rb_funcall(container->callback, id_call, 1, container->user_data);
|
410
|
-
if (!NIL_P(result) && Qfalse != result) {
|
411
|
-
return true;
|
412
|
-
}
|
413
|
-
}
|
414
|
-
const long callbacks_len = RARRAY_LEN(container->callbacks);
|
415
|
-
if (0 == callbacks_len) {
|
416
|
-
return false;
|
417
|
-
}
|
418
|
-
for (int j = 0; j < callbacks_len; j++) {
|
419
|
-
VALUE cb = rb_ary_entry(container->callbacks, j);
|
420
|
-
VALUE result = rb_funcall(cb, id_call, 1, container->user_data);
|
421
|
-
if (!NIL_P(result) && Qfalse != result) {
|
422
|
-
return true;
|
423
|
-
}
|
424
|
-
}
|
425
|
-
return false;
|
426
|
-
};
|
427
|
-
rwp->abort_callback_container->context = &self;
|
428
|
-
rwp->params.abort_callback_user_data = rwp->abort_callback_container;
|
429
|
-
}
|
430
|
-
|
431
|
-
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
|
432
|
-
fprintf(stderr, "failed to process audio\n");
|
433
|
-
return self;
|
434
|
-
}
|
435
|
-
const int n_segments = whisper_full_n_segments(rw->context);
|
436
|
-
VALUE output = rb_str_new2("");
|
437
|
-
for (int i = 0; i < n_segments; ++i) {
|
438
|
-
const char * text = whisper_full_get_segment_text(rw->context, i);
|
439
|
-
output = rb_str_concat(output, rb_str_new2(text));
|
440
|
-
}
|
441
|
-
VALUE idCall = id_call;
|
442
|
-
if (blk != Qnil) {
|
443
|
-
rb_funcall(blk, idCall, 1, output);
|
444
|
-
}
|
445
|
-
return self;
|
446
|
-
}
|
447
|
-
|
448
|
-
/*
|
449
|
-
* call-seq:
|
450
|
-
* model_n_vocab -> Integer
|
451
|
-
*/
|
452
|
-
VALUE ruby_whisper_model_n_vocab(VALUE self) {
|
453
|
-
ruby_whisper *rw;
|
454
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
455
|
-
return INT2NUM(whisper_model_n_vocab(rw->context));
|
456
|
-
}
|
457
|
-
|
458
|
-
/*
|
459
|
-
* call-seq:
|
460
|
-
* model_n_audio_ctx -> Integer
|
461
|
-
*/
|
462
|
-
VALUE ruby_whisper_model_n_audio_ctx(VALUE self) {
|
463
|
-
ruby_whisper *rw;
|
464
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
465
|
-
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
466
|
-
}
|
467
|
-
|
468
|
-
/*
|
469
|
-
* call-seq:
|
470
|
-
* model_n_audio_state -> Integer
|
471
|
-
*/
|
472
|
-
VALUE ruby_whisper_model_n_audio_state(VALUE self) {
|
473
|
-
ruby_whisper *rw;
|
474
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
475
|
-
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
476
|
-
}
|
477
|
-
|
478
|
-
/*
|
479
|
-
* call-seq:
|
480
|
-
* model_n_audio_head -> Integer
|
481
|
-
*/
|
482
|
-
VALUE ruby_whisper_model_n_audio_head(VALUE self) {
|
483
|
-
ruby_whisper *rw;
|
484
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
485
|
-
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
486
|
-
}
|
487
|
-
|
488
|
-
/*
|
489
|
-
* call-seq:
|
490
|
-
* model_n_audio_layer -> Integer
|
491
|
-
*/
|
492
|
-
VALUE ruby_whisper_model_n_audio_layer(VALUE self) {
|
493
|
-
ruby_whisper *rw;
|
494
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
495
|
-
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
496
|
-
}
|
497
|
-
|
498
|
-
/*
|
499
|
-
* call-seq:
|
500
|
-
* model_n_text_ctx -> Integer
|
501
|
-
*/
|
502
|
-
VALUE ruby_whisper_model_n_text_ctx(VALUE self) {
|
503
|
-
ruby_whisper *rw;
|
504
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
505
|
-
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
506
|
-
}
|
507
|
-
|
508
|
-
/*
|
509
|
-
* call-seq:
|
510
|
-
* model_n_text_state -> Integer
|
511
|
-
*/
|
512
|
-
VALUE ruby_whisper_model_n_text_state(VALUE self) {
|
513
|
-
ruby_whisper *rw;
|
514
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
515
|
-
return INT2NUM(whisper_model_n_text_state(rw->context));
|
516
|
-
}
|
517
|
-
|
518
|
-
/*
|
519
|
-
* call-seq:
|
520
|
-
* model_n_text_head -> Integer
|
521
|
-
*/
|
522
|
-
VALUE ruby_whisper_model_n_text_head(VALUE self) {
|
523
|
-
ruby_whisper *rw;
|
524
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
525
|
-
return INT2NUM(whisper_model_n_text_head(rw->context));
|
526
|
-
}
|
527
|
-
|
528
|
-
/*
|
529
|
-
* call-seq:
|
530
|
-
* model_n_text_layer -> Integer
|
531
|
-
*/
|
532
|
-
VALUE ruby_whisper_model_n_text_layer(VALUE self) {
|
533
|
-
ruby_whisper *rw;
|
534
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
535
|
-
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
536
|
-
}
|
537
|
-
|
538
|
-
/*
|
539
|
-
* call-seq:
|
540
|
-
* model_n_mels -> Integer
|
541
|
-
*/
|
542
|
-
VALUE ruby_whisper_model_n_mels(VALUE self) {
|
543
|
-
ruby_whisper *rw;
|
544
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
545
|
-
return INT2NUM(whisper_model_n_mels(rw->context));
|
546
|
-
}
|
547
|
-
|
548
|
-
/*
|
549
|
-
* call-seq:
|
550
|
-
* model_ftype -> Integer
|
551
|
-
*/
|
552
|
-
VALUE ruby_whisper_model_ftype(VALUE self) {
|
553
|
-
ruby_whisper *rw;
|
554
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
555
|
-
return INT2NUM(whisper_model_ftype(rw->context));
|
556
|
-
}
|
557
|
-
|
558
|
-
/*
|
559
|
-
* call-seq:
|
560
|
-
* model_type -> String
|
561
|
-
*/
|
562
|
-
VALUE ruby_whisper_model_type(VALUE self) {
|
563
|
-
ruby_whisper *rw;
|
564
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
565
|
-
return rb_str_new2(whisper_model_type_readable(rw->context));
|
566
|
-
}
|
567
|
-
|
568
|
-
/*
|
569
|
-
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
570
|
-
* Not thread safe for same context
|
571
|
-
* Uses the specified decoding strategy to obtain the text.
|
572
|
-
*
|
573
|
-
* call-seq:
|
574
|
-
* full(params, samples, n_samples) -> nil
|
575
|
-
* full(params, samples) -> nil
|
576
|
-
*
|
577
|
-
* The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
|
578
|
-
*/
|
579
|
-
VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self) {
|
580
|
-
if (argc < 2 || argc > 3) {
|
581
|
-
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
582
|
-
}
|
583
|
-
|
584
|
-
ruby_whisper *rw;
|
585
|
-
ruby_whisper_params *rwp;
|
586
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
587
|
-
VALUE params = argv[0];
|
588
|
-
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
589
|
-
VALUE samples = argv[1];
|
590
|
-
int n_samples;
|
591
|
-
rb_memory_view_t view;
|
592
|
-
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
593
|
-
if (argc == 3) {
|
594
|
-
n_samples = NUM2INT(argv[2]);
|
595
|
-
if (TYPE(samples) == T_ARRAY) {
|
596
|
-
if (RARRAY_LEN(samples) < n_samples) {
|
597
|
-
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
598
|
-
}
|
599
|
-
}
|
600
|
-
// Should check when samples.respond_to?(:length)?
|
601
|
-
} else {
|
602
|
-
if (TYPE(samples) == T_ARRAY) {
|
603
|
-
n_samples = RARRAY_LEN(samples);
|
604
|
-
} else if (memory_view_available_p) {
|
605
|
-
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
606
|
-
view.obj = Qnil;
|
607
|
-
rb_raise(rb_eArgError, "unable to get a memory view");
|
608
|
-
}
|
609
|
-
n_samples = view.byte_size / view.item_size;
|
610
|
-
} else if (rb_respond_to(samples, id_length)) {
|
611
|
-
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
612
|
-
} else {
|
613
|
-
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
614
|
-
}
|
615
|
-
}
|
616
|
-
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
617
|
-
if (memory_view_available_p) {
|
618
|
-
c_samples = (float *)view.data;
|
619
|
-
} else {
|
620
|
-
if (TYPE(samples) == T_ARRAY) {
|
621
|
-
for (int i = 0; i < n_samples; i++) {
|
622
|
-
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
623
|
-
}
|
624
|
-
} else {
|
625
|
-
// TODO: use rb_block_call
|
626
|
-
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
627
|
-
for (int i = 0; i < n_samples; i++) {
|
628
|
-
// TODO: check if iter is exhausted and raise ArgumentError appropriately
|
629
|
-
VALUE sample = rb_funcall(iter, id_next, 0);
|
630
|
-
c_samples[i] = RFLOAT_VALUE(sample);
|
631
|
-
}
|
632
|
-
}
|
633
|
-
}
|
634
|
-
const int result = whisper_full(rw->context, rwp->params, c_samples, n_samples);
|
635
|
-
if (0 == result) {
|
636
|
-
return Qnil;
|
637
|
-
} else {
|
638
|
-
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
639
|
-
}
|
640
|
-
}
|
641
|
-
|
642
|
-
/*
|
643
|
-
* Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
644
|
-
* Result is stored in the default state of the context
|
645
|
-
* Not thread safe if executed in parallel on the same context.
|
646
|
-
* It seems this approach can offer some speedup in some cases.
|
647
|
-
* However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
648
|
-
*
|
649
|
-
* call-seq:
|
650
|
-
* full_parallel(params, samples) -> nil
|
651
|
-
* full_parallel(params, samples, n_samples) -> nil
|
652
|
-
* full_parallel(params, samples, n_samples, n_processors) -> nil
|
653
|
-
* full_parallel(params, samples, nil, n_processors) -> nil
|
654
|
-
*/
|
655
|
-
static VALUE ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self) {
|
656
|
-
if (argc < 2 || argc > 4) {
|
657
|
-
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
658
|
-
}
|
659
|
-
|
660
|
-
ruby_whisper *rw;
|
661
|
-
ruby_whisper_params *rwp;
|
662
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
663
|
-
VALUE params = argv[0];
|
664
|
-
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
665
|
-
VALUE samples = argv[1];
|
666
|
-
int n_samples;
|
667
|
-
int n_processors;
|
668
|
-
rb_memory_view_t view;
|
669
|
-
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
670
|
-
switch (argc) {
|
671
|
-
case 2:
|
672
|
-
n_processors = 1;
|
673
|
-
break;
|
674
|
-
case 3:
|
675
|
-
n_processors = 1;
|
676
|
-
break;
|
677
|
-
case 4:
|
678
|
-
n_processors = NUM2INT(argv[3]);
|
679
|
-
break;
|
680
|
-
}
|
681
|
-
if (argc >= 3 && !NIL_P(argv[2])) {
|
682
|
-
n_samples = NUM2INT(argv[2]);
|
683
|
-
if (TYPE(samples) == T_ARRAY) {
|
684
|
-
if (RARRAY_LEN(samples) < n_samples) {
|
685
|
-
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
686
|
-
}
|
687
|
-
}
|
688
|
-
// Should check when samples.respond_to?(:length)?
|
689
|
-
} else if (memory_view_available_p) {
|
690
|
-
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
691
|
-
view.obj = Qnil;
|
692
|
-
rb_raise(rb_eArgError, "unable to get a memory view");
|
693
|
-
}
|
694
|
-
n_samples = view.byte_size / view.item_size;
|
695
|
-
} else {
|
696
|
-
if (TYPE(samples) == T_ARRAY) {
|
697
|
-
n_samples = RARRAY_LEN(samples);
|
698
|
-
} else if (rb_respond_to(samples, id_length)) {
|
699
|
-
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
700
|
-
} else {
|
701
|
-
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
702
|
-
}
|
703
|
-
}
|
704
|
-
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
705
|
-
if (memory_view_available_p) {
|
706
|
-
c_samples = (float *)view.data;
|
707
|
-
} else {
|
708
|
-
if (TYPE(samples) == T_ARRAY) {
|
709
|
-
for (int i = 0; i < n_samples; i++) {
|
710
|
-
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
711
|
-
}
|
712
|
-
} else {
|
713
|
-
// FIXME: use rb_block_call
|
714
|
-
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
715
|
-
for (int i = 0; i < n_samples; i++) {
|
716
|
-
// TODO: check if iter is exhausted and raise ArgumentError
|
717
|
-
VALUE sample = rb_funcall(iter, id_next, 0);
|
718
|
-
c_samples[i] = RFLOAT_VALUE(sample);
|
719
|
-
}
|
720
|
-
}
|
721
|
-
}
|
722
|
-
const int result = whisper_full_parallel(rw->context, rwp->params, c_samples, n_samples, n_processors);
|
723
|
-
if (0 == result) {
|
724
|
-
return Qnil;
|
725
|
-
} else {
|
726
|
-
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
727
|
-
}
|
728
|
-
}
|
729
|
-
|
730
|
-
/*
|
731
|
-
* Number of segments.
|
732
|
-
*
|
733
|
-
* call-seq:
|
734
|
-
* full_n_segments -> Integer
|
735
|
-
*/
|
736
|
-
static VALUE ruby_whisper_full_n_segments(VALUE self) {
|
737
|
-
ruby_whisper *rw;
|
738
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
739
|
-
return INT2NUM(whisper_full_n_segments(rw->context));
|
740
|
-
}
|
741
|
-
|
742
|
-
/*
|
743
|
-
* Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
|
744
|
-
*
|
745
|
-
* call-seq:
|
746
|
-
* full_lang_id -> Integer
|
747
|
-
*/
|
748
|
-
static VALUE ruby_whisper_full_lang_id(VALUE self) {
|
749
|
-
ruby_whisper *rw;
|
750
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
751
|
-
return INT2NUM(whisper_full_lang_id(rw->context));
|
752
|
-
}
|
753
|
-
|
754
|
-
static int ruby_whisper_full_check_segment_index(const ruby_whisper * rw, const VALUE i_segment) {
|
755
|
-
const int c_i_segment = NUM2INT(i_segment);
|
756
|
-
if (c_i_segment < 0 || c_i_segment >= whisper_full_n_segments(rw->context)) {
|
757
|
-
rb_raise(rb_eIndexError, "segment index %d out of range", c_i_segment);
|
758
|
-
}
|
759
|
-
return c_i_segment;
|
760
|
-
}
|
761
|
-
|
762
|
-
/*
|
763
|
-
* Start time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
|
764
|
-
*
|
765
|
-
* full_get_segment_t0(3) # => 1668 (16680 ms)
|
766
|
-
*
|
767
|
-
* call-seq:
|
768
|
-
* full_get_segment_t0(segment_index) -> Integer
|
769
|
-
*/
|
770
|
-
static VALUE ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment) {
|
771
|
-
ruby_whisper *rw;
|
772
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
773
|
-
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
774
|
-
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
|
775
|
-
return INT2NUM(t0);
|
776
|
-
}
|
777
|
-
|
778
|
-
/*
|
779
|
-
* End time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
|
780
|
-
*
|
781
|
-
* full_get_segment_t1(3) # => 1668 (16680 ms)
|
782
|
-
*
|
783
|
-
* call-seq:
|
784
|
-
* full_get_segment_t1(segment_index) -> Integer
|
785
|
-
*/
|
786
|
-
static VALUE ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment) {
|
787
|
-
ruby_whisper *rw;
|
788
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
789
|
-
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
790
|
-
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
|
791
|
-
return INT2NUM(t1);
|
792
|
-
}
|
793
|
-
|
794
|
-
/*
|
795
|
-
* Whether the next segment indexed by +segment_index+ is predicated as a speaker turn.
|
796
|
-
*
|
797
|
-
* full_get_segment_speacker_turn_next(3) # => true
|
798
|
-
*
|
799
|
-
* call-seq:
|
800
|
-
* full_get_segment_speacker_turn_next(segment_index) -> bool
|
801
|
-
*/
|
802
|
-
static VALUE ruby_whisper_full_get_segment_speaker_turn_next(VALUE self, VALUE i_segment) {
|
803
|
-
ruby_whisper *rw;
|
804
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
805
|
-
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
806
|
-
const bool speaker_turn_next = whisper_full_get_segment_speaker_turn_next(rw->context, c_i_segment);
|
807
|
-
return speaker_turn_next ? Qtrue : Qfalse;
|
808
|
-
}
|
809
|
-
|
810
|
-
/*
|
811
|
-
* Text of a segment indexed by +segment_index+.
|
812
|
-
*
|
813
|
-
* full_get_segment_text(3) # => "ask not what your country can do for you, ..."
|
814
|
-
*
|
815
|
-
* call-seq:
|
816
|
-
* full_get_segment_text(segment_index) -> String
|
817
|
-
*/
|
818
|
-
static VALUE ruby_whisper_full_get_segment_text(VALUE self, VALUE i_segment) {
|
819
|
-
ruby_whisper *rw;
|
820
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
821
|
-
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
822
|
-
const char * text = whisper_full_get_segment_text(rw->context, c_i_segment);
|
823
|
-
return rb_str_new2(text);
|
824
|
-
}
|
825
|
-
|
826
|
-
/*
|
827
|
-
* params.language = "auto" | "en", etc...
|
828
|
-
*
|
829
|
-
* call-seq:
|
830
|
-
* language = lang_name -> lang_name
|
831
|
-
*/
|
832
|
-
static VALUE ruby_whisper_params_set_language(VALUE self, VALUE value) {
|
833
|
-
ruby_whisper_params *rwp;
|
834
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
835
|
-
if (value == Qfalse || value == Qnil) {
|
836
|
-
rwp->params.language = "auto";
|
837
|
-
} else {
|
838
|
-
rwp->params.language = StringValueCStr(value);
|
839
|
-
}
|
840
|
-
return value;
|
841
|
-
}
|
842
|
-
/*
|
843
|
-
* call-seq:
|
844
|
-
* language -> String
|
845
|
-
*/
|
846
|
-
static VALUE ruby_whisper_params_get_language(VALUE self) {
|
847
|
-
ruby_whisper_params *rwp;
|
848
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
849
|
-
if (rwp->params.language) {
|
850
|
-
return rb_str_new2(rwp->params.language);
|
851
|
-
} else {
|
852
|
-
return rb_str_new2("auto");
|
853
|
-
}
|
854
|
-
}
|
855
|
-
/*
 * Writer for the +translate+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   translate = do_translate -> do_translate
 */
|
859
|
-
static VALUE ruby_whisper_params_set_translate(VALUE self, VALUE value) {
|
860
|
-
BOOL_PARAMS_SETTER(self, translate, value)
|
861
|
-
}
|
862
|
-
/*
 * Reader for the +translate+ flag. The whole body is supplied by the
 * BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   translate -> bool
 */
|
866
|
-
static VALUE ruby_whisper_params_get_translate(VALUE self) {
|
867
|
-
BOOL_PARAMS_GETTER(self, translate)
|
868
|
-
}
|
869
|
-
/*
 * Writer for the +no_context+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   no_context = dont_use_context -> dont_use_context
 */
|
873
|
-
static VALUE ruby_whisper_params_set_no_context(VALUE self, VALUE value) {
|
874
|
-
BOOL_PARAMS_SETTER(self, no_context, value)
|
875
|
-
}
|
876
|
-
/*
 * If true, does not use past transcription (if any) as initial prompt for the decoder.
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   no_context -> bool
 */
|
882
|
-
static VALUE ruby_whisper_params_get_no_context(VALUE self) {
|
883
|
-
BOOL_PARAMS_GETTER(self, no_context)
|
884
|
-
}
|
885
|
-
/*
 * Writer for the +single_segment+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   single_segment = force_single -> force_single
 */
|
889
|
-
static VALUE ruby_whisper_params_set_single_segment(VALUE self, VALUE value) {
|
890
|
-
BOOL_PARAMS_SETTER(self, single_segment, value)
|
891
|
-
}
|
892
|
-
/*
 * If true, forces single segment output (useful for streaming).
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   single_segment -> bool
 */
|
898
|
-
static VALUE ruby_whisper_params_get_single_segment(VALUE self) {
|
899
|
-
BOOL_PARAMS_GETTER(self, single_segment)
|
900
|
-
}
|
901
|
-
/*
 * Writer for the +print_special+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   print_special = force_print -> force_print
 */
|
905
|
-
static VALUE ruby_whisper_params_set_print_special(VALUE self, VALUE value) {
|
906
|
-
BOOL_PARAMS_SETTER(self, print_special, value)
|
907
|
-
}
|
908
|
-
/*
 * If true, prints special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.).
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   print_special -> bool
 */
|
914
|
-
static VALUE ruby_whisper_params_get_print_special(VALUE self) {
|
915
|
-
BOOL_PARAMS_GETTER(self, print_special)
|
916
|
-
}
|
917
|
-
/*
 * Writer for the +print_progress+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   print_progress = force_print -> force_print
 */
|
921
|
-
static VALUE ruby_whisper_params_set_print_progress(VALUE self, VALUE value) {
|
922
|
-
BOOL_PARAMS_SETTER(self, print_progress, value)
|
923
|
-
}
|
924
|
-
/*
 * If true, prints progress information.
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   print_progress -> bool
 */
|
930
|
-
static VALUE ruby_whisper_params_get_print_progress(VALUE self) {
|
931
|
-
BOOL_PARAMS_GETTER(self, print_progress)
|
932
|
-
}
|
933
|
-
/*
 * Writer for the +print_realtime+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   print_realtime = force_print -> force_print
 */
|
937
|
-
static VALUE ruby_whisper_params_set_print_realtime(VALUE self, VALUE value) {
|
938
|
-
BOOL_PARAMS_SETTER(self, print_realtime, value)
|
939
|
-
}
|
940
|
-
/*
 * If true, prints results from within whisper.cpp. (avoid it, use callback instead)
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   print_realtime -> bool
 */
|
945
|
-
static VALUE ruby_whisper_params_get_print_realtime(VALUE self) {
|
946
|
-
BOOL_PARAMS_GETTER(self, print_realtime)
|
947
|
-
}
|
948
|
-
/*
 * Writer for the +print_timestamps+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   print_timestamps = force_print -> force_print
 */
|
952
|
-
static VALUE ruby_whisper_params_set_print_timestamps(VALUE self, VALUE value) {
|
953
|
-
BOOL_PARAMS_SETTER(self, print_timestamps, value)
|
954
|
-
}
|
955
|
-
/*
 * If true, prints timestamps for each text segment when printing realtime.
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   print_timestamps -> bool
 */
|
961
|
-
static VALUE ruby_whisper_params_get_print_timestamps(VALUE self) {
|
962
|
-
BOOL_PARAMS_GETTER(self, print_timestamps)
|
963
|
-
}
|
964
|
-
/*
 * Writer for the +suppress_blank+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   suppress_blank = force_suppress -> force_suppress
 */
|
968
|
-
static VALUE ruby_whisper_params_set_suppress_blank(VALUE self, VALUE value) {
|
969
|
-
BOOL_PARAMS_SETTER(self, suppress_blank, value)
|
970
|
-
}
|
971
|
-
/*
 * If true, suppresses blank outputs.
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   suppress_blank -> bool
 */
|
977
|
-
static VALUE ruby_whisper_params_get_suppress_blank(VALUE self) {
|
978
|
-
BOOL_PARAMS_GETTER(self, suppress_blank)
|
979
|
-
}
|
980
|
-
/*
 * Writer for the +suppress_non_speech_tokens+ flag. The whole body is
 * supplied by the BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   suppress_non_speech_tokens = force_suppress -> force_suppress
 */
|
984
|
-
static VALUE ruby_whisper_params_set_suppress_non_speech_tokens(VALUE self, VALUE value) {
|
985
|
-
BOOL_PARAMS_SETTER(self, suppress_non_speech_tokens, value)
|
986
|
-
}
|
987
|
-
/*
 * If true, suppresses non-speech tokens.
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   suppress_non_speech_tokens -> bool
 */
|
993
|
-
static VALUE ruby_whisper_params_get_suppress_non_speech_tokens(VALUE self) {
|
994
|
-
BOOL_PARAMS_GETTER(self, suppress_non_speech_tokens)
|
995
|
-
}
|
996
|
-
/*
 * If true, enables token-level timestamps.
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   token_timestamps -> bool
 */
|
1002
|
-
static VALUE ruby_whisper_params_get_token_timestamps(VALUE self) {
|
1003
|
-
BOOL_PARAMS_GETTER(self, token_timestamps)
|
1004
|
-
}
|
1005
|
-
/*
 * Writer for the +token_timestamps+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   token_timestamps = force_timestamps -> force_timestamps
 */
|
1009
|
-
static VALUE ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value) {
|
1010
|
-
BOOL_PARAMS_SETTER(self, token_timestamps, value)
|
1011
|
-
}
|
1012
|
-
/*
 * If true, split on word rather than on token (when used with max_len).
 * The whole body is supplied by the BOOL_PARAMS_GETTER macro (defined elsewhere).
 *
 * call-seq:
 *   split_on_word -> bool
 */
|
1018
|
-
static VALUE ruby_whisper_params_get_split_on_word(VALUE self) {
|
1019
|
-
BOOL_PARAMS_GETTER(self, split_on_word)
|
1020
|
-
}
|
1021
|
-
/*
 * Writer for the +split_on_word+ flag. The whole body is supplied by the
 * BOOL_PARAMS_SETTER macro (defined elsewhere).
 *
 * call-seq:
 *   split_on_word = force_split -> force_split
 */
|
1025
|
-
static VALUE ruby_whisper_params_set_split_on_word(VALUE self, VALUE value) {
|
1026
|
-
BOOL_PARAMS_SETTER(self, split_on_word, value)
|
1027
|
-
}
|
1028
|
-
/*
|
1029
|
-
* Tokens to provide to the whisper decoder as initial prompt
|
1030
|
-
* these are prepended to any existing text context from a previous call
|
1031
|
-
* use whisper_tokenize() to convert text to tokens.
|
1032
|
-
* Maximum of whisper_n_text_ctx()/2 tokens are used (typically 224).
|
1033
|
-
*
|
1034
|
-
* call-seq:
|
1035
|
-
* initial_prompt -> String
|
1036
|
-
*/
|
1037
|
-
static VALUE ruby_whisper_params_get_initial_prompt(VALUE self) {
|
1038
|
-
ruby_whisper_params *rwp;
|
1039
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1040
|
-
return rwp->params.initial_prompt == nullptr ? Qnil : rb_str_new2(rwp->params.initial_prompt);
|
1041
|
-
}
|
1042
|
-
/*
|
1043
|
-
* call-seq:
|
1044
|
-
* initial_prompt = prompt -> prompt
|
1045
|
-
*/
|
1046
|
-
static VALUE ruby_whisper_params_set_initial_prompt(VALUE self, VALUE value) {
|
1047
|
-
ruby_whisper_params *rwp;
|
1048
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1049
|
-
rwp->params.initial_prompt = StringValueCStr(value);
|
1050
|
-
return value;
|
1051
|
-
}
|
1052
|
-
/*
|
1053
|
-
* If true, enables diarization.
|
1054
|
-
*
|
1055
|
-
* call-seq:
|
1056
|
-
* diarize -> bool
|
1057
|
-
*/
|
1058
|
-
static VALUE ruby_whisper_params_get_diarize(VALUE self) {
|
1059
|
-
ruby_whisper_params *rwp;
|
1060
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1061
|
-
if (rwp->diarize) {
|
1062
|
-
return Qtrue;
|
1063
|
-
} else {
|
1064
|
-
return Qfalse;
|
1065
|
-
}
|
1066
|
-
}
|
1067
|
-
/*
|
1068
|
-
* call-seq:
|
1069
|
-
* diarize = force_diarize -> force_diarize
|
1070
|
-
*/
|
1071
|
-
static VALUE ruby_whisper_params_set_diarize(VALUE self, VALUE value) {
|
1072
|
-
ruby_whisper_params *rwp;
|
1073
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1074
|
-
if (value == Qfalse || value == Qnil) {
|
1075
|
-
rwp->diarize = false;
|
1076
|
-
} else {
|
1077
|
-
rwp->diarize = true;
|
1078
|
-
} \
|
1079
|
-
return value;
|
1080
|
-
}
|
1081
|
-
|
1082
|
-
/*
|
1083
|
-
* Start offset in ms.
|
1084
|
-
*
|
1085
|
-
* call-seq:
|
1086
|
-
* offset -> Integer
|
1087
|
-
*/
|
1088
|
-
static VALUE ruby_whisper_params_get_offset(VALUE self) {
|
1089
|
-
ruby_whisper_params *rwp;
|
1090
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1091
|
-
return INT2NUM(rwp->params.offset_ms);
|
1092
|
-
}
|
1093
|
-
/*
|
1094
|
-
* call-seq:
|
1095
|
-
* offset = offset_ms -> offset_ms
|
1096
|
-
*/
|
1097
|
-
static VALUE ruby_whisper_params_set_offset(VALUE self, VALUE value) {
|
1098
|
-
ruby_whisper_params *rwp;
|
1099
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1100
|
-
rwp->params.offset_ms = NUM2INT(value);
|
1101
|
-
return value;
|
1102
|
-
}
|
1103
|
-
/*
|
1104
|
-
* Audio duration to process in ms.
|
1105
|
-
*
|
1106
|
-
* call-seq:
|
1107
|
-
* duration -> Integer
|
1108
|
-
*/
|
1109
|
-
static VALUE ruby_whisper_params_get_duration(VALUE self) {
|
1110
|
-
ruby_whisper_params *rwp;
|
1111
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1112
|
-
return INT2NUM(rwp->params.duration_ms);
|
1113
|
-
}
|
1114
|
-
/*
|
1115
|
-
* call-seq:
|
1116
|
-
* duration = duration_ms -> duration_ms
|
1117
|
-
*/
|
1118
|
-
static VALUE ruby_whisper_params_set_duration(VALUE self, VALUE value) {
|
1119
|
-
ruby_whisper_params *rwp;
|
1120
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1121
|
-
rwp->params.duration_ms = NUM2INT(value);
|
1122
|
-
return value;
|
1123
|
-
}
|
1124
|
-
|
1125
|
-
/*
|
1126
|
-
* Max tokens to use from past text as prompt for the decoder.
|
1127
|
-
*
|
1128
|
-
* call-seq:
|
1129
|
-
* max_text_tokens -> Integer
|
1130
|
-
*/
|
1131
|
-
static VALUE ruby_whisper_params_get_max_text_tokens(VALUE self) {
|
1132
|
-
ruby_whisper_params *rwp;
|
1133
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1134
|
-
return INT2NUM(rwp->params.n_max_text_ctx);
|
1135
|
-
}
|
1136
|
-
/*
|
1137
|
-
* call-seq:
|
1138
|
-
* max_text_tokens = n_tokens -> n_tokens
|
1139
|
-
*/
|
1140
|
-
static VALUE ruby_whisper_params_set_max_text_tokens(VALUE self, VALUE value) {
|
1141
|
-
ruby_whisper_params *rwp;
|
1142
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1143
|
-
rwp->params.n_max_text_ctx = NUM2INT(value);
|
1144
|
-
return value;
|
1145
|
-
}
|
1146
|
-
/*
|
1147
|
-
* call-seq:
|
1148
|
-
* temperature -> Float
|
1149
|
-
*/
|
1150
|
-
static VALUE ruby_whisper_params_get_temperature(VALUE self) {
|
1151
|
-
ruby_whisper_params *rwp;
|
1152
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1153
|
-
return DBL2NUM(rwp->params.temperature);
|
1154
|
-
}
|
1155
|
-
/*
|
1156
|
-
* call-seq:
|
1157
|
-
* temperature = temp -> temp
|
1158
|
-
*/
|
1159
|
-
static VALUE ruby_whisper_params_set_temperature(VALUE self, VALUE value) {
|
1160
|
-
ruby_whisper_params *rwp;
|
1161
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1162
|
-
rwp->params.temperature = RFLOAT_VALUE(value);
|
1163
|
-
return value;
|
1164
|
-
}
|
1165
|
-
/*
|
1166
|
-
* See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
|
1167
|
-
*
|
1168
|
-
* call-seq:
|
1169
|
-
* max_initial_ts -> Flaot
|
1170
|
-
*/
|
1171
|
-
static VALUE ruby_whisper_params_get_max_initial_ts(VALUE self) {
|
1172
|
-
ruby_whisper_params *rwp;
|
1173
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1174
|
-
return DBL2NUM(rwp->params.max_initial_ts);
|
1175
|
-
}
|
1176
|
-
/*
|
1177
|
-
* call-seq:
|
1178
|
-
* max_initial_ts = timestamp -> timestamp
|
1179
|
-
*/
|
1180
|
-
static VALUE ruby_whisper_params_set_max_initial_ts(VALUE self, VALUE value) {
|
1181
|
-
ruby_whisper_params *rwp;
|
1182
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1183
|
-
rwp->params.max_initial_ts = RFLOAT_VALUE(value);
|
1184
|
-
return value;
|
1185
|
-
}
|
1186
|
-
/*
|
1187
|
-
* call-seq:
|
1188
|
-
* length_penalty -> Float
|
1189
|
-
*/
|
1190
|
-
static VALUE ruby_whisper_params_get_length_penalty(VALUE self) {
|
1191
|
-
ruby_whisper_params *rwp;
|
1192
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1193
|
-
return DBL2NUM(rwp->params.length_penalty);
|
1194
|
-
}
|
1195
|
-
/*
|
1196
|
-
* call-seq:
|
1197
|
-
* length_penalty = penalty -> penalty
|
1198
|
-
*/
|
1199
|
-
static VALUE ruby_whisper_params_set_length_penalty(VALUE self, VALUE value) {
|
1200
|
-
ruby_whisper_params *rwp;
|
1201
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1202
|
-
rwp->params.length_penalty = RFLOAT_VALUE(value);
|
1203
|
-
return value;
|
1204
|
-
}
|
1205
|
-
/*
|
1206
|
-
* call-seq:
|
1207
|
-
* temperature_inc -> Float
|
1208
|
-
*/
|
1209
|
-
static VALUE ruby_whisper_params_get_temperature_inc(VALUE self) {
|
1210
|
-
ruby_whisper_params *rwp;
|
1211
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1212
|
-
return DBL2NUM(rwp->params.temperature_inc);
|
1213
|
-
}
|
1214
|
-
/*
|
1215
|
-
* call-seq:
|
1216
|
-
* temperature_inc = inc -> inc
|
1217
|
-
*/
|
1218
|
-
static VALUE ruby_whisper_params_set_temperature_inc(VALUE self, VALUE value) {
|
1219
|
-
ruby_whisper_params *rwp;
|
1220
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1221
|
-
rwp->params.temperature_inc = RFLOAT_VALUE(value);
|
1222
|
-
return value;
|
1223
|
-
}
|
1224
|
-
/*
|
1225
|
-
* Similar to OpenAI's "compression_ratio_threshold"
|
1226
|
-
*
|
1227
|
-
* call-seq:
|
1228
|
-
* entropy_thold -> Float
|
1229
|
-
*/
|
1230
|
-
static VALUE ruby_whisper_params_get_entropy_thold(VALUE self) {
|
1231
|
-
ruby_whisper_params *rwp;
|
1232
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1233
|
-
return DBL2NUM(rwp->params.entropy_thold);
|
1234
|
-
}
|
1235
|
-
/*
|
1236
|
-
* call-seq:
|
1237
|
-
* entropy_thold = threshold -> threshold
|
1238
|
-
*/
|
1239
|
-
static VALUE ruby_whisper_params_set_entropy_thold(VALUE self, VALUE value) {
|
1240
|
-
ruby_whisper_params *rwp;
|
1241
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1242
|
-
rwp->params.entropy_thold = RFLOAT_VALUE(value);
|
1243
|
-
return value;
|
1244
|
-
}
|
1245
|
-
/*
|
1246
|
-
* call-seq:
|
1247
|
-
* logprob_thold -> Float
|
1248
|
-
*/
|
1249
|
-
static VALUE ruby_whisper_params_get_logprob_thold(VALUE self) {
|
1250
|
-
ruby_whisper_params *rwp;
|
1251
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1252
|
-
return DBL2NUM(rwp->params.logprob_thold);
|
1253
|
-
}
|
1254
|
-
/*
|
1255
|
-
* call-seq:
|
1256
|
-
* logprob_thold = threshold -> threshold
|
1257
|
-
*/
|
1258
|
-
static VALUE ruby_whisper_params_set_logprob_thold(VALUE self, VALUE value) {
|
1259
|
-
ruby_whisper_params *rwp;
|
1260
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1261
|
-
rwp->params.logprob_thold = RFLOAT_VALUE(value);
|
1262
|
-
return value;
|
1263
|
-
}
|
1264
|
-
/*
|
1265
|
-
* call-seq:
|
1266
|
-
* no_speech_thold -> Float
|
1267
|
-
*/
|
1268
|
-
static VALUE ruby_whisper_params_get_no_speech_thold(VALUE self) {
|
1269
|
-
ruby_whisper_params *rwp;
|
1270
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1271
|
-
return DBL2NUM(rwp->params.no_speech_thold);
|
1272
|
-
}
|
1273
|
-
/*
|
1274
|
-
* call-seq:
|
1275
|
-
* no_speech_thold = threshold -> threshold
|
1276
|
-
*/
|
1277
|
-
static VALUE ruby_whisper_params_set_no_speech_thold(VALUE self, VALUE value) {
|
1278
|
-
ruby_whisper_params *rwp;
|
1279
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1280
|
-
rwp->params.no_speech_thold = RFLOAT_VALUE(value);
|
1281
|
-
return value;
|
1282
|
-
}
|
1283
|
-
/*
|
1284
|
-
* Sets new segment callback, called for every newly generated text segment.
|
1285
|
-
*
|
1286
|
-
* params.new_segment_callback = ->(context, _, n_new, user_data) {
|
1287
|
-
* # ...
|
1288
|
-
* }
|
1289
|
-
*
|
1290
|
-
* call-seq:
|
1291
|
-
* new_segment_callback = callback -> callback
|
1292
|
-
*/
|
1293
|
-
static VALUE ruby_whisper_params_set_new_segment_callback(VALUE self, VALUE value) {
|
1294
|
-
ruby_whisper_params *rwp;
|
1295
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1296
|
-
rwp->new_segment_callback_container->callback = value;
|
1297
|
-
return value;
|
1298
|
-
}
|
1299
|
-
/*
|
1300
|
-
* Sets user data passed to the last argument of new segment callback.
|
1301
|
-
*
|
1302
|
-
* call-seq:
|
1303
|
-
* new_segment_callback_user_data = user_data -> use_data
|
1304
|
-
*/
|
1305
|
-
static VALUE ruby_whisper_params_set_new_segment_callback_user_data(VALUE self, VALUE value) {
|
1306
|
-
ruby_whisper_params *rwp;
|
1307
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1308
|
-
rwp->new_segment_callback_container->user_data = value;
|
1309
|
-
return value;
|
1310
|
-
}
|
1311
|
-
/*
|
1312
|
-
* Sets progress callback, called on each progress update.
|
1313
|
-
*
|
1314
|
-
* params.new_segment_callback = ->(context, _, n_new, user_data) {
|
1315
|
-
* # ...
|
1316
|
-
* }
|
1317
|
-
*
|
1318
|
-
* call-seq:
|
1319
|
-
* progress_callback = callback -> callback
|
1320
|
-
*/
|
1321
|
-
static VALUE ruby_whisper_params_set_progress_callback(VALUE self, VALUE value) {
|
1322
|
-
ruby_whisper_params *rwp;
|
1323
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1324
|
-
rwp->progress_callback_container->callback = value;
|
1325
|
-
return value;
|
1326
|
-
}
|
1327
|
-
/*
|
1328
|
-
* Sets user data passed to the last argument of progress callback.
|
1329
|
-
*
|
1330
|
-
* call-seq:
|
1331
|
-
* progress_callback_user_data = user_data -> use_data
|
1332
|
-
*/
|
1333
|
-
static VALUE ruby_whisper_params_set_progress_callback_user_data(VALUE self, VALUE value) {
|
1334
|
-
ruby_whisper_params *rwp;
|
1335
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1336
|
-
rwp->progress_callback_container->user_data = value;
|
1337
|
-
return value;
|
1338
|
-
}
|
1339
|
-
/*
|
1340
|
-
* Sets abort callback, called to check if the process should be aborted.
|
1341
|
-
*
|
1342
|
-
* params.abort_callback = ->(user_data) {
|
1343
|
-
* # ...
|
1344
|
-
* }
|
1345
|
-
*
|
1346
|
-
* call-seq:
|
1347
|
-
* abort_callback = callback -> callback
|
1348
|
-
*/
|
1349
|
-
static VALUE ruby_whisper_params_set_abort_callback(VALUE self, VALUE value) {
|
1350
|
-
ruby_whisper_params *rwp;
|
1351
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1352
|
-
rwp->abort_callback_container->callback = value;
|
1353
|
-
return value;
|
1354
|
-
}
|
1355
|
-
/*
|
1356
|
-
* Sets user data passed to the last argument of abort callback.
|
1357
|
-
*
|
1358
|
-
* call-seq:
|
1359
|
-
* abort_callback_user_data = user_data -> use_data
|
1360
|
-
*/
|
1361
|
-
static VALUE ruby_whisper_params_set_abort_callback_user_data(VALUE self, VALUE value) {
|
1362
|
-
ruby_whisper_params *rwp;
|
1363
|
-
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
1364
|
-
rwp->abort_callback_container->user_data = value;
|
1365
|
-
return value;
|
1366
|
-
}
|
1367
|
-
|
1368
|
-
// High level API
|
1369
|
-
|
1370
|
-
typedef struct {
|
1371
|
-
VALUE context;
|
1372
|
-
int index;
|
1373
|
-
} ruby_whisper_segment;
|
1374
|
-
|
1375
|
-
typedef struct {
|
1376
|
-
VALUE context;
|
1377
|
-
} ruby_whisper_model;
|
1378
|
-
|
1379
|
-
static void rb_whisper_segment_mark(ruby_whisper_segment *rws) {
|
1380
|
-
rb_gc_mark(rws->context);
|
1381
|
-
}
|
1382
|
-
|
1383
|
-
// Allocator for Whisper::Segment. Wraps a freshly allocated
// ruby_whisper_segment that is freed with RUBY_DEFAULT_FREE.
static VALUE ruby_whisper_segment_allocate(VALUE klass) {
  ruby_whisper_segment *rws = ALLOC(ruby_whisper_segment);
  // ALLOC does not zero the memory. The mark function passes rws->context to
  // rb_gc_mark, so it must hold a valid VALUE from the moment the object is
  // wrapped, even if it is never initialized further.
  rws->context = Qnil;
  rws->index = 0;
  return Data_Wrap_Struct(klass, rb_whisper_segment_mark, RUBY_DEFAULT_FREE, rws);
}
|
1388
|
-
|
1389
|
-
// Builds a Whisper::Segment (class cSegment) that refers to segment number
// `index` of the given context object, and returns it.
static VALUE rb_whisper_segment_initialize(VALUE context, int index) {
  const VALUE segment = ruby_whisper_segment_allocate(cSegment);

  ruby_whisper_segment *data;
  Data_Get_Struct(segment, ruby_whisper_segment, data);
  data->context = context;
  data->index = index;

  return segment;
}
|
1397
|
-
|
1398
|
-
/*
|
1399
|
-
* Yields each Whisper::Segment:
|
1400
|
-
*
|
1401
|
-
* whisper.transcribe("path/to/audio.wav", params)
|
1402
|
-
* whisper.each_segment do |segment|
|
1403
|
-
* puts segment.text
|
1404
|
-
* end
|
1405
|
-
*
|
1406
|
-
* Returns an Enumerator if no block given:
|
1407
|
-
*
|
1408
|
-
* whisper.transcribe("path/to/audio.wav", params)
|
1409
|
-
* enum = whisper.each_segment
|
1410
|
-
* enum.to_a # => [#<Whisper::Segment>, ...]
|
1411
|
-
*
|
1412
|
-
* call-seq:
|
1413
|
-
* each_segment {|segment| ... }
|
1414
|
-
* each_segment -> Enumerator
|
1415
|
-
*/
|
1416
|
-
static VALUE ruby_whisper_each_segment(VALUE self) {
|
1417
|
-
if (!rb_block_given_p()) {
|
1418
|
-
const VALUE method_name = rb_funcall(self, id___method__, 0);
|
1419
|
-
return rb_funcall(self, id_to_enum, 1, method_name);
|
1420
|
-
}
|
1421
|
-
|
1422
|
-
ruby_whisper *rw;
|
1423
|
-
Data_Get_Struct(self, ruby_whisper, rw);
|
1424
|
-
|
1425
|
-
const int n_segments = whisper_full_n_segments(rw->context);
|
1426
|
-
for (int i = 0; i < n_segments; ++i) {
|
1427
|
-
rb_yield(rb_whisper_segment_initialize(self, i));
|
1428
|
-
}
|
1429
|
-
|
1430
|
-
return self;
|
1431
|
-
}
|
1432
|
-
|
1433
|
-
/*
|
1434
|
-
* Hook called on new segment. Yields each Whisper::Segment.
|
1435
|
-
*
|
1436
|
-
* whisper.on_new_segment do |segment|
|
1437
|
-
* # ...
|
1438
|
-
* end
|
1439
|
-
*
|
1440
|
-
* call-seq:
|
1441
|
-
* on_new_segment {|segment| ... }
|
1442
|
-
*/
|
1443
|
-
static VALUE ruby_whisper_params_on_new_segment(VALUE self) {
|
1444
|
-
ruby_whisper_params *rws;
|
1445
|
-
Data_Get_Struct(self, ruby_whisper_params, rws);
|
1446
|
-
const VALUE blk = rb_block_proc();
|
1447
|
-
rb_ary_push(rws->new_segment_callback_container->callbacks, blk);
|
1448
|
-
return Qnil;
|
1449
|
-
}
|
1450
|
-
|
1451
|
-
/*
|
1452
|
-
* Hook called on progress update. Yields each progress Integer between 0 and 100.
|
1453
|
-
*
|
1454
|
-
* whisper.on_progress do |progress|
|
1455
|
-
* # ...
|
1456
|
-
* end
|
1457
|
-
*
|
1458
|
-
* call-seq:
|
1459
|
-
* on_progress {|progress| ... }
|
1460
|
-
*/
|
1461
|
-
static VALUE ruby_whisper_params_on_progress(VALUE self) {
|
1462
|
-
ruby_whisper_params *rws;
|
1463
|
-
Data_Get_Struct(self, ruby_whisper_params, rws);
|
1464
|
-
const VALUE blk = rb_block_proc();
|
1465
|
-
rb_ary_push(rws->progress_callback_container->callbacks, blk);
|
1466
|
-
return Qnil;
|
1467
|
-
}
|
1468
|
-
|
1469
|
-
/*
|
1470
|
-
* Call block to determine whether abort or not. Return +true+ when you want to abort.
|
1471
|
-
*
|
1472
|
-
* params.abort_on do
|
1473
|
-
* if some_condition
|
1474
|
-
* true # abort
|
1475
|
-
* else
|
1476
|
-
* false # continue
|
1477
|
-
* end
|
1478
|
-
* end
|
1479
|
-
*
|
1480
|
-
* call-seq:
|
1481
|
-
* abort_on { ... }
|
1482
|
-
*/
|
1483
|
-
static VALUE ruby_whisper_params_abort_on(VALUE self) {
|
1484
|
-
ruby_whisper_params *rws;
|
1485
|
-
Data_Get_Struct(self, ruby_whisper_params, rws);
|
1486
|
-
const VALUE blk = rb_block_proc();
|
1487
|
-
rb_ary_push(rws->abort_callback_container->callbacks, blk);
|
1488
|
-
return Qnil;
|
1489
|
-
}
|
1490
|
-
|
1491
|
-
/*
|
1492
|
-
* Start time in milliseconds.
|
1493
|
-
*
|
1494
|
-
* call-seq:
|
1495
|
-
* start_time -> Integer
|
1496
|
-
*/
|
1497
|
-
static VALUE ruby_whisper_segment_get_start_time(VALUE self) {
|
1498
|
-
ruby_whisper_segment *rws;
|
1499
|
-
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
1500
|
-
ruby_whisper *rw;
|
1501
|
-
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
1502
|
-
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
1503
|
-
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
1504
|
-
return INT2NUM(t0 * 10);
|
1505
|
-
}
|
1506
|
-
|
1507
|
-
/*
|
1508
|
-
* End time in milliseconds.
|
1509
|
-
*
|
1510
|
-
* call-seq:
|
1511
|
-
* end_time -> Integer
|
1512
|
-
*/
|
1513
|
-
static VALUE ruby_whisper_segment_get_end_time(VALUE self) {
|
1514
|
-
ruby_whisper_segment *rws;
|
1515
|
-
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
1516
|
-
ruby_whisper *rw;
|
1517
|
-
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
1518
|
-
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
1519
|
-
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
1520
|
-
return INT2NUM(t1 * 10);
|
1521
|
-
}
|
1522
|
-
|
1523
|
-
/*
|
1524
|
-
* Whether the next segment is predicted as a speaker turn.
|
1525
|
-
*
|
1526
|
-
* call-seq:
|
1527
|
-
* speaker_turn_next? -> bool
|
1528
|
-
*/
|
1529
|
-
static VALUE ruby_whisper_segment_get_speaker_turn_next(VALUE self) {
|
1530
|
-
ruby_whisper_segment *rws;
|
1531
|
-
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
1532
|
-
ruby_whisper *rw;
|
1533
|
-
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
1534
|
-
return whisper_full_get_segment_speaker_turn_next(rw->context, rws->index) ? Qtrue : Qfalse;
|
1535
|
-
}
|
1536
|
-
|
1537
|
-
/*
|
1538
|
-
* call-seq:
|
1539
|
-
* text -> String
|
1540
|
-
*/
|
1541
|
-
static VALUE ruby_whisper_segment_get_text(VALUE self) {
|
1542
|
-
ruby_whisper_segment *rws;
|
1543
|
-
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
1544
|
-
ruby_whisper *rw;
|
1545
|
-
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
1546
|
-
const char * text = whisper_full_get_segment_text(rw->context, rws->index);
|
1547
|
-
return rb_str_new2(text);
|
1548
|
-
}
|
1549
|
-
|
1550
|
-
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
|
1551
|
-
rb_gc_mark(rwm->context);
|
1552
|
-
}
|
1553
|
-
|
1554
|
-
// Allocator for Whisper::Model. Wraps a freshly allocated
// ruby_whisper_model that is freed with RUBY_DEFAULT_FREE.
static VALUE ruby_whisper_model_allocate(VALUE klass) {
  ruby_whisper_model *rwm = ALLOC(ruby_whisper_model);
  // ALLOC does not zero the memory. The mark function passes rwm->context to
  // rb_gc_mark, so it must hold a valid VALUE from the moment the object is
  // wrapped, even if it is never initialized further.
  rwm->context = Qnil;
  return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
}
|
1559
|
-
|
1560
|
-
// Builds a Whisper::Model (class cModel) that refers to the given context
// object, and returns it.
static VALUE rb_whisper_model_initialize(VALUE context) {
  const VALUE model = ruby_whisper_model_allocate(cModel);

  ruby_whisper_model *data;
  Data_Get_Struct(model, ruby_whisper_model, data);
  data->context = context;

  return model;
}
|
1567
|
-
|
1568
|
-
/*
|
1569
|
-
* call-seq:
|
1570
|
-
* model -> Whisper::Model
|
1571
|
-
*/
|
1572
|
-
static VALUE ruby_whisper_get_model(VALUE self) {
|
1573
|
-
return rb_whisper_model_initialize(self);
|
1574
|
-
}
|
1575
|
-
|
1576
|
-
/*
|
1577
|
-
* call-seq:
|
1578
|
-
* n_vocab -> Integer
|
1579
|
-
*/
|
1580
|
-
static VALUE ruby_whisper_c_model_n_vocab(VALUE self) {
|
1581
|
-
ruby_whisper_model *rwm;
|
1582
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1583
|
-
ruby_whisper *rw;
|
1584
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1585
|
-
return INT2NUM(whisper_model_n_vocab(rw->context));
|
1586
|
-
}
|
1587
|
-
|
1588
|
-
/*
|
1589
|
-
* call-seq:
|
1590
|
-
* n_audio_ctx -> Integer
|
1591
|
-
*/
|
1592
|
-
static VALUE ruby_whisper_c_model_n_audio_ctx(VALUE self) {
|
1593
|
-
ruby_whisper_model *rwm;
|
1594
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1595
|
-
ruby_whisper *rw;
|
1596
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1597
|
-
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
1598
|
-
}
|
1599
|
-
|
1600
|
-
/*
|
1601
|
-
* call-seq:
|
1602
|
-
* n_audio_state -> Integer
|
1603
|
-
*/
|
1604
|
-
static VALUE ruby_whisper_c_model_n_audio_state(VALUE self) {
|
1605
|
-
ruby_whisper_model *rwm;
|
1606
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1607
|
-
ruby_whisper *rw;
|
1608
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1609
|
-
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
1610
|
-
}
|
1611
|
-
|
1612
|
-
/*
|
1613
|
-
* call-seq:
|
1614
|
-
* n_audio_head -> Integer
|
1615
|
-
*/
|
1616
|
-
static VALUE ruby_whisper_c_model_n_audio_head(VALUE self) {
|
1617
|
-
ruby_whisper_model *rwm;
|
1618
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1619
|
-
ruby_whisper *rw;
|
1620
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1621
|
-
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
1622
|
-
}
|
1623
|
-
|
1624
|
-
/*
|
1625
|
-
* call-seq:
|
1626
|
-
* n_audio_layer -> Integer
|
1627
|
-
*/
|
1628
|
-
static VALUE ruby_whisper_c_model_n_audio_layer(VALUE self) {
|
1629
|
-
ruby_whisper_model *rwm;
|
1630
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1631
|
-
ruby_whisper *rw;
|
1632
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1633
|
-
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
1634
|
-
}
|
1635
|
-
|
1636
|
-
/*
|
1637
|
-
* call-seq:
|
1638
|
-
* n_text_ctx -> Integer
|
1639
|
-
*/
|
1640
|
-
static VALUE ruby_whisper_c_model_n_text_ctx(VALUE self) {
|
1641
|
-
ruby_whisper_model *rwm;
|
1642
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1643
|
-
ruby_whisper *rw;
|
1644
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1645
|
-
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
1646
|
-
}
|
1647
|
-
|
1648
|
-
/*
|
1649
|
-
* call-seq:
|
1650
|
-
* n_text_state -> Integer
|
1651
|
-
*/
|
1652
|
-
static VALUE ruby_whisper_c_model_n_text_state(VALUE self) {
|
1653
|
-
ruby_whisper_model *rwm;
|
1654
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1655
|
-
ruby_whisper *rw;
|
1656
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1657
|
-
return INT2NUM(whisper_model_n_text_state(rw->context));
|
1658
|
-
}
|
1659
|
-
|
1660
|
-
/*
|
1661
|
-
* call-seq:
|
1662
|
-
* n_text_head -> Integer
|
1663
|
-
*/
|
1664
|
-
static VALUE ruby_whisper_c_model_n_text_head(VALUE self) {
|
1665
|
-
ruby_whisper_model *rwm;
|
1666
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1667
|
-
ruby_whisper *rw;
|
1668
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1669
|
-
return INT2NUM(whisper_model_n_text_head(rw->context));
|
1670
|
-
}
|
1671
|
-
|
1672
|
-
/*
|
1673
|
-
* call-seq:
|
1674
|
-
* n_text_layer -> Integer
|
1675
|
-
*/
|
1676
|
-
static VALUE ruby_whisper_c_model_n_text_layer(VALUE self) {
|
1677
|
-
ruby_whisper_model *rwm;
|
1678
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1679
|
-
ruby_whisper *rw;
|
1680
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1681
|
-
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
1682
|
-
}
|
1683
|
-
|
1684
|
-
/*
|
1685
|
-
* call-seq:
|
1686
|
-
* n_mels -> Integer
|
1687
|
-
*/
|
1688
|
-
static VALUE ruby_whisper_c_model_n_mels(VALUE self) {
|
1689
|
-
ruby_whisper_model *rwm;
|
1690
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1691
|
-
ruby_whisper *rw;
|
1692
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1693
|
-
return INT2NUM(whisper_model_n_mels(rw->context));
|
1694
|
-
}
|
1695
|
-
|
1696
|
-
/*
|
1697
|
-
* call-seq:
|
1698
|
-
* ftype -> Integer
|
1699
|
-
*/
|
1700
|
-
static VALUE ruby_whisper_c_model_ftype(VALUE self) {
|
1701
|
-
ruby_whisper_model *rwm;
|
1702
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1703
|
-
ruby_whisper *rw;
|
1704
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1705
|
-
return INT2NUM(whisper_model_ftype(rw->context));
|
1706
|
-
}
|
1707
|
-
|
1708
|
-
/*
|
1709
|
-
* call-seq:
|
1710
|
-
* type -> String
|
1711
|
-
*/
|
1712
|
-
static VALUE ruby_whisper_c_model_type(VALUE self) {
|
1713
|
-
ruby_whisper_model *rwm;
|
1714
|
-
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
1715
|
-
ruby_whisper *rw;
|
1716
|
-
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
1717
|
-
return rb_str_new2(whisper_model_type_readable(rw->context));
|
1718
|
-
}
|
1719
|
-
|
1720
|
-
/*
 * Initializes a Whisper::Error from an integer error code.
 *
 * The code is mapped to a fixed message (unrecognized codes get
 * "unknown error"), the message is passed to the superclass initializer,
 * and the original code object is stored in @code.
 *
 * Raises TypeError/RangeError (from NUM2INT) when +code+ is not an Integer
 * that fits in a C int.
 */
static VALUE ruby_whisper_error_initialize(VALUE self, VALUE code) {
  const int c_code = NUM2INT(code);
  // const-qualified: string literals must not be bound to a mutable char *.
  const char *raw_message;
  switch (c_code) {
  case -2:
    raw_message = "failed to compute log mel spectrogram";
    break;
  case -3:
    raw_message = "failed to auto-detect language";
    break;
  case -4:
    raw_message = "too many decoders requested";
    break;
  case -5:
    raw_message = "audio_ctx is larger than the maximum allowed";
    break;
  case -6:
    raw_message = "failed to encode";
    break;
  case -7:
    raw_message = "whisper_kv_cache_init() failed for self-attention cache";
    break;
  case -8:
  case -9:
    // Both codes share the same message.
    raw_message = "failed to decode";
    break;
  default:
    raw_message = "unknown error";
    break;
  }
  const VALUE message = rb_str_new2(raw_message);
  rb_call_super(1, &message);
  rb_iv_set(self, "@code", code);

  return self;
}
|
1758
|
-
|
1759
|
-
|
1760
|
-
void Init_whisper() {
|
1761
|
-
id_to_s = rb_intern("to_s");
|
1762
|
-
id_call = rb_intern("call");
|
1763
|
-
id___method__ = rb_intern("__method__");
|
1764
|
-
id_to_enum = rb_intern("to_enum");
|
1765
|
-
id_length = rb_intern("length");
|
1766
|
-
id_next = rb_intern("next");
|
1767
|
-
id_new = rb_intern("new");
|
1768
|
-
id_to_path = rb_intern("to_path");
|
1769
|
-
id_pre_converted_models = rb_intern("pre_converted_models");
|
1770
|
-
|
1771
|
-
mWhisper = rb_define_module("Whisper");
|
1772
|
-
cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
|
1773
|
-
cParams = rb_define_class_under(mWhisper, "Params", rb_cObject);
|
1774
|
-
eError = rb_define_class_under(mWhisper, "Error", rb_eStandardError);
|
1775
|
-
|
1776
|
-
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
|
1777
|
-
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
|
1778
|
-
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
|
1779
|
-
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
|
1780
|
-
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
|
1781
|
-
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
|
1782
|
-
|
1783
|
-
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
|
1784
|
-
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
1785
|
-
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
1786
|
-
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
1787
|
-
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
1788
|
-
rb_define_singleton_method(mWhisper, "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
1789
|
-
|
1790
|
-
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
1791
|
-
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
1792
|
-
|
1793
|
-
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
1794
|
-
rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0);
|
1795
|
-
rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
1796
|
-
rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
1797
|
-
rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
1798
|
-
rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
1799
|
-
rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
1800
|
-
rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0);
|
1801
|
-
rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0);
|
1802
|
-
rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
1803
|
-
rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0);
|
1804
|
-
rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0);
|
1805
|
-
rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0);
|
1806
|
-
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
|
1807
|
-
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
|
1808
|
-
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
|
1809
|
-
rb_define_method(cContext, "full_get_segment_t1", ruby_whisper_full_get_segment_t1, 1);
|
1810
|
-
rb_define_method(cContext, "full_get_segment_speaker_turn_next", ruby_whisper_full_get_segment_speaker_turn_next, 1);
|
1811
|
-
rb_define_method(cContext, "full_get_segment_text", ruby_whisper_full_get_segment_text, 1);
|
1812
|
-
rb_define_method(cContext, "full", ruby_whisper_full, -1);
|
1813
|
-
rb_define_method(cContext, "full_parallel", ruby_whisper_full_parallel, -1);
|
1814
|
-
|
1815
|
-
rb_define_alloc_func(cParams, ruby_whisper_params_allocate);
|
1816
|
-
|
1817
|
-
rb_define_method(cParams, "language=", ruby_whisper_params_set_language, 1);
|
1818
|
-
rb_define_method(cParams, "language", ruby_whisper_params_get_language, 0);
|
1819
|
-
rb_define_method(cParams, "translate=", ruby_whisper_params_set_translate, 1);
|
1820
|
-
rb_define_method(cParams, "translate", ruby_whisper_params_get_translate, 0);
|
1821
|
-
rb_define_method(cParams, "no_context=", ruby_whisper_params_set_no_context, 1);
|
1822
|
-
rb_define_method(cParams, "no_context", ruby_whisper_params_get_no_context, 0);
|
1823
|
-
rb_define_method(cParams, "single_segment=", ruby_whisper_params_set_single_segment, 1);
|
1824
|
-
rb_define_method(cParams, "single_segment", ruby_whisper_params_get_single_segment, 0);
|
1825
|
-
rb_define_method(cParams, "print_special", ruby_whisper_params_get_print_special, 0);
|
1826
|
-
rb_define_method(cParams, "print_special=", ruby_whisper_params_set_print_special, 1);
|
1827
|
-
rb_define_method(cParams, "print_progress", ruby_whisper_params_get_print_progress, 0);
|
1828
|
-
rb_define_method(cParams, "print_progress=", ruby_whisper_params_set_print_progress, 1);
|
1829
|
-
rb_define_method(cParams, "print_realtime", ruby_whisper_params_get_print_realtime, 0);
|
1830
|
-
rb_define_method(cParams, "print_realtime=", ruby_whisper_params_set_print_realtime, 1);
|
1831
|
-
rb_define_method(cParams, "print_timestamps", ruby_whisper_params_get_print_timestamps, 0);
|
1832
|
-
rb_define_method(cParams, "print_timestamps=", ruby_whisper_params_set_print_timestamps, 1);
|
1833
|
-
rb_define_method(cParams, "suppress_blank", ruby_whisper_params_get_suppress_blank, 0);
|
1834
|
-
rb_define_method(cParams, "suppress_blank=", ruby_whisper_params_set_suppress_blank, 1);
|
1835
|
-
rb_define_method(cParams, "suppress_non_speech_tokens", ruby_whisper_params_get_suppress_non_speech_tokens, 0);
|
1836
|
-
rb_define_method(cParams, "suppress_non_speech_tokens=", ruby_whisper_params_set_suppress_non_speech_tokens, 1);
|
1837
|
-
rb_define_method(cParams, "token_timestamps", ruby_whisper_params_get_token_timestamps, 0);
|
1838
|
-
rb_define_method(cParams, "token_timestamps=", ruby_whisper_params_set_token_timestamps, 1);
|
1839
|
-
rb_define_method(cParams, "split_on_word", ruby_whisper_params_get_split_on_word, 0);
|
1840
|
-
rb_define_method(cParams, "split_on_word=", ruby_whisper_params_set_split_on_word, 1);
|
1841
|
-
rb_define_method(cParams, "initial_prompt", ruby_whisper_params_get_initial_prompt, 0);
|
1842
|
-
rb_define_method(cParams, "initial_prompt=", ruby_whisper_params_set_initial_prompt, 1);
|
1843
|
-
rb_define_method(cParams, "diarize", ruby_whisper_params_get_diarize, 0);
|
1844
|
-
rb_define_method(cParams, "diarize=", ruby_whisper_params_set_diarize, 1);
|
1845
|
-
|
1846
|
-
rb_define_method(cParams, "offset", ruby_whisper_params_get_offset, 0);
|
1847
|
-
rb_define_method(cParams, "offset=", ruby_whisper_params_set_offset, 1);
|
1848
|
-
rb_define_method(cParams, "duration", ruby_whisper_params_get_duration, 0);
|
1849
|
-
rb_define_method(cParams, "duration=", ruby_whisper_params_set_duration, 1);
|
1850
|
-
|
1851
|
-
rb_define_method(cParams, "max_text_tokens", ruby_whisper_params_get_max_text_tokens, 0);
|
1852
|
-
rb_define_method(cParams, "max_text_tokens=", ruby_whisper_params_set_max_text_tokens, 1);
|
1853
|
-
rb_define_method(cParams, "temperature", ruby_whisper_params_get_temperature, 0);
|
1854
|
-
rb_define_method(cParams, "temperature=", ruby_whisper_params_set_temperature, 1);
|
1855
|
-
rb_define_method(cParams, "max_initial_ts", ruby_whisper_params_get_max_initial_ts, 0);
|
1856
|
-
rb_define_method(cParams, "max_initial_ts=", ruby_whisper_params_set_max_initial_ts, 1);
|
1857
|
-
rb_define_method(cParams, "length_penalty", ruby_whisper_params_get_length_penalty, 0);
|
1858
|
-
rb_define_method(cParams, "length_penalty=", ruby_whisper_params_set_length_penalty, 1);
|
1859
|
-
rb_define_method(cParams, "temperature_inc", ruby_whisper_params_get_temperature_inc, 0);
|
1860
|
-
rb_define_method(cParams, "temperature_inc=", ruby_whisper_params_set_temperature_inc, 1);
|
1861
|
-
rb_define_method(cParams, "entropy_thold", ruby_whisper_params_get_entropy_thold, 0);
|
1862
|
-
rb_define_method(cParams, "entropy_thold=", ruby_whisper_params_set_entropy_thold, 1);
|
1863
|
-
rb_define_method(cParams, "logprob_thold", ruby_whisper_params_get_logprob_thold, 0);
|
1864
|
-
rb_define_method(cParams, "logprob_thold=", ruby_whisper_params_set_logprob_thold, 1);
|
1865
|
-
rb_define_method(cParams, "no_speech_thold", ruby_whisper_params_get_no_speech_thold, 0);
|
1866
|
-
rb_define_method(cParams, "no_speech_thold=", ruby_whisper_params_set_no_speech_thold, 1);
|
1867
|
-
|
1868
|
-
rb_define_method(cParams, "new_segment_callback=", ruby_whisper_params_set_new_segment_callback, 1);
|
1869
|
-
rb_define_method(cParams, "new_segment_callback_user_data=", ruby_whisper_params_set_new_segment_callback_user_data, 1);
|
1870
|
-
rb_define_method(cParams, "progress_callback=", ruby_whisper_params_set_progress_callback, 1);
|
1871
|
-
rb_define_method(cParams, "progress_callback_user_data=", ruby_whisper_params_set_progress_callback_user_data, 1);
|
1872
|
-
rb_define_method(cParams, "abort_callback=", ruby_whisper_params_set_abort_callback, 1);
|
1873
|
-
rb_define_method(cParams, "abort_callback_user_data=", ruby_whisper_params_set_abort_callback_user_data, 1);
|
1874
|
-
|
1875
|
-
rb_define_attr(eError, "code", true, false);
|
1876
|
-
rb_define_method(eError, "initialize", ruby_whisper_error_initialize, 1);
|
1877
|
-
|
1878
|
-
// High level
|
1879
|
-
cSegment = rb_define_class_under(mWhisper, "Segment", rb_cObject);
|
1880
|
-
|
1881
|
-
rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate);
|
1882
|
-
rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0);
|
1883
|
-
rb_define_method(cParams, "on_new_segment", ruby_whisper_params_on_new_segment, 0);
|
1884
|
-
rb_define_method(cParams, "on_progress", ruby_whisper_params_on_progress, 0);
|
1885
|
-
rb_define_method(cParams, "abort_on", ruby_whisper_params_abort_on, 0);
|
1886
|
-
rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0);
|
1887
|
-
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
1888
|
-
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
1889
|
-
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
1890
|
-
|
1891
|
-
cModel = rb_define_class_under(mWhisper, "Model", rb_cObject);
|
1892
|
-
rb_define_alloc_func(cModel, ruby_whisper_model_allocate);
|
1893
|
-
rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
|
1894
|
-
rb_define_method(cModel, "n_vocab", ruby_whisper_c_model_n_vocab, 0);
|
1895
|
-
rb_define_method(cModel, "n_audio_ctx", ruby_whisper_c_model_n_audio_ctx, 0);
|
1896
|
-
rb_define_method(cModel, "n_audio_state", ruby_whisper_c_model_n_audio_state, 0);
|
1897
|
-
rb_define_method(cModel, "n_audio_head", ruby_whisper_c_model_n_audio_head, 0);
|
1898
|
-
rb_define_method(cModel, "n_audio_layer", ruby_whisper_c_model_n_audio_layer, 0);
|
1899
|
-
rb_define_method(cModel, "n_text_ctx", ruby_whisper_c_model_n_text_ctx, 0);
|
1900
|
-
rb_define_method(cModel, "n_text_state", ruby_whisper_c_model_n_text_state, 0);
|
1901
|
-
rb_define_method(cModel, "n_text_head", ruby_whisper_c_model_n_text_head, 0);
|
1902
|
-
rb_define_method(cModel, "n_text_layer", ruby_whisper_c_model_n_text_layer, 0);
|
1903
|
-
rb_define_method(cModel, "n_mels", ruby_whisper_c_model_n_mels, 0);
|
1904
|
-
rb_define_method(cModel, "ftype", ruby_whisper_c_model_ftype, 0);
|
1905
|
-
rb_define_method(cModel, "type", ruby_whisper_c_model_type, 0);
|
1906
|
-
}
|
1907
|
-
#ifdef __cplusplus
|
1908
|
-
}
|
1909
|
-
#endif
|