whispercpp 1.3.2 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -3
- data/README.md +71 -14
- data/Rakefile +20 -7
- data/ext/.gitignore +4 -6
- data/ext/dependencies.rb +36 -24
- data/ext/extconf.rb +1 -1
- data/ext/options.rb +48 -184
- data/ext/ruby_whisper.c +18 -0
- data/ext/ruby_whisper_context.c +43 -12
- data/ext/ruby_whisper_model.c +1 -1
- data/ext/ruby_whisper_params.c +4 -2
- data/ext/ruby_whisper_segment.c +81 -4
- data/ext/ruby_whisper_transcribe.cpp +13 -7
- data/ext/ruby_whisper_vad_params.c +1 -1
- data/ext/sources/CMakeLists.txt +5 -1
- data/ext/sources/bindings/javascript/package.json +1 -1
- data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
- data/ext/sources/examples/addon.node/addon.cpp +150 -31
- data/ext/sources/examples/addon.node/index.js +3 -0
- data/ext/sources/examples/addon.node/vad-example.js +132 -0
- data/ext/sources/examples/bench/bench.cpp +3 -2
- data/ext/sources/examples/cli/cli.cpp +3 -2
- data/ext/sources/examples/command/command.cpp +32 -8
- data/ext/sources/examples/common-whisper.cpp +14 -7
- data/ext/sources/examples/lsp/lsp.cpp +2 -0
- data/ext/sources/examples/quantize/quantize.cpp +3 -0
- data/ext/sources/examples/server/CMakeLists.txt +3 -0
- data/ext/sources/examples/server/server.cpp +169 -22
- data/ext/sources/examples/stream/stream.cpp +6 -0
- data/ext/sources/examples/talk-llama/CMakeLists.txt +4 -1
- data/ext/sources/examples/talk-llama/llama-arch.cpp +171 -3
- data/ext/sources/examples/talk-llama/llama-arch.h +28 -1
- data/ext/sources/examples/talk-llama/llama-batch.cpp +741 -272
- data/ext/sources/examples/talk-llama/llama-batch.h +112 -54
- data/ext/sources/examples/talk-llama/llama-chat.cpp +30 -8
- data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
- data/ext/sources/examples/talk-llama/llama-context.cpp +520 -351
- data/ext/sources/examples/talk-llama/llama-context.h +38 -17
- data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-cparams.h +1 -1
- data/ext/sources/examples/talk-llama/llama-graph.cpp +447 -372
- data/ext/sources/examples/talk-llama/llama-graph.h +128 -58
- data/ext/sources/examples/talk-llama/llama-hparams.cpp +10 -2
- data/ext/sources/examples/talk-llama/llama-hparams.h +19 -2
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
- data/ext/sources/examples/talk-llama/llama-kv-cache.h +14 -472
- data/ext/sources/examples/talk-llama/llama-kv-cells.h +86 -26
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
- data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
- data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
- data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
- data/ext/sources/examples/talk-llama/llama-memory.h +88 -4
- data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
- data/ext/sources/examples/talk-llama/llama-model-loader.cpp +42 -17
- data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
- data/ext/sources/examples/talk-llama/llama-model.cpp +1863 -563
- data/ext/sources/examples/talk-llama/llama-model.h +27 -0
- data/ext/sources/examples/talk-llama/llama-quant.cpp +89 -6
- data/ext/sources/examples/talk-llama/llama-vocab.cpp +65 -28
- data/ext/sources/examples/talk-llama/llama-vocab.h +1 -0
- data/ext/sources/examples/talk-llama/llama.cpp +11 -7
- data/ext/sources/examples/talk-llama/llama.h +147 -40
- data/ext/sources/examples/talk-llama/talk-llama.cpp +2 -0
- data/ext/sources/examples/talk-llama/unicode.cpp +5 -0
- data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
- data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +2 -0
- data/ext/sources/ggml/CMakeLists.txt +48 -3
- data/ext/sources/ggml/cmake/common.cmake +24 -0
- data/ext/sources/ggml/include/ggml-backend.h +1 -1
- data/ext/sources/ggml/include/ggml-cpu.h +2 -0
- data/ext/sources/ggml/include/ggml.h +144 -5
- data/ext/sources/ggml/src/CMakeLists.txt +82 -24
- data/ext/sources/ggml/src/ggml-backend-reg.cpp +5 -0
- data/ext/sources/ggml/src/ggml-backend.cpp +46 -23
- data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- data/ext/sources/ggml/src/ggml-cann/common.h +6 -1
- data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- data/ext/sources/ggml/src/ggml-common.h +4 -0
- data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +133 -40
- data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- data/ext/sources/ggml/src/ggml-cpu/common.h +4 -3
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +146 -105
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
- data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1057 -174
- data/ext/sources/ggml/src/ggml-cpu/ops.h +8 -0
- data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
- data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
- data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +330 -38
- data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- data/ext/sources/ggml/src/ggml-cpu/vec.cpp +111 -18
- data/ext/sources/ggml/src/ggml-cpu/vec.h +303 -94
- data/ext/sources/ggml/src/ggml-cuda/common.cuh +60 -37
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cu +22 -0
- data/ext/sources/ggml/src/ggml-cuda/convert.cuh +5 -0
- data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +2 -2
- data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
- data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +265 -123
- data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
- data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
- data/ext/sources/ggml/src/ggml-cuda/mmv.cu +257 -87
- data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +2 -3
- data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +5 -18
- data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- data/ext/sources/ggml/src/ggml-cuda/unary.cu +89 -0
- data/ext/sources/ggml/src/ggml-cuda/unary.cuh +7 -0
- data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- data/ext/sources/ggml/src/ggml-impl.h +127 -183
- data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +27 -0
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +331 -49
- data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +564 -282
- data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
- data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +14 -0
- data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1859 -489
- data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- data/ext/sources/ggml/src/ggml-quants.c +6 -8
- data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
- data/ext/sources/ggml/src/ggml-sycl/concat.cpp +28 -41
- data/ext/sources/ggml/src/ggml-sycl/conv.cpp +4 -10
- data/ext/sources/ggml/src/ggml-sycl/convert.cpp +117 -165
- data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +192 -53
- data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +648 -1039
- data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +8 -105
- data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -100
- data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
- data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
- data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +60 -80
- data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +158 -203
- data/ext/sources/ggml/src/ggml-sycl/norm.cpp +55 -74
- data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -10
- data/ext/sources/ggml/src/ggml-sycl/rope.cpp +138 -27
- data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +3 -3
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +12 -16
- data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
- data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +726 -282
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +10 -1
- data/ext/sources/ggml/src/ggml.c +328 -48
- data/ext/sources/ggml/src/ggml.cpp +26 -0
- data/ext/sources/ggml/src/gguf.cpp +24 -3
- data/ext/sources/include/whisper.h +2 -0
- data/ext/sources/src/CMakeLists.txt +2 -0
- data/ext/sources/src/coreml/whisper-compat.h +10 -0
- data/ext/sources/src/coreml/whisper-compat.m +35 -0
- data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
- data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
- data/ext/sources/src/whisper.cpp +218 -169
- data/extsources.rb +15 -9
- data/lib/whisper/context.rb +15 -0
- data/lib/whisper/model/uri.rb +56 -1
- data/lib/whisper/segment.rb +58 -0
- data/sig/whisper.rbs +68 -38
- data/{tests → test}/helper.rb +1 -12
- data/{tests → test}/test_model.rb +9 -0
- data/test/test_package.rb +51 -0
- data/test/test_segment.rb +146 -0
- data/{tests → test}/test_whisper.rb +70 -0
- data/whispercpp.gemspec +2 -3
- metadata +91 -43
- data/ext/sources/.dockerignore +0 -3
- data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
- data/ext/sources/ci/run.sh +0 -336
- data/ext/sources/close-issue.yml +0 -28
- data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +0 -2739
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
- data/tests/test_package.rb +0 -46
- data/tests/test_segment.rb +0 -74
- /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /data/{tests → test}/jfk_reader/.gitignore +0 -0
- /data/{tests → test}/jfk_reader/extconf.rb +0 -0
- /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
- /data/{tests → test}/test_callback.rb +0 -0
- /data/{tests → test}/test_error.rb +0 -0
- /data/{tests → test}/test_params.rb +0 -0
- /data/{tests → test}/test_vad.rb +0 -0
- /data/{tests → test}/test_vad_params.rb +0 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
module Whisper
  class Context
    # Renders every segment of the current transcription as a SubRip (SRT) document.
    #
    # Cues are numbered from 1 in the order +each_segment+ yields them, and each cue
    # body is whatever Segment#to_srt_cue returns for that segment.
    #
    # Returns a new, mutable String ("" when there are no segments).
    def to_srt
      # Seed with +"" instead of a bare literal: the buffer is appended to in place,
      # and a literal would be frozen under `# frozen_string_literal: true`.
      each_segment.with_index(1).each_with_object(+"") do |(segment, number), srt|
        srt << "#{number}\n#{segment.to_srt_cue}\n"
      end
    end

    # Renders every segment of the current transcription as a WebVTT document.
    #
    # The output starts with the "WEBVTT" header line and a blank line, followed by
    # cues numbered from 1, each built from Segment#to_webvtt_cue.
    #
    # Returns a new, mutable String (just the header when there are no segments).
    def to_webvtt
      # Same reasoning as in #to_srt: use an explicitly mutable buffer as the seed.
      each_segment.with_index(1).each_with_object(+"WEBVTT\n\n") do |(segment, number), webvtt|
        webvtt << "#{number}\n#{segment.to_webvtt_cue}\n"
      end
    end
  end
end
|
data/lib/whisper/model/uri.rb
CHANGED
@@ -130,6 +130,44 @@ module Whisper
|
|
130
130
|
end
|
131
131
|
end
|
132
132
|
|
133
|
+
class ZipURI < URI
  # Fetches the zip archive through the parent implementation and unpacks it into the
  # archive's directory, unless an unpacked copy at least as new as the archive exists.
  #
  # Returns the archive path obtained from +super+ on every path, including the
  # "already unpacked" early exit, so callers always get the same kind of value.
  # Raises if +unzip+ cannot be started or exits with a failure (+exception: true+).
  def cache
    zip_path = super
    dest = unzipped_path
    # Up-to-date unpacked copy: nothing to extract, but still hand back the archive path.
    return zip_path if dest.exist? && dest.mtime >= zip_path.mtime

    escaping dest do
      # Argument-list form: no shell is involved, paths are passed verbatim.
      system "unzip", "-q", "-d", zip_path.dirname.to_path, zip_path.to_path, exception: true
    end
    zip_path
  end

  # Clears the parent's cache and also removes the unpacked directory, if present.
  def clear_cache
    super
    unzipped_path.rmtree if unzipped_path.exist?
  end

  private

  # Path of the unpacked content: the cache path with its last extension removed.
  def unzipped_path
    cache_path.sub_ext("")
  end

  # Runs the block with any existing +path+ moved aside to "<path>.removing".
  #
  # After the block (whether it returned or raised): if +path+ exists again, the
  # moved-aside copy is deleted; otherwise the moved-aside copy is renamed back,
  # so a failed block leaves the previous content in place.
  def escaping(path)
    escaped = Pathname("#{path}.removing")
    if path.exist?
      # Drop a stale backup left by an earlier run before reusing its name.
      escaped.rmtree if escaped.exist?
      path.rename escaped
    end
    yield
  ensure
    if path.exist?
      escaped.rmtree if escaped.exist?
    else
      escaped.rename path if escaped.exist?
    end
  end
end
|
170
|
+
|
133
171
|
@pre_converted_models = %w[
|
134
172
|
tiny
|
135
173
|
tiny.en
|
@@ -171,8 +209,25 @@ module Whisper
|
|
171
209
|
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
|
172
210
|
end
|
173
211
|
|
212
|
+
@coreml_compiled_models = %w[
|
213
|
+
tiny
|
214
|
+
tiny.en
|
215
|
+
base
|
216
|
+
base.en
|
217
|
+
small
|
218
|
+
small.en
|
219
|
+
medium
|
220
|
+
medium.en
|
221
|
+
large-v1
|
222
|
+
large-v2
|
223
|
+
large-v3
|
224
|
+
large-v3-turbo
|
225
|
+
].each_with_object({}) do |name, models|
|
226
|
+
models[@pre_converted_models[name]] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
|
227
|
+
end
|
228
|
+
|
174
229
|
class << self
|
175
|
-
attr_reader :pre_converted_models
|
230
|
+
attr_reader :pre_converted_models, :coreml_compiled_models
|
176
231
|
end
|
177
232
|
end
|
178
233
|
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Whisper
  class Segment
    # Characters that must be written as character references inside cue text,
    # mapped to their escaped form.
    SRT_ESCAPES = {
      "&" => "&amp;",
      "<" => "&lt;",
      ">" => "&gt;",
    }.freeze
    SRT_ESCAPES_RE = Regexp.union(SRT_ESCAPES.keys)
    private_constant :SRT_ESCAPES, :SRT_ESCAPES_RE

    # Returns this segment as an SRT cue without its sequence number:
    # "HH:MM:SS,mmm --> HH:MM:SS,mmm", the escaped text, each followed by a newline.
    def to_srt_cue
      "#{srt_start_time} --> #{srt_end_time}\n#{srt_text}\n"
    end

    # Returns this segment as a WebVTT cue without its identifier:
    # "HH:MM:SS.mmm --> HH:MM:SS.mmm", the escaped text, each followed by a newline.
    def to_webvtt_cue
      "#{webvtt_start_time} --> #{webvtt_end_time}\n#{webvtt_text}\n"
    end

    private

    # Splits a duration in milliseconds into [hours, minutes, seconds, milliseconds].
    def time_to_a(time)
      sec, decimal_part = time.divmod(1000)
      min, sec = sec.divmod(60)
      hour, min = min.divmod(60)
      [hour, min, sec, decimal_part]
    end

    # Formats milliseconds as an SRT timestamp (comma before the millisecond part).
    def srt_time(time)
      "%02d:%02d:%02d,%03d" % time_to_a(time)
    end

    def srt_start_time
      srt_time(start_time)
    end

    def srt_end_time
      srt_time(end_time)
    end

    # Segment text with "&", "<" and ">" replaced by their character references.
    def srt_text
      text.gsub(SRT_ESCAPES_RE, SRT_ESCAPES)
    end

    # Formats milliseconds as a WebVTT timestamp (dot before the millisecond part).
    def webvtt_time(time)
      "%02d:%02d:%02d.%03d" % time_to_a(time)
    end

    def webvtt_start_time
      webvtt_time(start_time)
    end

    def webvtt_end_time
      webvtt_time(end_time)
    end

    # WebVTT cue text uses the same escaping as SRT cue text.
    alias webvtt_text srt_text
  end
end
|
data/sig/whisper.rbs
CHANGED
@@ -10,6 +10,7 @@ module Whisper
|
|
10
10
|
type encoder_begin_callback = ^(Whisper::Context, void, Object user_data) -> void
|
11
11
|
type abort_callback = ^(Whisper::Context, void, Object user_data) -> boolish
|
12
12
|
|
13
|
+
VERSION: String
|
13
14
|
LOG_LEVEL_NONE: Integer
|
14
15
|
LOG_LEVEL_INFO: Integer
|
15
16
|
LOG_LEVEL_WARN: Integer
|
@@ -22,21 +23,22 @@ module Whisper
|
|
22
23
|
def self.lang_str: (Integer id) -> String
|
23
24
|
def self.lang_str_full: (Integer id) -> String
|
24
25
|
def self.log_set: (log_callback, Object? user_data) -> log_callback
|
26
|
+
def self.system_info_str: () -> String
|
25
27
|
|
26
28
|
class Context
|
27
|
-
def self.new: (path | ::URI::HTTP) -> instance
|
29
|
+
def self.new: (String | path | ::URI::HTTP) -> instance
|
28
30
|
|
29
31
|
# transcribe a single file
|
30
32
|
# can emit to a block results
|
31
33
|
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
34
|
+
# params = Whisper::Params.new
|
35
|
+
# params.duration = 60_000
|
36
|
+
# whisper.transcribe "path/to/audio.wav", params do |text|
|
37
|
+
# puts text
|
38
|
+
# end
|
37
39
|
#
|
38
|
-
def transcribe: (string, Params) -> self
|
39
|
-
| (string, Params) { (String) -> void } -> self
|
40
|
+
def transcribe: (string, Params, ?n_processors: Integer) -> self
|
41
|
+
| (string, Params, ?n_processors: Integer) { (String) -> void } -> self
|
40
42
|
|
41
43
|
def model_n_vocab: () -> Integer
|
42
44
|
def model_n_audio_ctx: () -> Integer
|
@@ -49,16 +51,16 @@ module Whisper
|
|
49
51
|
|
50
52
|
# Yields each Whisper::Segment:
|
51
53
|
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
#
|
54
|
+
# whisper.transcribe("path/to/audio.wav", params)
|
55
|
+
# whisper.each_segment do |segment|
|
56
|
+
# puts segment.text
|
57
|
+
# end
|
56
58
|
#
|
57
59
|
# Returns an Enumerator if no block given:
|
58
60
|
#
|
59
|
-
#
|
60
|
-
#
|
61
|
-
#
|
61
|
+
# whisper.transcribe("path/to/audio.wav", params)
|
62
|
+
# enum = whisper.each_segment
|
63
|
+
# enum.to_a # => [#<Whisper::Segment>, ...]
|
62
64
|
#
|
63
65
|
def each_segment: { (Segment) -> void } -> void
|
64
66
|
| () -> Enumerator[Segment]
|
@@ -73,25 +75,25 @@ module Whisper
|
|
73
75
|
|
74
76
|
# Start time of a segment indexed by +segment_index+ in centiseconds (units of 10 milliseconds).
|
75
77
|
#
|
76
|
-
#
|
78
|
+
# full_get_segment_t0(3) # => 1668 (16680 ms)
|
77
79
|
#
|
78
80
|
def full_get_segment_t0: (Integer) -> Integer
|
79
81
|
|
80
82
|
# End time of a segment indexed by +segment_index+ in centiseconds (units of 10 milliseconds).
|
81
83
|
#
|
82
|
-
#
|
84
|
+
# full_get_segment_t1(3) # => 1668 (16680 ms)
|
83
85
|
#
|
84
86
|
def full_get_segment_t1: (Integer) -> Integer
|
85
87
|
|
86
88
|
# Whether the next segment indexed by +segment_index+ is predicted as a speaker turn.
|
87
89
|
#
|
88
|
-
#
|
90
|
+
# full_get_segment_speaker_turn_next(3) # => true
|
89
91
|
#
|
90
92
|
def full_get_segment_speaker_turn_next: (Integer) -> (true | false)
|
91
93
|
|
92
94
|
# Text of a segment indexed by +segment_index+.
|
93
95
|
#
|
94
|
-
#
|
96
|
+
# full_get_segment_text(3) # => "ask not what your country can do for you, ..."
|
95
97
|
#
|
96
98
|
def full_get_segment_text: (Integer) -> String
|
97
99
|
|
@@ -115,6 +117,9 @@ module Whisper
|
|
115
117
|
def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self
|
116
118
|
| (Params, _Samples, ?Integer n_samples) -> self
|
117
119
|
| (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self
|
120
|
+
|
121
|
+
def to_srt: () -> String
|
122
|
+
def to_webvtt: () -> String
|
118
123
|
end
|
119
124
|
|
120
125
|
class Params
|
@@ -281,9 +286,9 @@ module Whisper
|
|
281
286
|
|
282
287
|
# Sets new segment callback, called for every newly generated text segment.
|
283
288
|
#
|
284
|
-
#
|
285
|
-
#
|
286
|
-
#
|
289
|
+
# params.new_segment_callback = ->(context, _, n_new, user_data) {
|
290
|
+
# # ...
|
291
|
+
# }
|
287
292
|
#
|
288
293
|
def new_segment_callback=: (new_segment_callback) -> new_segment_callback
|
289
294
|
def new_segment_callback: () -> (new_segment_callback | nil)
|
@@ -296,9 +301,9 @@ module Whisper
|
|
296
301
|
|
297
302
|
# Sets progress callback, called on each progress update.
|
298
303
|
#
|
299
|
-
#
|
300
|
-
#
|
301
|
-
#
|
304
|
+
# params.progress_callback = ->(context, _, progress, user_data) {
|
305
|
+
# # ...
|
306
|
+
# }
|
302
307
|
#
|
303
308
|
# +progress+ is an Integer between 0 and 100.
|
304
309
|
#
|
@@ -326,9 +331,9 @@ module Whisper
|
|
326
331
|
|
327
332
|
# Sets abort callback, called to check if the process should be aborted.
|
328
333
|
#
|
329
|
-
#
|
330
|
-
#
|
331
|
-
#
|
334
|
+
# params.abort_callback = ->(user_data) {
|
335
|
+
# # ...
|
336
|
+
# }
|
332
337
|
#
|
333
338
|
#
|
334
339
|
def abort_callback=: (abort_callback) -> abort_callback
|
@@ -357,9 +362,9 @@ module Whisper
|
|
357
362
|
|
358
363
|
# Hook called on new segment. Yields each Whisper::Segment.
|
359
364
|
#
|
360
|
-
#
|
361
|
-
#
|
362
|
-
#
|
365
|
+
# whisper.on_new_segment do |segment|
|
366
|
+
# # ...
|
367
|
+
# end
|
363
368
|
#
|
364
369
|
def on_new_segment: { (Segment) -> void } -> void
|
365
370
|
|
@@ -373,19 +378,20 @@ module Whisper
|
|
373
378
|
|
374
379
|
# Call block to determine whether abort or not. Return +true+ when you want to abort.
|
375
380
|
#
|
376
|
-
#
|
377
|
-
#
|
378
|
-
#
|
379
|
-
#
|
380
|
-
#
|
381
|
+
# params.abort_on do
|
382
|
+
# if some_condition
|
383
|
+
# true # abort
|
384
|
+
# else
|
385
|
+
# false # continue
|
386
|
+
# end
|
381
387
|
# end
|
382
|
-
# end
|
383
388
|
#
|
384
389
|
def abort_on: { (Object user_data) -> boolish } -> void
|
385
390
|
end
|
386
391
|
|
387
392
|
class Model
|
388
393
|
def self.pre_converted_models: () -> Hash[String, Model::URI]
|
394
|
+
def self.coreml_compiled_models: () -> Hash[Model::URI, Model::ZipURI]
|
389
395
|
def self.new: () -> instance
|
390
396
|
def n_vocab: () -> Integer
|
391
397
|
def n_audio_ctx: () -> Integer
|
@@ -405,9 +411,22 @@ module Whisper
|
|
405
411
|
def to_path: -> String
|
406
412
|
def clear_cache: -> void
|
407
413
|
end
|
414
|
+
|
415
|
+
class ZipURI < URI
|
416
|
+
def cache: () -> Pathname
|
417
|
+
def clear_cache: () -> void
|
418
|
+
end
|
408
419
|
end
|
409
420
|
|
410
421
|
class Segment
|
422
|
+
type deconstructed_keys = {
|
423
|
+
start_time: (Integer | nil),
|
424
|
+
end_time: (Integer | nil),
|
425
|
+
text: (String | nil),
|
426
|
+
no_speech_prob: (Float | nil),
|
427
|
+
speaker_turn_next: (true | false | nil)
|
428
|
+
}
|
429
|
+
|
411
430
|
# Start time in milliseconds.
|
412
431
|
#
|
413
432
|
def start_time: () -> Integer
|
@@ -417,10 +436,21 @@ module Whisper
|
|
417
436
|
def end_time: () -> Integer
|
418
437
|
|
419
438
|
# Whether the next segment is predicted as a speaker turn.
|
420
|
-
def
|
439
|
+
def speaker_turn_next?: () -> (true | false)
|
421
440
|
|
422
441
|
def text: () -> String
|
423
442
|
def no_speech_prob: () -> Float
|
443
|
+
def to_srt_cue: () -> String
|
444
|
+
def to_webvtt_cue: () -> String
|
445
|
+
|
446
|
+
# Possible keys: :start_time, :end_time, :text, :no_speech_prob, :speaker_turn_next
|
447
|
+
#
|
448
|
+
# whisper.each_segment do |segment|
|
449
|
+
# segment => {start_time:, end_time:, text:, no_speech_prob:, speaker_turn_next:}
|
450
|
+
#
|
451
|
+
# puts "[#{start_time} --> #{end_time}] #{text} (no speech prob: #{no_speech_prob}#{speaker_turn_next ? ', speaker turns next' : ''})"
|
452
|
+
# end
|
453
|
+
def deconstruct_keys: (Array[:start_time | :end_time | :text | :no_speech_prob | :speaker_turn_next] | nil) -> deconstructed_keys
|
424
454
|
end
|
425
455
|
|
426
456
|
module VAD
|
data/{tests → test}/helper.rb
RENAMED
@@ -3,7 +3,7 @@ require "whisper"
|
|
3
3
|
require_relative "jfk_reader/jfk_reader"
|
4
4
|
|
5
5
|
class TestBase < Test::Unit::TestCase
|
6
|
-
AUDIO = File.join(__dir__, "
|
6
|
+
AUDIO = File.join(__dir__, "fixtures", "jfk.wav")
|
7
7
|
|
8
8
|
class << self
|
9
9
|
def whisper
|
@@ -21,15 +21,4 @@ class TestBase < Test::Unit::TestCase
|
|
21
21
|
def whisper
|
22
22
|
self.class.whisper
|
23
23
|
end
|
24
|
-
|
25
|
-
module BuildOptions
|
26
|
-
load "ext/options.rb", self
|
27
|
-
Options.include self
|
28
|
-
|
29
|
-
def enable_config(name)
|
30
|
-
end
|
31
|
-
|
32
|
-
def arg_config(name)
|
33
|
-
end
|
34
|
-
end
|
35
24
|
end
|
@@ -106,4 +106,13 @@ class TestModel < TestBase
|
|
106
106
|
assert_equal 1, model.ftype
|
107
107
|
assert_equal "base", model.type
|
108
108
|
end
|
109
|
+
|
110
|
+
def test_coreml_model_auto_download
|
111
|
+
uri = Whisper::Model.coreml_compiled_models[Whisper::Model.pre_converted_models["tiny"]]
|
112
|
+
model_path = Pathname(uri.to_path).sub_ext("")
|
113
|
+
model_path.rmtree if model_path.exist?
|
114
|
+
|
115
|
+
uri.cache
|
116
|
+
assert_path_exist model_path
|
117
|
+
end
|
109
118
|
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require_relative "helper"
|
2
|
+
require 'tempfile'
|
3
|
+
require 'tmpdir'
|
4
|
+
require 'shellwords'
|
5
|
+
|
6
|
+
class TestPackage < TestBase
  # Builds the gem into a temporary file and checks that a non-empty package was written.
  def test_build
    Tempfile.create do |file|
      # The argument-list form of system runs the command without a shell, so the path
      # is passed verbatim; shell-escaping it would corrupt paths with special characters.
      assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path, exception: true)
      assert file.size > 0
      assert_path_exist file.to_path
    end
  end

  sub_test_case "Building binary on installation" do
    # Runs "rake build" before each installation test; the tests install from "pkg/".
    def setup
      system "rake", "build", exception: true
    end

    # Installs the built gem into a temporary directory and checks the installed layout.
    def test_install
      gemspec = Gem::Specification.load("whispercpp.gemspec")
      Dir.mktmpdir do |dir|
        # No shell is involved here, so arguments are passed unescaped.
        system "gem", "install", "--install-dir", dir, "--no-document", "pkg/#{gemspec.file_name}", exception: true
        assert_installed dir, gemspec.version
      end
    end

    # On macOS only: installs with --enable-whisper-coreml, loads a model through the
    # installed library, and checks that the system info string reports "COREML = 1".
    def test_install_with_coreml
      omit_unless RUBY_PLATFORM.match?(/darwin/) do
        gemspec = Gem::Specification.load("whispercpp.gemspec")
        Dir.mktmpdir do |dir|
          system "gem", "install", "--install-dir", dir, "--no-document", "pkg/#{gemspec.file_name}", "--", "--enable-whisper-coreml", exception: true
          assert_installed dir, gemspec.version
          libdir = File.join(dir, "gems", "#{gemspec.name}-#{gemspec.version}", "lib")
          assert_nothing_raised do
            system "ruby", "-I", libdir, "-r", "whisper", "-e", "Whisper::Context.new('tiny')", exception: true
          end
          # Backticks do go through a shell, so the interpolated path is escaped here.
          assert_match(/COREML = 1/, `ruby -I #{libdir.shellescape} -r whisper -e 'puts Whisper.system_info_str'`)
        end
      end
    end

    private

    # Asserts that the compiled extension and LICENSE are installed under +dir+ and
    # that no ext/build directory is left in the installed gem.
    def assert_installed(dir, version)
      assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", "whisper.#{RbConfig::CONFIG["DLEXT"]}")
      assert_path_exist File.join(dir, "gems/whispercpp-#{version}/LICENSE")
      assert_path_not_exist File.join(dir, "gems/whispercpp-#{version}/ext/build")
    end
  end
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
require_relative "helper"
|
2
|
+
|
3
|
+
class TestSegment < TestBase
|
4
|
+
def test_iteration
|
5
|
+
whisper.each_segment do |segment|
|
6
|
+
assert_instance_of Whisper::Segment, segment
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_enumerator
|
11
|
+
enum = whisper.each_segment
|
12
|
+
assert_instance_of Enumerator, enum
|
13
|
+
enum.to_a.each_with_index do |segment, index|
|
14
|
+
assert_instance_of Whisper::Segment, segment
|
15
|
+
assert_kind_of Integer, index
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_start_time
|
20
|
+
i = 0
|
21
|
+
whisper.each_segment do |segment|
|
22
|
+
assert_equal 0, segment.start_time if i == 0
|
23
|
+
i += 1
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_end_time
|
28
|
+
i = 0
|
29
|
+
whisper.each_segment do |segment|
|
30
|
+
assert_equal whisper.full_get_segment_t1(i) * 10, segment.end_time
|
31
|
+
i += 1
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_no_speech_prob
|
36
|
+
no_speech_prob = nil
|
37
|
+
whisper.each_segment do |segment|
|
38
|
+
no_speech_prob = segment.no_speech_prob
|
39
|
+
end
|
40
|
+
assert no_speech_prob > 0.0
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_on_new_segment
|
44
|
+
params = Whisper::Params.new
|
45
|
+
seg = nil
|
46
|
+
index = 0
|
47
|
+
params.on_new_segment do |segment|
|
48
|
+
assert_instance_of Whisper::Segment, segment
|
49
|
+
if index == 0
|
50
|
+
seg = segment
|
51
|
+
assert_equal 0, segment.start_time
|
52
|
+
assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
|
53
|
+
end
|
54
|
+
index += 1
|
55
|
+
end
|
56
|
+
whisper.transcribe(AUDIO, params)
|
57
|
+
assert_equal 0, seg.start_time
|
58
|
+
assert_match(/ask not what your country can do for you, ask what you can do for your country/, seg.text)
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_on_new_segment_twice
|
62
|
+
params = Whisper::Params.new
|
63
|
+
seg = nil
|
64
|
+
params.on_new_segment do |segment|
|
65
|
+
seg = segment
|
66
|
+
return
|
67
|
+
end
|
68
|
+
params.on_new_segment do |segment|
|
69
|
+
assert_same seg, segment
|
70
|
+
return
|
71
|
+
end
|
72
|
+
whisper.transcribe(AUDIO, params)
|
73
|
+
end
|
74
|
+
|
75
|
+
# Checks what a Segment obtained before a second transcription reports afterwards:
# once the audio is transcribed again with a 5000 offset, the previously retrieved
# segment's text no longer matches the full sentence and matches only its tail.
def test_transcription_after_segment_retrieved
  segment = whisper.each_segment.first
  assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)

  whisper.transcribe(AUDIO, Whisper::Params.new(offset: 5000))
  assert_not_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
  assert_match(/what you can do for your country/i, segment.text)
end
|
84
|
+
|
85
|
+
# Destructures the first segment with a hash pattern covering all five keys
# and checks each bound value against the matching accessor.
def test_pattern_matching
  first = whisper.each_segment.first
  first => {start_time: started, end_time: ended, text: body, no_speech_prob: prob, speaker_turn_next: turn}

  assert_equal(first.start_time, started)
  assert_equal(first.end_time, ended)
  assert_equal(first.text, body)
  assert_equal(first.no_speech_prob, prob)
  assert_equal(first.speaker_turn_next?, turn)
end
|
95
|
+
|
96
|
+
# Destructures the first segment with a hash pattern naming only three of
# its keys and checks each bound value against the matching accessor.
def test_pattern_matching_partial
  first = whisper.each_segment.first
  first => {start_time: started, end_time: ended, text: body}

  assert_equal(first.start_time, started)
  assert_equal(first.end_time, ended)
  assert_equal(first.text, body)
end
|
104
|
+
|
105
|
+
# Calls deconstruct_keys with all five keys and expects a hash whose values
# equal the corresponding accessors.
def test_deconstruct_keys
  seg = whisper.each_segment.first
  wanted = {
    start_time: seg.start_time,
    end_time: seg.end_time,
    text: seg.text,
    no_speech_prob: seg.no_speech_prob,
    speaker_turn_next: seg.speaker_turn_next?
  }
  requested_keys = %i[start_time end_time text no_speech_prob speaker_turn_next]

  assert_equal(wanted, seg.deconstruct_keys(requested_keys))
end
|
116
|
+
|
117
|
+
# Currently omitted ("Undefined behavior"). The body after `omit` expects an
# empty hash when only an unknown key is requested.
def test_deconstruct_keys_non_existent
  omit("Undefined behavior")

  seg = whisper.each_segment.first
  unknown_keys = [:non_existent]

  assert_equal({}, seg.deconstruct_keys(unknown_keys))
end
|
124
|
+
|
125
|
+
# Currently omitted ("Undefined behavior"). The body after `omit` expects an
# empty hash when the five known keys plus one extra key are requested.
def test_deconstruct_keys_too_many_keys
  omit("Undefined behavior")

  seg = whisper.each_segment.first
  requested_keys = %i[start_time end_time text no_speech_prob speaker_turn_next extra_key]

  assert_equal({}, seg.deconstruct_keys(requested_keys))
end
|
132
|
+
|
133
|
+
# Currently omitted ("Undefined behavior"). The body after `omit` requests
# four known keys plus one unknown key and expects only the four known
# entries back.
def test_deconstruct_keys_includes_non_existent_keys_not_too_many
  omit("Undefined behavior")

  seg = whisper.each_segment.first

  wanted = {
    start_time: seg.start_time,
    end_time: seg.end_time,
    text: seg.text,
    no_speech_prob: seg.no_speech_prob
  }
  requested_keys = %i[start_time end_time text no_speech_prob non_existent]

  assert_equal(wanted, seg.deconstruct_keys(requested_keys))
end
|
146
|
+
end
|
@@ -20,6 +20,24 @@ class TestWhisper < TestBase
|
|
20
20
|
}
|
21
21
|
end
|
22
22
|
|
23
|
+
# Transcribes with n_processors: 1 and checks the text yielded to the block.
def test_transcribe_non_parallel
  @whisper = Whisper::Context.new("base.en")
  params = Whisper::Params.new

  @whisper.transcribe(AUDIO, params, n_processors: 1) do |transcript|
    assert_match(/ask not what your country can do for you, ask what you can do for your country/, transcript)
  end
end
|
31
|
+
|
32
|
+
# Transcribes with n_processors: 4 and checks the text yielded to the block.
# The pattern here is case-insensitive and accepts either "," or "." between
# the two clauses.
def test_transcribe_n_processors
  @whisper = Whisper::Context.new("base.en")
  params = Whisper::Params.new

  @whisper.transcribe(AUDIO, params, n_processors: 4) do |transcript|
    assert_match(/ask not what your country can do for you[,.] ask what you can do for your country/i, transcript)
  end
end
|
40
|
+
|
23
41
|
sub_test_case "After transcription" do
|
24
42
|
def test_full_n_segments
|
25
43
|
assert_equal 1, whisper.full_n_segments
|
@@ -94,6 +112,14 @@ class TestWhisper < TestBase
|
|
94
112
|
end
|
95
113
|
end
|
96
114
|
|
115
|
+
# Checks that Whisper.system_info_str starts with the
# "WHISPER : COREML = <digit> | OPENVINO = <digit> |" prefix.
#
# The literal pipe separators are escaped: an unescaped `|` is regexp
# alternation, and a trailing bare `|` adds an empty alternative that matches
# every string, which would make the assertion vacuous.
def test_system_info_str
  assert_match(/\AWHISPER : COREML = \d \| OPENVINO = \d \|/, Whisper.system_info_str)
end
|
118
|
+
|
119
|
+
# Whisper::VERSION must be a String.
def test_version
  assert_kind_of(String, Whisper::VERSION)
end
|
122
|
+
|
97
123
|
def test_log_set
|
98
124
|
user_data = Object.new
|
99
125
|
logs = []
|
@@ -223,4 +249,48 @@ class TestWhisper < TestBase
|
|
223
249
|
assert_match(/for your country/i, text)
|
224
250
|
end
|
225
251
|
end
|
252
|
+
|
253
|
+
# Transcribes the sample audio and checks the first three lines of the SRT
# output: cue number, "HH:MM:SS,mmm --> HH:MM:SS,mmm" timing, then the text.
def test_to_srt
  context = Whisper::Context.new("base.en")
  context.transcribe(AUDIO, @params)

  number_line, timing_line, text_line = context.to_srt.lines
  assert_match(/\A\d+\n/, number_line)
  assert_match(/\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n/, timing_line)
  assert_match(/ask not what your country can do for you, ask what you can do for your country/, text_line)
end
|
262
|
+
|
263
|
+
# Transcribes the sample audio and checks the first five lines of the WebVTT
# output: "WEBVTT" header, blank line, cue number,
# "HH:MM:SS.mmm --> HH:MM:SS.mmm" timing, then the text.
def test_to_webvtt
  context = Whisper::Context.new("base.en")
  context.transcribe(AUDIO, @params)

  header_line, blank_line, number_line, timing_line, text_line = context.to_webvtt.lines
  assert_equal("WEBVTT\n", header_line)
  assert_equal("\n", blank_line)
  assert_match(/\A\d+\n/, number_line)
  assert_match(/\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}\n/, timing_line)
  assert_match(/ask not what your country can do for you, ask what you can do for your country/, text_line)
end
|
274
|
+
|
275
|
+
# Cases where segment text contains "&", "-->" and "<--".
#
# NOTE(review): both expected strings below are identical to the raw stubbed
# text. Given the "escape" naming, confirm against the upstream test file
# whether the expectations originally contained HTML entities that the diff
# viewer decoded.
sub_test_case "Format needs escape" do
  # Transcribes once, then stubs the first segment's #text and replaces
  # #each_segment so that it yields that stubbed segment three times.
  def setup
    @whisper = Whisper::Context.new("base.en")
    @whisper.transcribe(AUDIO, Whisper::Params.new)
    stubbed = @whisper.each_segment.first
    stubbed.define_singleton_method(:text) do
      "& so my fellow Americans --> ask not what your country can do for you <-- ask what you can do for your country."
    end
    @whisper.define_singleton_method(:each_segment) do
      Enumerator.new(3) { |yielder| 3.times { yielder << stubbed } }
    end
  end

  # Third line of the SRT output is the text of the first cue.
  def test_to_srt_escape
    text_line = @whisper.to_srt.lines[2]
    assert_equal("& so my fellow Americans --> ask not what your country can do for you <-- ask what you can do for your country.\n", text_line)
  end

  # Fifth line of the WebVTT output is the text of the first cue.
  def test_to_webvtt_escape
    text_line = @whisper.to_webvtt.lines[4]
    assert_equal("& so my fellow Americans --> ask not what your country can do for you <-- ask what you can do for your country.\n", text_line)
  end
end
|
226
296
|
end
|