whispercpp 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
@@ -2,6 +2,7 @@
|
|
2
2
|
#define WHISPER_H
|
3
3
|
|
4
4
|
#include "ggml.h"
|
5
|
+
#include "ggml-cpu.h"
|
5
6
|
|
6
7
|
#include <stddef.h>
|
7
8
|
#include <stdint.h>
|
@@ -99,6 +100,7 @@ extern "C" {
|
|
99
100
|
WHISPER_AHEADS_LARGE_V1,
|
100
101
|
WHISPER_AHEADS_LARGE_V2,
|
101
102
|
WHISPER_AHEADS_LARGE_V3,
|
103
|
+
WHISPER_AHEADS_LARGE_V3_TURBO,
|
102
104
|
};
|
103
105
|
|
104
106
|
typedef struct whisper_ahead {
|
@@ -113,6 +115,7 @@ extern "C" {
|
|
113
115
|
|
114
116
|
struct whisper_context_params {
|
115
117
|
bool use_gpu;
|
118
|
+
bool flash_attn;
|
116
119
|
int gpu_device; // CUDA device
|
117
120
|
|
118
121
|
// [EXPERIMENTAL] Token-level timestamps with DTW
|
@@ -237,6 +240,13 @@ extern "C" {
|
|
237
240
|
// GPU, by caching compiled 'blobs' there.
|
238
241
|
// Set to nullptr if not used.
|
239
242
|
// Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
|
243
|
+
WHISPER_API int whisper_ctx_init_openvino_encoder_with_state(
|
244
|
+
struct whisper_context * ctx,
|
245
|
+
struct whisper_state * state,
|
246
|
+
const char * model_path,
|
247
|
+
const char * device,
|
248
|
+
const char * cache_dir);
|
249
|
+
|
240
250
|
WHISPER_API int whisper_ctx_init_openvino_encoder(
|
241
251
|
struct whisper_context * ctx,
|
242
252
|
const char * model_path,
|
@@ -265,22 +275,6 @@ extern "C" {
|
|
265
275
|
int n_samples,
|
266
276
|
int n_threads);
|
267
277
|
|
268
|
-
// Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
|
269
|
-
// The resulting spectrogram is stored inside the default state of the provided whisper context.
|
270
|
-
// Returns 0 on success
|
271
|
-
WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
|
272
|
-
struct whisper_context * ctx,
|
273
|
-
const float * samples,
|
274
|
-
int n_samples,
|
275
|
-
int n_threads);
|
276
|
-
|
277
|
-
WHISPER_API int whisper_pcm_to_mel_phase_vocoder_with_state(
|
278
|
-
struct whisper_context * ctx,
|
279
|
-
struct whisper_state * state,
|
280
|
-
const float * samples,
|
281
|
-
int n_samples,
|
282
|
-
int n_threads);
|
283
|
-
|
284
278
|
// This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
|
285
279
|
// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
286
280
|
// n_mel must be 80
|
@@ -350,7 +344,7 @@ extern "C" {
|
|
350
344
|
int whisper_token_count(struct whisper_context * ctx, const char * text);
|
351
345
|
|
352
346
|
// Largest language id (i.e. number of available languages - 1)
|
353
|
-
WHISPER_API int whisper_lang_max_id();
|
347
|
+
WHISPER_API int whisper_lang_max_id(void);
|
354
348
|
|
355
349
|
// Return the id of the specified language, returns -1 if not found
|
356
350
|
// Examples:
|
@@ -430,6 +424,14 @@ extern "C" {
|
|
430
424
|
WHISPER_API whisper_token whisper_token_transcribe(struct whisper_context * ctx);
|
431
425
|
|
432
426
|
// Performance information from the default state.
|
427
|
+
struct whisper_timings {
|
428
|
+
float sample_ms;
|
429
|
+
float encode_ms;
|
430
|
+
float decode_ms;
|
431
|
+
float batchd_ms;
|
432
|
+
float prompt_ms;
|
433
|
+
};
|
434
|
+
WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
|
433
435
|
WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
|
434
436
|
WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);
|
435
437
|
|
@@ -498,7 +500,6 @@ extern "C" {
|
|
498
500
|
|
499
501
|
// [EXPERIMENTAL] speed-up techniques
|
500
502
|
// note: these can significantly reduce the quality of the output
|
501
|
-
bool speed_up; // speed-up the audio by 2x using Phase Vocoder
|
502
503
|
bool debug_mode; // enable debug_mode provides extra info (eg. Dump log_mel)
|
503
504
|
int audio_ctx; // overwrite the audio context size (0 = use default)
|
504
505
|
|
@@ -533,7 +534,7 @@ extern "C" {
|
|
533
534
|
float temperature_inc;
|
534
535
|
float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
|
535
536
|
float logprob_thold;
|
536
|
-
float no_speech_thold;
|
537
|
+
float no_speech_thold;
|
537
538
|
|
538
539
|
struct {
|
539
540
|
int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
|
@@ -572,10 +573,10 @@ extern "C" {
|
|
572
573
|
};
|
573
574
|
|
574
575
|
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
|
575
|
-
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
|
576
|
-
WHISPER_API struct whisper_context_params whisper_context_default_params(void);
|
576
|
+
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref(void);
|
577
|
+
WHISPER_API struct whisper_context_params whisper_context_default_params (void);
|
577
578
|
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
|
578
|
-
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
|
579
|
+
WHISPER_API struct whisper_full_params whisper_full_default_params (enum whisper_sampling_strategy strategy);
|
579
580
|
|
580
581
|
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
581
582
|
// Not thread safe for same context
|
data/ext/metal-embed.mk
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
ggml/src/ggml-metal/ggml-metal-embed.o: \
|
2
|
+
ggml/src/ggml-metal/ggml-metal.metal \
|
3
|
+
ggml/src/ggml-metal/ggml-metal-impl.h \
|
4
|
+
ggml/src/ggml-common.h
|
5
|
+
@echo "Embedding Metal library"
|
6
|
+
@sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
|
7
|
+
@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
|
8
|
+
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
|
9
|
+
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
10
|
+
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
11
|
+
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
12
|
+
@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
13
|
+
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
14
|
+
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
15
|
+
$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
|
16
|
+
@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
|
17
|
+
@rmdir ${TEMP_ASSEMBLY}
|