whispercpp 1.3.0 → 1.3.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
@@ -2,6 +2,7 @@
|
|
2
2
|
#define WHISPER_H
|
3
3
|
|
4
4
|
#include "ggml.h"
|
5
|
+
#include "ggml-cpu.h"
|
5
6
|
|
6
7
|
#include <stddef.h>
|
7
8
|
#include <stdint.h>
|
@@ -99,6 +100,7 @@ extern "C" {
|
|
99
100
|
WHISPER_AHEADS_LARGE_V1,
|
100
101
|
WHISPER_AHEADS_LARGE_V2,
|
101
102
|
WHISPER_AHEADS_LARGE_V3,
|
103
|
+
WHISPER_AHEADS_LARGE_V3_TURBO,
|
102
104
|
};
|
103
105
|
|
104
106
|
typedef struct whisper_ahead {
|
@@ -113,6 +115,7 @@ extern "C" {
|
|
113
115
|
|
114
116
|
struct whisper_context_params {
|
115
117
|
bool use_gpu;
|
118
|
+
bool flash_attn;
|
116
119
|
int gpu_device; // CUDA device
|
117
120
|
|
118
121
|
// [EXPERIMENTAL] Token-level timestamps with DTW
|
@@ -237,6 +240,13 @@ extern "C" {
|
|
237
240
|
// GPU, by caching compiled 'blobs' there.
|
238
241
|
// Set to nullptr if not used.
|
239
242
|
// Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
|
243
|
+
WHISPER_API int whisper_ctx_init_openvino_encoder_with_state(
|
244
|
+
struct whisper_context * ctx,
|
245
|
+
struct whisper_state * state,
|
246
|
+
const char * model_path,
|
247
|
+
const char * device,
|
248
|
+
const char * cache_dir);
|
249
|
+
|
240
250
|
WHISPER_API int whisper_ctx_init_openvino_encoder(
|
241
251
|
struct whisper_context * ctx,
|
242
252
|
const char * model_path,
|
@@ -265,22 +275,6 @@ extern "C" {
|
|
265
275
|
int n_samples,
|
266
276
|
int n_threads);
|
267
277
|
|
268
|
-
// Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
|
269
|
-
// The resulting spectrogram is stored inside the default state of the provided whisper context.
|
270
|
-
// Returns 0 on success
|
271
|
-
WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
|
272
|
-
struct whisper_context * ctx,
|
273
|
-
const float * samples,
|
274
|
-
int n_samples,
|
275
|
-
int n_threads);
|
276
|
-
|
277
|
-
WHISPER_API int whisper_pcm_to_mel_phase_vocoder_with_state(
|
278
|
-
struct whisper_context * ctx,
|
279
|
-
struct whisper_state * state,
|
280
|
-
const float * samples,
|
281
|
-
int n_samples,
|
282
|
-
int n_threads);
|
283
|
-
|
284
278
|
// This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
|
285
279
|
// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
286
280
|
// n_mel must be 80
|
@@ -350,7 +344,7 @@ extern "C" {
|
|
350
344
|
int whisper_token_count(struct whisper_context * ctx, const char * text);
|
351
345
|
|
352
346
|
// Largest language id (i.e. number of available languages - 1)
|
353
|
-
WHISPER_API int whisper_lang_max_id();
|
347
|
+
WHISPER_API int whisper_lang_max_id(void);
|
354
348
|
|
355
349
|
// Return the id of the specified language, returns -1 if not found
|
356
350
|
// Examples:
|
@@ -430,6 +424,14 @@ extern "C" {
|
|
430
424
|
WHISPER_API whisper_token whisper_token_transcribe(struct whisper_context * ctx);
|
431
425
|
|
432
426
|
// Performance information from the default state.
|
427
|
+
struct whisper_timings {
|
428
|
+
float sample_ms;
|
429
|
+
float encode_ms;
|
430
|
+
float decode_ms;
|
431
|
+
float batchd_ms;
|
432
|
+
float prompt_ms;
|
433
|
+
};
|
434
|
+
WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
|
433
435
|
WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
|
434
436
|
WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);
|
435
437
|
|
@@ -498,7 +500,6 @@ extern "C" {
|
|
498
500
|
|
499
501
|
// [EXPERIMENTAL] speed-up techniques
|
500
502
|
// note: these can significantly reduce the quality of the output
|
501
|
-
bool speed_up; // speed-up the audio by 2x using Phase Vocoder
|
502
503
|
bool debug_mode; // enable debug_mode provides extra info (eg. Dump log_mel)
|
503
504
|
int audio_ctx; // overwrite the audio context size (0 = use default)
|
504
505
|
|
@@ -533,7 +534,7 @@ extern "C" {
|
|
533
534
|
float temperature_inc;
|
534
535
|
float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
|
535
536
|
float logprob_thold;
|
536
|
-
float no_speech_thold;
|
537
|
+
float no_speech_thold;
|
537
538
|
|
538
539
|
struct {
|
539
540
|
int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
|
@@ -572,10 +573,10 @@ extern "C" {
|
|
572
573
|
};
|
573
574
|
|
574
575
|
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
|
575
|
-
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
|
576
|
-
WHISPER_API struct whisper_context_params
|
576
|
+
WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref(void);
|
577
|
+
WHISPER_API struct whisper_context_params whisper_context_default_params (void);
|
577
578
|
WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
|
578
|
-
WHISPER_API struct whisper_full_params
|
579
|
+
WHISPER_API struct whisper_full_params whisper_full_default_params (enum whisper_sampling_strategy strategy);
|
579
580
|
|
580
581
|
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
581
582
|
// Not thread safe for same context
|
data/ext/metal-embed.mk
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
ggml/src/ggml-metal/ggml-metal-embed.o: \
|
2
|
+
ggml/src/ggml-metal/ggml-metal.metal \
|
3
|
+
ggml/src/ggml-metal/ggml-metal-impl.h \
|
4
|
+
ggml/src/ggml-common.h
|
5
|
+
@echo "Embedding Metal library"
|
6
|
+
@sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
|
7
|
+
@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
|
8
|
+
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
|
9
|
+
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
10
|
+
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
11
|
+
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
12
|
+
@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
13
|
+
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
14
|
+
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
15
|
+
$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
|
16
|
+
@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
|
17
|
+
@rmdir ${TEMP_ASSEMBLY}
|