whispercpp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -2,6 +2,7 @@
2
2
  #define WHISPER_H
3
3
 
4
4
  #include "ggml.h"
5
+ #include "ggml-cpu.h"
5
6
 
6
7
  #include <stddef.h>
7
8
  #include <stdint.h>
@@ -99,6 +100,7 @@ extern "C" {
99
100
  WHISPER_AHEADS_LARGE_V1,
100
101
  WHISPER_AHEADS_LARGE_V2,
101
102
  WHISPER_AHEADS_LARGE_V3,
103
+ WHISPER_AHEADS_LARGE_V3_TURBO,
102
104
  };
103
105
 
104
106
  typedef struct whisper_ahead {
@@ -113,6 +115,7 @@ extern "C" {
113
115
 
114
116
  struct whisper_context_params {
115
117
  bool use_gpu;
118
+ bool flash_attn;
116
119
  int gpu_device; // CUDA device
117
120
 
118
121
  // [EXPERIMENTAL] Token-level timestamps with DTW
@@ -237,6 +240,13 @@ extern "C" {
237
240
  // GPU, by caching compiled 'blobs' there.
238
241
  // Set to nullptr if not used.
239
242
  // Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
243
+ WHISPER_API int whisper_ctx_init_openvino_encoder_with_state(
244
+ struct whisper_context * ctx,
245
+ struct whisper_state * state,
246
+ const char * model_path,
247
+ const char * device,
248
+ const char * cache_dir);
249
+
240
250
  WHISPER_API int whisper_ctx_init_openvino_encoder(
241
251
  struct whisper_context * ctx,
242
252
  const char * model_path,
@@ -265,22 +275,6 @@ extern "C" {
265
275
  int n_samples,
266
276
  int n_threads);
267
277
 
268
- // Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
269
- // The resulting spectrogram is stored inside the default state of the provided whisper context.
270
- // Returns 0 on success
271
- WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
272
- struct whisper_context * ctx,
273
- const float * samples,
274
- int n_samples,
275
- int n_threads);
276
-
277
- WHISPER_API int whisper_pcm_to_mel_phase_vocoder_with_state(
278
- struct whisper_context * ctx,
279
- struct whisper_state * state,
280
- const float * samples,
281
- int n_samples,
282
- int n_threads);
283
-
284
278
  // This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
285
279
  // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
286
280
  // n_mel must be 80
@@ -350,7 +344,7 @@ extern "C" {
350
344
  int whisper_token_count(struct whisper_context * ctx, const char * text);
351
345
 
352
346
  // Largest language id (i.e. number of available languages - 1)
353
- WHISPER_API int whisper_lang_max_id();
347
+ WHISPER_API int whisper_lang_max_id(void);
354
348
 
355
349
  // Return the id of the specified language, returns -1 if not found
356
350
  // Examples:
@@ -430,6 +424,14 @@ extern "C" {
430
424
  WHISPER_API whisper_token whisper_token_transcribe(struct whisper_context * ctx);
431
425
 
432
426
  // Performance information from the default state.
427
+ struct whisper_timings {
428
+ float sample_ms;
429
+ float encode_ms;
430
+ float decode_ms;
431
+ float batchd_ms;
432
+ float prompt_ms;
433
+ };
434
+ WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
433
435
  WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
434
436
  WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);
435
437
 
@@ -498,7 +500,6 @@ extern "C" {
498
500
 
499
501
  // [EXPERIMENTAL] speed-up techniques
500
502
  // note: these can significantly reduce the quality of the output
501
- bool speed_up; // speed-up the audio by 2x using Phase Vocoder
502
503
  bool debug_mode; // enable debug_mode provides extra info (eg. Dump log_mel)
503
504
  int audio_ctx; // overwrite the audio context size (0 = use default)
504
505
 
@@ -533,7 +534,7 @@ extern "C" {
533
534
  float temperature_inc;
534
535
  float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
535
536
  float logprob_thold;
536
- float no_speech_thold; // TODO: not implemented
537
+ float no_speech_thold;
537
538
 
538
539
  struct {
539
540
  int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
@@ -572,10 +573,10 @@ extern "C" {
572
573
  };
573
574
 
574
575
  // NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
575
- WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
576
- WHISPER_API struct whisper_context_params whisper_context_default_params(void);
576
+ WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref(void);
577
+ WHISPER_API struct whisper_context_params whisper_context_default_params (void);
577
578
  WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
578
- WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
579
+ WHISPER_API struct whisper_full_params whisper_full_default_params (enum whisper_sampling_strategy strategy);
579
580
 
580
581
  // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
581
582
  // Not thread safe for same context
data/ext/metal-embed.mk ADDED
@@ -0,0 +1,17 @@
1
+ ggml/src/ggml-metal/ggml-metal-embed.o: \
2
+ ggml/src/ggml-metal/ggml-metal.metal \
3
+ ggml/src/ggml-metal/ggml-metal-impl.h \
4
+ ggml/src/ggml-common.h
5
+ @echo "Embedding Metal library"
6
+ @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
7
+ @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
8
+ $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
9
+ @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
10
+ @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
11
+ @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
12
+ @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
13
+ @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
14
+ @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
15
+ $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
16
+ @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
17
+ @rmdir ${TEMP_ASSEMBLY}
data/ext/metal.mk ADDED
@@ -0,0 +1,6 @@
1
+ ggml/src/ggml-metal/ggml-metal.o: \
2
+ ggml/src/ggml-metal/ggml-metal.m \
3
+ ggml/src/ggml-metal/ggml-metal-impl.h \
4
+ ggml/include/ggml-metal.h \
5
+ ggml/include/ggml.h
6
+ $(CC) $(CFLAGS) -c $< -o $@