whispercpp 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
data/ext/{whisper.h → include/whisper.h} CHANGED
@@ -2,6 +2,7 @@
2
2
  #define WHISPER_H
3
3
 
4
4
  #include "ggml.h"
5
+ #include "ggml-cpu.h"
5
6
 
6
7
  #include <stddef.h>
7
8
  #include <stdint.h>
@@ -99,6 +100,7 @@ extern "C" {
99
100
  WHISPER_AHEADS_LARGE_V1,
100
101
  WHISPER_AHEADS_LARGE_V2,
101
102
  WHISPER_AHEADS_LARGE_V3,
103
+ WHISPER_AHEADS_LARGE_V3_TURBO,
102
104
  };
103
105
 
104
106
  typedef struct whisper_ahead {
@@ -113,6 +115,7 @@ extern "C" {
113
115
 
114
116
  struct whisper_context_params {
115
117
  bool use_gpu;
118
+ bool flash_attn;
116
119
  int gpu_device; // CUDA device
117
120
 
118
121
  // [EXPERIMENTAL] Token-level timestamps with DTW
@@ -237,6 +240,13 @@ extern "C" {
237
240
  // GPU, by caching compiled 'blobs' there.
238
241
  // Set to nullptr if not used.
239
242
  // Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
243
+ WHISPER_API int whisper_ctx_init_openvino_encoder_with_state(
244
+ struct whisper_context * ctx,
245
+ struct whisper_state * state,
246
+ const char * model_path,
247
+ const char * device,
248
+ const char * cache_dir);
249
+
240
250
  WHISPER_API int whisper_ctx_init_openvino_encoder(
241
251
  struct whisper_context * ctx,
242
252
  const char * model_path,
@@ -265,22 +275,6 @@ extern "C" {
265
275
  int n_samples,
266
276
  int n_threads);
267
277
 
268
- // Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
269
- // The resulting spectrogram is stored inside the default state of the provided whisper context.
270
- // Returns 0 on success
271
- WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
272
- struct whisper_context * ctx,
273
- const float * samples,
274
- int n_samples,
275
- int n_threads);
276
-
277
- WHISPER_API int whisper_pcm_to_mel_phase_vocoder_with_state(
278
- struct whisper_context * ctx,
279
- struct whisper_state * state,
280
- const float * samples,
281
- int n_samples,
282
- int n_threads);
283
-
284
278
  // This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
285
279
  // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
286
280
  // n_mel must be 80
@@ -350,7 +344,7 @@ extern "C" {
350
344
  int whisper_token_count(struct whisper_context * ctx, const char * text);
351
345
 
352
346
  // Largest language id (i.e. number of available languages - 1)
353
- WHISPER_API int whisper_lang_max_id();
347
+ WHISPER_API int whisper_lang_max_id(void);
354
348
 
355
349
  // Return the id of the specified language, returns -1 if not found
356
350
  // Examples:
@@ -430,6 +424,14 @@ extern "C" {
430
424
  WHISPER_API whisper_token whisper_token_transcribe(struct whisper_context * ctx);
431
425
 
432
426
  // Performance information from the default state.
427
+ struct whisper_timings {
428
+ float sample_ms;
429
+ float encode_ms;
430
+ float decode_ms;
431
+ float batchd_ms;
432
+ float prompt_ms;
433
+ };
434
+ WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
433
435
  WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
434
436
  WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);
435
437
 
@@ -498,7 +500,6 @@ extern "C" {
498
500
 
499
501
  // [EXPERIMENTAL] speed-up techniques
500
502
  // note: these can significantly reduce the quality of the output
501
- bool speed_up; // speed-up the audio by 2x using Phase Vocoder
502
503
  bool debug_mode; // enable debug_mode provides extra info (eg. Dump log_mel)
503
504
  int audio_ctx; // overwrite the audio context size (0 = use default)
504
505
 
@@ -533,7 +534,7 @@ extern "C" {
533
534
  float temperature_inc;
534
535
  float entropy_thold; // similar to OpenAI's "compression_ratio_threshold"
535
536
  float logprob_thold;
536
- float no_speech_thold; // TODO: not implemented
537
+ float no_speech_thold;
537
538
 
538
539
  struct {
539
540
  int best_of; // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264
@@ -572,10 +573,10 @@ extern "C" {
572
573
  };
573
574
 
574
575
  // NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
575
- WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref();
576
- WHISPER_API struct whisper_context_params whisper_context_default_params(void);
576
+ WHISPER_API struct whisper_context_params * whisper_context_default_params_by_ref(void);
577
+ WHISPER_API struct whisper_context_params whisper_context_default_params (void);
577
578
  WHISPER_API struct whisper_full_params * whisper_full_default_params_by_ref(enum whisper_sampling_strategy strategy);
578
- WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
579
+ WHISPER_API struct whisper_full_params whisper_full_default_params (enum whisper_sampling_strategy strategy);
579
580
 
580
581
  // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
581
582
  // Not thread safe for same context
data/ext/metal-embed.mk ADDED
@@ -0,0 +1,17 @@
1
+ ggml/src/ggml-metal/ggml-metal-embed.o: \
2
+ ggml/src/ggml-metal/ggml-metal.metal \
3
+ ggml/src/ggml-metal/ggml-metal-impl.h \
4
+ ggml/src/ggml-common.h
5
+ @echo "Embedding Metal library"
6
+ @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
7
+ @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
8
+ $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
9
+ @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
10
+ @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
11
+ @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
12
+ @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
13
+ @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
14
+ @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
15
+ $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
16
+ @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
17
+ @rmdir ${TEMP_ASSEMBLY}
data/ext/metal.mk ADDED
@@ -0,0 +1,6 @@
1
+ ggml/src/ggml-metal/ggml-metal.o: \
2
+ ggml/src/ggml-metal/ggml-metal.m \
3
+ ggml/src/ggml-metal/ggml-metal-impl.h \
4
+ ggml/include/ggml-metal.h \
5
+ ggml/include/ggml.h
6
+ $(CC) $(CFLAGS) -c $< -o $@