whisper.rn 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/android/src/main/java/com/rnwhisper/RNWhisper.java +24 -18
  2. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +1 -57
  3. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  5. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  9. package/cpp/ggml-backend.cpp +36 -18
  10. package/cpp/ggml-backend.h +1 -1
  11. package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
  12. package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
  13. package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
  14. package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
  15. package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
  16. package/cpp/ggml-cpu/common.h +3 -2
  17. package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
  18. package/cpp/ggml-cpu/ggml-cpu.c +95 -17
  19. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  20. package/cpp/ggml-cpu/ops.cpp +775 -74
  21. package/cpp/ggml-cpu/ops.h +7 -0
  22. package/cpp/ggml-cpu/quants.c +25 -24
  23. package/cpp/ggml-cpu/repack.cpp +15 -14
  24. package/cpp/ggml-cpu/simd-mappings.h +211 -33
  25. package/cpp/ggml-cpu/vec.cpp +26 -2
  26. package/cpp/ggml-cpu/vec.h +99 -45
  27. package/cpp/ggml-cpu.h +2 -0
  28. package/cpp/ggml-impl.h +125 -183
  29. package/cpp/ggml-metal-impl.h +27 -0
  30. package/cpp/ggml-metal.m +298 -41
  31. package/cpp/ggml-quants.c +6 -6
  32. package/cpp/ggml-whisper-sim.metallib +0 -0
  33. package/cpp/ggml-whisper.metallib +0 -0
  34. package/cpp/ggml.c +269 -40
  35. package/cpp/ggml.h +122 -2
  36. package/cpp/gguf.cpp +5 -1
  37. package/cpp/whisper.cpp +4 -0
  38. package/cpp/whisper.h +2 -0
  39. package/ios/RNWhisper.mm +35 -38
  40. package/ios/RNWhisperVadContext.h +1 -1
  41. package/ios/RNWhisperVadContext.mm +2 -6
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  74. package/package.json +1 -1
package/cpp/ggml.h CHANGED
@@ -470,6 +470,7 @@ extern "C" {
470
470
  WSP_GGML_OP_TRANSPOSE,
471
471
  WSP_GGML_OP_GET_ROWS,
472
472
  WSP_GGML_OP_GET_ROWS_BACK,
473
+ WSP_GGML_OP_SET_ROWS,
473
474
  WSP_GGML_OP_DIAG,
474
475
  WSP_GGML_OP_DIAG_MASK_INF,
475
476
  WSP_GGML_OP_DIAG_MASK_ZERO,
@@ -481,6 +482,7 @@ extern "C" {
481
482
  WSP_GGML_OP_CONV_TRANSPOSE_1D,
482
483
  WSP_GGML_OP_IM2COL,
483
484
  WSP_GGML_OP_IM2COL_BACK,
485
+ WSP_GGML_OP_CONV_2D,
484
486
  WSP_GGML_OP_CONV_2D_DW,
485
487
  WSP_GGML_OP_CONV_TRANSPOSE_2D,
486
488
  WSP_GGML_OP_POOL_1D,
@@ -519,6 +521,8 @@ extern "C" {
519
521
  WSP_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
520
522
  WSP_GGML_OP_OPT_STEP_ADAMW,
521
523
 
524
+ WSP_GGML_OP_GLU,
525
+
522
526
  WSP_GGML_OP_COUNT,
523
527
  };
524
528
 
@@ -542,6 +546,14 @@ extern "C" {
542
546
  WSP_GGML_UNARY_OP_COUNT,
543
547
  };
544
548
 
549
+ enum wsp_ggml_glu_op {
550
+ WSP_GGML_GLU_OP_REGLU,
551
+ WSP_GGML_GLU_OP_GEGLU,
552
+ WSP_GGML_GLU_OP_SWIGLU,
553
+
554
+ WSP_GGML_GLU_OP_COUNT,
555
+ };
556
+
545
557
  enum wsp_ggml_object_type {
546
558
  WSP_GGML_OBJECT_TYPE_TENSOR,
547
559
  WSP_GGML_OBJECT_TYPE_GRAPH,
@@ -657,6 +669,7 @@ extern "C" {
657
669
  WSP_GGML_API const char * wsp_ggml_op_symbol(enum wsp_ggml_op op);
658
670
 
659
671
  WSP_GGML_API const char * wsp_ggml_unary_op_name(enum wsp_ggml_unary_op op);
672
+ WSP_GGML_API const char * wsp_ggml_glu_op_name(enum wsp_ggml_glu_op op);
660
673
  WSP_GGML_API const char * wsp_ggml_op_desc(const struct wsp_ggml_tensor * t); // unary or op name
661
674
 
662
675
  WSP_GGML_API size_t wsp_ggml_element_size(const struct wsp_ggml_tensor * tensor);
@@ -687,6 +700,9 @@ extern "C" {
687
700
  // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
688
701
  WSP_GGML_API bool wsp_ggml_is_contiguous_channels(const struct wsp_ggml_tensor * tensor);
689
702
 
703
+ // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
704
+ WSP_GGML_API bool wsp_ggml_is_contiguous_rows(const struct wsp_ggml_tensor * tensor);
705
+
690
706
  WSP_GGML_API bool wsp_ggml_are_same_shape (const struct wsp_ggml_tensor * t0, const struct wsp_ggml_tensor * t1);
691
707
  WSP_GGML_API bool wsp_ggml_are_same_stride(const struct wsp_ggml_tensor * t0, const struct wsp_ggml_tensor * t1);
692
708
 
@@ -758,6 +774,7 @@ extern "C" {
758
774
  WSP_GGML_API void wsp_ggml_unravel_index(const struct wsp_ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
759
775
 
760
776
  WSP_GGML_API enum wsp_ggml_unary_op wsp_ggml_get_unary_op(const struct wsp_ggml_tensor * tensor);
777
+ WSP_GGML_API enum wsp_ggml_glu_op wsp_ggml_get_glu_op(const struct wsp_ggml_tensor * tensor);
761
778
 
762
779
  WSP_GGML_API void * wsp_ggml_get_data (const struct wsp_ggml_tensor * tensor);
763
780
  WSP_GGML_API float * wsp_ggml_get_data_f32(const struct wsp_ggml_tensor * tensor);
@@ -1086,6 +1103,63 @@ extern "C" {
1086
1103
  struct wsp_ggml_context * ctx,
1087
1104
  struct wsp_ggml_tensor * a);
1088
1105
 
1106
+ // gated linear unit ops
1107
+ // A: n columns, r rows,
1108
+ // result is n / 2 columns, r rows,
1109
+ // expects gate in second half of row, unless swapped is true
1110
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_glu(
1111
+ struct wsp_ggml_context * ctx,
1112
+ struct wsp_ggml_tensor * a,
1113
+ enum wsp_ggml_glu_op op,
1114
+ bool swapped);
1115
+
1116
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reglu(
1117
+ struct wsp_ggml_context * ctx,
1118
+ struct wsp_ggml_tensor * a);
1119
+
1120
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reglu_swapped(
1121
+ struct wsp_ggml_context * ctx,
1122
+ struct wsp_ggml_tensor * a);
1123
+
1124
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu(
1125
+ struct wsp_ggml_context * ctx,
1126
+ struct wsp_ggml_tensor * a);
1127
+
1128
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_swapped(
1129
+ struct wsp_ggml_context * ctx,
1130
+ struct wsp_ggml_tensor * a);
1131
+
1132
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_swiglu(
1133
+ struct wsp_ggml_context * ctx,
1134
+ struct wsp_ggml_tensor * a);
1135
+
1136
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_swiglu_swapped(
1137
+ struct wsp_ggml_context * ctx,
1138
+ struct wsp_ggml_tensor * a);
1139
+
1140
+ // A: n columns, r rows,
1141
+ // B: n columns, r rows,
1142
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_glu_split(
1143
+ struct wsp_ggml_context * ctx,
1144
+ struct wsp_ggml_tensor * a,
1145
+ struct wsp_ggml_tensor * b,
1146
+ enum wsp_ggml_glu_op op);
1147
+
1148
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_reglu_split(
1149
+ struct wsp_ggml_context * ctx,
1150
+ struct wsp_ggml_tensor * a,
1151
+ struct wsp_ggml_tensor * b);
1152
+
1153
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_split(
1154
+ struct wsp_ggml_context * ctx,
1155
+ struct wsp_ggml_tensor * a,
1156
+ struct wsp_ggml_tensor * b);
1157
+
1158
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_swiglu_split(
1159
+ struct wsp_ggml_context * ctx,
1160
+ struct wsp_ggml_tensor * a,
1161
+ struct wsp_ggml_tensor * b);
1162
+
1089
1163
  // normalize along rows
1090
1164
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm(
1091
1165
  struct wsp_ggml_context * ctx,
@@ -1375,6 +1449,23 @@ extern "C" {
1375
1449
  struct wsp_ggml_tensor * b, // row indices
1376
1450
  struct wsp_ggml_tensor * c); // data for wsp_ggml_get_rows, only used for its shape
1377
1451
 
1452
+ // a TD [n_embd, ne1, ne2, ne3]
1453
+ // b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
1454
+ // c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
1455
+ //
1456
+ // undefined behavior if destination rows overlap
1457
+ //
1458
+ // broadcast:
1459
+ // ne2 % ne11 == 0
1460
+ // ne3 % ne12 == 0
1461
+ //
1462
+ // return view(a)
1463
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_rows(
1464
+ struct wsp_ggml_context * ctx,
1465
+ struct wsp_ggml_tensor * a, // destination
1466
+ struct wsp_ggml_tensor * b, // source
1467
+ struct wsp_ggml_tensor * c); // row indices
1468
+
1378
1469
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_diag(
1379
1470
  struct wsp_ggml_context * ctx,
1380
1471
  struct wsp_ggml_tensor * a);
@@ -1723,6 +1814,17 @@ extern "C" {
1723
1814
  struct wsp_ggml_tensor * b,
1724
1815
  int stride);
1725
1816
 
1817
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_2d_direct(
1818
+ struct wsp_ggml_context * ctx,
1819
+ struct wsp_ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
1820
+ struct wsp_ggml_tensor * b, // input data [W, H, C, N]
1821
+ int s0, // stride dimension 0
1822
+ int s1, // stride dimension 1
1823
+ int p0, // padding dimension 0
1824
+ int p1, // padding dimension 1
1825
+ int d0, // dilation dimension 0
1826
+ int d1); // dilation dimension 1
1827
+
1726
1828
  enum wsp_ggml_op_pool {
1727
1829
  WSP_GGML_OP_POOL_MAX,
1728
1830
  WSP_GGML_OP_POOL_AVG,
@@ -1765,6 +1867,12 @@ extern "C" {
1765
1867
  enum wsp_ggml_scale_mode {
1766
1868
  WSP_GGML_SCALE_MODE_NEAREST = 0,
1767
1869
  WSP_GGML_SCALE_MODE_BILINEAR = 1,
1870
+
1871
+ WSP_GGML_SCALE_MODE_COUNT
1872
+ };
1873
+
1874
+ enum wsp_ggml_scale_flag {
1875
+ WSP_GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
1768
1876
  };
1769
1877
 
1770
1878
  // interpolate
@@ -1777,14 +1885,26 @@ extern "C" {
1777
1885
 
1778
1886
  // interpolate
1779
1887
  // interpolate scale to specified dimensions
1780
- WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_upscale_ext(
1888
+ WSP_GGML_DEPRECATED(WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_upscale_ext(
1781
1889
  struct wsp_ggml_context * ctx,
1782
1890
  struct wsp_ggml_tensor * a,
1783
1891
  int ne0,
1784
1892
  int ne1,
1785
1893
  int ne2,
1786
1894
  int ne3,
1787
- enum wsp_ggml_scale_mode mode);
1895
+ enum wsp_ggml_scale_mode mode),
1896
+ "use wsp_ggml_interpolate instead");
1897
+
1898
+ // Up- or downsamples the input to the specified size.
1899
+ // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
1900
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_interpolate(
1901
+ struct wsp_ggml_context * ctx,
1902
+ struct wsp_ggml_tensor * a,
1903
+ int64_t ne0,
1904
+ int64_t ne1,
1905
+ int64_t ne2,
1906
+ int64_t ne3,
1907
+ uint32_t mode); // wsp_ggml_scale_mode [ | wsp_ggml_scale_flag...]
1788
1908
 
1789
1909
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1790
1910
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad(
package/cpp/gguf.cpp CHANGED
@@ -335,7 +335,11 @@ struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_g
335
335
 
336
336
  for (uint32_t i = 0; i < magic.size(); i++) {
337
337
  if (magic[i] != WSP_GGUF_MAGIC[i]) {
338
- WSP_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
338
+ char c0 = isprint(magic[0]) ? magic[0] : '?';
339
+ char c1 = isprint(magic[1]) ? magic[1] : '?';
340
+ char c2 = isprint(magic[2]) ? magic[2] : '?';
341
+ char c3 = isprint(magic[3]) ? magic[3] : '?';
342
+ WSP_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
339
343
  wsp_gguf_free(ctx);
340
344
  return nullptr;
341
345
  }
package/cpp/whisper.cpp CHANGED
@@ -8942,6 +8942,10 @@ void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data) {
8942
8942
  wsp_ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);
8943
8943
  }
8944
8944
 
8945
+ const char * whisper_version(void) {
8946
+ return "1.7.6";
8947
+ }
8948
+
8945
8949
  WSP_GGML_ATTRIBUTE_FORMAT(2, 3)
8946
8950
  static void whisper_log_internal(wsp_ggml_log_level level, const char * format, ...) {
8947
8951
  va_list args;
package/cpp/whisper.h CHANGED
@@ -199,6 +199,8 @@ extern "C" {
199
199
  float samples_overlap; // Overlap in seconds when copying audio samples from speech segment.
200
200
  } whisper_vad_params;
201
201
 
202
+ WHISPER_API const char * whisper_version(void);
203
+
202
204
  // Various functions for loading a ggml whisper model.
203
205
  // Allocate (almost) all memory needed for the model.
204
206
  // Return NULL on failure
package/ios/RNWhisper.mm CHANGED
@@ -352,39 +352,10 @@ RCT_REMAP_METHOD(releaseAllContexts,
352
352
  withResolver:(RCTPromiseResolveBlock)resolve
353
353
  withRejecter:(RCTPromiseRejectBlock)reject)
354
354
  {
355
- [self invalidate];
355
+ [self releaseAllContexts];
356
356
  resolve(nil);
357
357
  }
358
358
 
359
- - (void)invalidate {
360
- [super invalidate];
361
-
362
- if (contexts == nil) {
363
- return;
364
- }
365
-
366
- for (NSNumber *contextId in contexts) {
367
- RNWhisperContext *context = contexts[contextId];
368
- [context invalidate];
369
- }
370
-
371
- if (vadContexts != nil) {
372
- for (NSNumber *contextId in vadContexts) {
373
- RNWhisperVadContext *vadContext = vadContexts[contextId];
374
- [vadContext invalidate];
375
- }
376
- [vadContexts removeAllObjects];
377
- vadContexts = nil;
378
- }
379
-
380
- rnwhisper::job_abort_all(); // graceful abort
381
-
382
- [contexts removeAllObjects];
383
- contexts = nil;
384
-
385
- [RNWhisperDownloader clearCache];
386
- }
387
-
388
359
  // MARK: - AudioSessionUtils
389
360
 
390
361
  RCT_EXPORT_METHOD(getAudioSessionCurrentCategory:(RCTPromiseResolveBlock)resolve
@@ -507,13 +478,16 @@ RCT_REMAP_METHOD(vadDetectSpeech,
507
478
  }
508
479
 
509
480
  // Decode base64 audio data
510
- NSData *audioData = [[NSData alloc] initWithBase64EncodedString:audioDataBase64 options:0];
511
- if (audioData == nil) {
481
+ NSData *pcmData = [[NSData alloc] initWithBase64EncodedString:audioDataBase64 options:0];
482
+ if (pcmData == nil) {
512
483
  reject(@"whisper_vad_error", @"Invalid audio data", nil);
513
484
  return;
514
485
  }
515
486
 
516
- NSArray *segments = [vadContext detectSpeech:audioData options:options];
487
+ int count = 0;
488
+ float *data = [RNWhisperAudioUtils decodeWaveData:pcmData count:&count cutHeader:NO];
489
+
490
+ NSArray *segments = [vadContext detectSpeech:data samplesCount:count options:options];
517
491
  resolve(segments);
518
492
  }
519
493
 
@@ -549,10 +523,7 @@ RCT_REMAP_METHOD(vadDetectSpeechFile,
549
523
  return;
550
524
  }
551
525
 
552
- // Convert float32 data to NSData for VAD context
553
- NSData *audioData = [NSData dataWithBytes:data length:count * sizeof(float)];
554
-
555
- NSArray *segments = [vadContext detectSpeech:audioData options:options];
526
+ NSArray *segments = [vadContext detectSpeech:data samplesCount:count options:options];
556
527
  resolve(segments);
557
528
  }
558
529
 
@@ -574,14 +545,40 @@ RCT_REMAP_METHOD(releaseVadContext,
574
545
  RCT_EXPORT_METHOD(releaseAllVadContexts:(RCTPromiseResolveBlock)resolve
575
546
  withRejecter:(RCTPromiseRejectBlock)reject)
576
547
  {
548
+ [self releaseAllVadContexts];
549
+ resolve(nil);
550
+ }
551
+
552
+ - (void)releaseAllContexts {
553
+ rnwhisper::job_abort_all(); // graceful abort
554
+ if (contexts != nil) {
555
+ for (NSNumber *contextId in contexts) {
556
+ RNWhisperContext *context = contexts[contextId];
557
+ [context invalidate];
558
+ }
559
+ [contexts removeAllObjects];
560
+ contexts = nil;
561
+ }
562
+ }
563
+
564
+ - (void)releaseAllVadContexts {
577
565
  if (vadContexts != nil) {
578
566
  for (NSNumber *contextId in vadContexts) {
579
567
  RNWhisperVadContext *vadContext = vadContexts[contextId];
580
568
  [vadContext invalidate];
581
569
  }
582
570
  [vadContexts removeAllObjects];
571
+ vadContexts = nil;
583
572
  }
584
- resolve(nil);
573
+ }
574
+
575
+ - (void)invalidate {
576
+ [super invalidate];
577
+
578
+ [self releaseAllContexts];
579
+ [self releaseAllVadContexts];
580
+
581
+ [RNWhisperDownloader clearCache];
585
582
  }
586
583
 
587
584
  #ifdef RCT_NEW_ARCH_ENABLED
package/ios/RNWhisperVadContext.h CHANGED
@@ -23,7 +23,7 @@
23
23
  - (NSString *)reasonNoMetal;
24
24
  - (struct whisper_vad_context *)getVadContext;
25
25
  - (dispatch_queue_t)getDispatchQueue;
26
- - (NSArray *)detectSpeech:(NSData *)audioData options:(NSDictionary *)options;
26
+ - (NSArray *)detectSpeech:(float *)samples samplesCount:(int)samplesCount options:(NSDictionary *)options;
27
27
  - (void)invalidate;
28
28
 
29
29
  @end
package/ios/RNWhisperVadContext.mm CHANGED
@@ -73,18 +73,14 @@
73
73
  return dQueue;
74
74
  }
75
75
 
76
- - (NSArray *)detectSpeech:(NSData *)audioData options:(NSDictionary *)options {
76
+ - (NSArray *)detectSpeech:(float *)samples samplesCount:(int)samplesCount options:(NSDictionary *)options {
77
77
  if (vctx == NULL) {
78
78
  NSLog(@"VAD context is null");
79
79
  return @[];
80
80
  }
81
81
 
82
- // Convert NSData to float array
83
- const float *samples = (const float *)[audioData bytes];
84
- int n_samples = (int)[audioData length] / sizeof(float);
85
-
86
82
  // Run VAD detection
87
- bool speechDetected = whisper_vad_detect_speech(vctx, samples, n_samples);
83
+ bool speechDetected = whisper_vad_detect_speech(vctx, samples, samplesCount);
88
84
  if (!speechDetected) {
89
85
  return @[];
90
86
  }
package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h CHANGED
@@ -339,7 +339,7 @@ extern "C" {
339
339
  typedef bool (*wsp_ggml_backend_eval_callback)(int node_index, struct wsp_ggml_tensor * t1, struct wsp_ggml_tensor * t2, void * user_data);
340
340
 
341
341
  // Compare the output of two backends
342
- WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data);
342
+ WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node);
343
343
 
344
344
  // Tensor initialization
345
345
  WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h CHANGED
@@ -101,6 +101,7 @@ extern "C" {
101
101
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_riscv_v (void);
102
102
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vsx (void);
103
103
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vxe (void);
104
+ WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_nnpa (void);
104
105
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_wasm_simd (void);
105
106
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_llamafile (void);
106
107
 
@@ -133,6 +134,7 @@ extern "C" {
133
134
 
134
135
  WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_cpu_reg(void);
135
136
 
137
+ WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
136
138
  WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_fp16(const float *, wsp_ggml_fp16_t *, int64_t);
137
139
  WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp16_to_fp32(const wsp_ggml_fp16_t *, float *, int64_t);
138
140
  WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_bf16(const float *, wsp_ggml_bf16_t *, int64_t);