whisper.rn 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/android/src/main/java/com/rnwhisper/RNWhisper.java +21 -16
  2. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  4. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  5. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  8. package/cpp/ggml-backend.cpp +36 -18
  9. package/cpp/ggml-backend.h +1 -1
  10. package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
  11. package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
  12. package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
  13. package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
  14. package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
  15. package/cpp/ggml-cpu/common.h +3 -2
  16. package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
  17. package/cpp/ggml-cpu/ggml-cpu.c +95 -17
  18. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  19. package/cpp/ggml-cpu/ops.cpp +775 -74
  20. package/cpp/ggml-cpu/ops.h +7 -0
  21. package/cpp/ggml-cpu/quants.c +25 -24
  22. package/cpp/ggml-cpu/repack.cpp +15 -14
  23. package/cpp/ggml-cpu/simd-mappings.h +211 -33
  24. package/cpp/ggml-cpu/vec.cpp +26 -2
  25. package/cpp/ggml-cpu/vec.h +99 -45
  26. package/cpp/ggml-cpu.h +2 -0
  27. package/cpp/ggml-impl.h +125 -183
  28. package/cpp/ggml-metal-impl.h +27 -0
  29. package/cpp/ggml-metal.m +298 -41
  30. package/cpp/ggml-quants.c +6 -6
  31. package/cpp/ggml-whisper-sim.metallib +0 -0
  32. package/cpp/ggml-whisper.metallib +0 -0
  33. package/cpp/ggml.c +269 -40
  34. package/cpp/ggml.h +122 -2
  35. package/cpp/gguf.cpp +5 -1
  36. package/cpp/whisper.cpp +4 -0
  37. package/cpp/whisper.h +2 -0
  38. package/ios/RNWhisper.mm +28 -31
  39. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  47. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  48. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  55. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  56. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  57. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  64. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  71. package/package.json +1 -1
package/android/src/main/java/com/rnwhisper/RNWhisper.java

@@ -326,7 +326,7 @@ public class RNWhisper implements LifecycleEventListener {
     @Override
     protected Void doInBackground(Void... voids) {
       try {
-        onHostDestroy();
+        releaseAllContexts();
       } catch (Exception e) {
         exception = e;
       }
@@ -529,10 +529,7 @@ public class RNWhisper implements LifecycleEventListener {
     @Override
     protected Void doInBackground(Void... voids) {
       try {
-        for (WhisperVadContext vadContext : vadContexts.values()) {
-          vadContext.release();
-        }
-        vadContexts.clear();
+        releaseAllVadContexts();
       } catch (Exception e) {
         exception = e;
       }
@@ -560,27 +557,35 @@ public class RNWhisper implements LifecycleEventListener {
   public void onHostPause() {
   }
 
-  @Override
-  public void onHostDestroy() {
+  private void releaseAllContexts() {
     for (WhisperContext context : contexts.values()) {
       context.stopCurrentTranscribe();
     }
-    for (AsyncTask task : tasks.keySet()) {
-      try {
-        task.get();
-      } catch (Exception e) {
-        Log.e(NAME, "Failed to wait for task", e);
-      }
-    }
+    WhisperContext.abortAllTranscribe(); // graceful abort
     for (WhisperContext context : contexts.values()) {
       context.release();
     }
+    contexts.clear();
+  }
+
+  private void releaseAllVadContexts() {
     for (WhisperVadContext vadContext : vadContexts.values()) {
       vadContext.release();
     }
-    WhisperContext.abortAllTranscribe(); // graceful abort
-    contexts.clear();
     vadContexts.clear();
+  }
+
+  @Override
+  public void onHostDestroy() {
+    for (AsyncTask task : tasks.keySet()) {
+      try {
+        task.get();
+      } catch (Exception e) {
+        Log.e(NAME, "Failed to wait for task", e);
+      }
+    }
     downloader.clearCache();
+    releaseAllContexts();
+    releaseAllVadContexts();
   }
 }
package/cpp/ggml-backend.cpp

@@ -817,8 +817,9 @@ static void wsp_ggml_backend_sched_print_assignments(wsp_ggml_backend_sched_t sc
     }
     if (sched->debug > 1) {
         wsp_ggml_backend_t tensor_backend = wsp_ggml_backend_sched_get_tensor_backend(sched, node);
-        WSP_GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, wsp_ggml_op_name(node->op), node->name,
-            fmt_size(wsp_ggml_nbytes(node)), tensor_backend ? wsp_ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node));
+        WSP_GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, wsp_ggml_op_name(node->op), node->name,
+            fmt_size(wsp_ggml_nbytes(node)), tensor_backend ? wsp_ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
+            graph->use_counts[wsp_ggml_hash_find(&graph->visited_hash_set, node)]);
         for (int j = 0; j < WSP_GGML_MAX_SRC; j++) {
             struct wsp_ggml_tensor * src = node->src[j];
             if (src == NULL) {
@@ -1826,7 +1827,7 @@ void wsp_ggml_backend_graph_copy_free(struct wsp_ggml_backend_graph_copy copy) {
     wsp_ggml_free(copy.ctx_unallocated);
 }
 
-bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data) {
+bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node) {
     struct wsp_ggml_backend_graph_copy copy = wsp_ggml_backend_graph_copy(backend2, graph);
     if (copy.buffer == NULL) {
         return false;
@@ -1837,28 +1838,45 @@ bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggm
 
     assert(g1->n_nodes == g2->n_nodes);
 
-    for (int i = 0; i < g1->n_nodes; i++) {
-        struct wsp_ggml_tensor * t1 = g1->nodes[i];
-        struct wsp_ggml_tensor * t2 = g2->nodes[i];
+    if (test_node != nullptr) {
+        // Compute the whole graph and only test the output for a specific tensor
+        wsp_ggml_backend_graph_compute(backend1, g1);
+        wsp_ggml_backend_graph_compute(backend2, g2);
 
-        assert(t1->op == t2->op && wsp_ggml_are_same_layout(t1, t2));
+        int test_node_idx = -1;
+        for (int i = 0; i < g1->n_nodes; i++) {
+            struct wsp_ggml_tensor * t1 = g1->nodes[i];
+            if (t1 == test_node) {
+                test_node_idx = i;
+                break;
+            }
+        }
+        WSP_GGML_ASSERT(test_node_idx != -1);
 
-        struct wsp_ggml_cgraph g1v = wsp_ggml_graph_view(g1, i, i + 1);
-        struct wsp_ggml_cgraph g2v = wsp_ggml_graph_view(g2, i, i + 1);
+        callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
+    } else {
+        for (int i = 0; i < g1->n_nodes; i++) {
+            struct wsp_ggml_tensor * t1 = g1->nodes[i];
+            struct wsp_ggml_tensor * t2 = g2->nodes[i];
 
-        wsp_ggml_backend_graph_compute(backend1, &g1v);
-        wsp_ggml_backend_graph_compute(backend2, &g2v);
+            assert(t1->op == t2->op && wsp_ggml_are_same_layout(t1, t2));
 
-        if (wsp_ggml_is_view_op(t1->op)) {
-            continue;
-        }
+            struct wsp_ggml_cgraph g1v = wsp_ggml_graph_view(g1, i, i + 1);
+            struct wsp_ggml_cgraph g2v = wsp_ggml_graph_view(g2, i, i + 1);
 
-        // compare results, calculate rms etc
-        if (!callback(i, t1, t2, user_data)) {
-            break;
+            wsp_ggml_backend_graph_compute(backend1, &g1v);
+            wsp_ggml_backend_graph_compute(backend2, &g2v);
+
+            if (wsp_ggml_is_view_op(t1->op)) {
+                continue;
+            }
+
+            // compare results, calculate rms etc
+            if (!callback(i, t1, t2, user_data)) {
+                break;
+            }
         }
     }
-
     wsp_ggml_backend_graph_copy_free(copy);
 
     return true;
package/cpp/ggml-backend.h

@@ -339,7 +339,7 @@ extern "C" {
     typedef bool (*wsp_ggml_backend_eval_callback)(int node_index, struct wsp_ggml_tensor * t1, struct wsp_ggml_tensor * t2, void * user_data);
 
     // Compare the output of two backends
-    WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data);
+    WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node);
 
     // Tensor initialization
     WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
package/cpp/ggml-cpu/amx/mmq.cpp

@@ -8,6 +8,7 @@
 #include "mmq.h"
 #include "ggml-impl.h"
 #include "ggml-cpu-impl.h"
+#include "simd-mappings.h"
 #include "quants.h"
 #include "ggml-quants.h"
 #include <algorithm>
@@ -453,7 +454,7 @@ void wsp_quantize_row_q8_K_vnni(const float * RESTRICT x, void * RESTRICT vy, in
 
     // Quantize these floats
     const float iscale = 127.f / amax;
-    y[i].d = WSP_GGML_FP32_TO_FP16(1 / iscale);
+    y[i].d = WSP_GGML_CPU_FP32_TO_FP16(1 / iscale);
     const float id = ( amax != 0.0f ) ? iscale : 0.f;
     const __m512 vscale = _mm512_set1_ps(id);
 
@@ -1090,7 +1091,7 @@ struct acc_C<block_q8_0, block_q4_0, is_acc> {
     const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
 
     for (int m = 0; m < nr; ++m) {
-        const __m512 vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[m * lda].d));
+        const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
         const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
 
         __m512 vsum;
@@ -1113,8 +1114,8 @@ struct acc_C<block_q8_1, block_q4_1, is_acc> {
     const __m512 vm0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset + TILE_N * sizeof(wsp_ggml_half))));
 
     for (int m = 0; m < nr; ++m) {
-        const __m512 vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[m * lda].d));
-        const __m512 vs1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[m * lda].s));
+        const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
+        const __m512 vs1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].s));
         const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
 
         __m512 vsum;
@@ -1137,7 +1138,7 @@ struct acc_C<block_q8_0, block_q8_0, is_acc> {
     const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
 
     for (int m = 0; m < nr; ++m) {
-        const __m512 vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[m * lda].d));
+        const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
         const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
 
         __m512 vsum;
@@ -1437,7 +1438,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOCK_N, BLO
             va[k] = _mm512_set1_epi32(a_ptr[k]);
             vcomp = _mm512_dpbusd_epi32(vcomp, off, va[k]);
         }
-        vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[0 * KB + i].d));
+        vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
     }
 
     // load b
@@ -1498,8 +1499,8 @@ struct tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, BLOCK_K>
         for (int k = 0; k < 8; ++k) {
            va[k] = _mm512_set1_epi32(a_ptr[k]);
         }
-        vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[0 * KB + i].d));
-        vs1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[0 * KB + i].s));
+        vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
+        vs1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].s));
     }
 
     // load b
@@ -1571,7 +1572,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOCK_N, BLO
             va[k] = _mm512_set1_epi32(a_ptr[k]);
             va[k] = _mm512_add_epi8(va[k], off);
         }
-        vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[0 * KB + i].d));
+        vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
     }
 
     // load b
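
The mmq.cpp hunks after the include are mechanical renames: each generic WSP_GGML_FP16_TO_FP32 / WSP_GGML_FP32_TO_FP16 call becomes its WSP_GGML_CPU_* counterpart, supplied by the newly included simd-mappings.h so the CPU backend can route the conversion through architecture-specific code paths. A scalar sketch of the round-trip these macros perform (values and variable names are illustrative, not from the diff):

// Illustrative only: store a quantization scale as fp16, then read it back.
// WSP_GGML_CPU_FP32_TO_FP16 / WSP_GGML_CPU_FP16_TO_FP32 come from simd-mappings.h.
const float amax   = 3.5f;                                  // hypothetical max magnitude
const float iscale = 127.f / amax;
wsp_ggml_fp16_t d  = WSP_GGML_CPU_FP32_TO_FP16(1 / iscale); // half-precision block scale
const float d32    = WSP_GGML_CPU_FP16_TO_FP32(d);          // back to fp32 for the kernel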