whisper.rn 0.5.0-rc.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/cpp/ggml-alloc.c +1 -15
  2. package/cpp/ggml-backend-reg.cpp +17 -8
  3. package/cpp/ggml-backend.cpp +15 -22
  4. package/cpp/ggml-common.h +17 -0
  5. package/cpp/ggml-cpu/arch/arm/quants.c +132 -596
  6. package/cpp/ggml-cpu/arch/arm/repack.cpp +14 -286
  7. package/cpp/ggml-cpu/arch/x86/quants.c +184 -675
  8. package/cpp/ggml-cpu/arch/x86/repack.cpp +4679 -1657
  9. package/cpp/ggml-cpu/arch-fallback.h +34 -0
  10. package/cpp/ggml-cpu/ggml-cpu.c +22 -1
  11. package/cpp/ggml-cpu/ggml-cpu.cpp +21 -24
  12. package/cpp/ggml-cpu/ops.cpp +870 -211
  13. package/cpp/ggml-cpu/ops.h +3 -8
  14. package/cpp/ggml-cpu/quants.c +35 -0
  15. package/cpp/ggml-cpu/quants.h +8 -0
  16. package/cpp/ggml-cpu/repack.cpp +458 -47
  17. package/cpp/ggml-cpu/repack.h +22 -0
  18. package/cpp/ggml-cpu/simd-mappings.h +1 -1
  19. package/cpp/ggml-cpu/traits.cpp +2 -2
  20. package/cpp/ggml-cpu/traits.h +1 -1
  21. package/cpp/ggml-cpu/vec.cpp +12 -9
  22. package/cpp/ggml-cpu/vec.h +107 -13
  23. package/cpp/ggml-impl.h +77 -0
  24. package/cpp/ggml-metal-impl.h +51 -12
  25. package/cpp/ggml-metal.m +610 -115
  26. package/cpp/ggml-opt.cpp +97 -41
  27. package/cpp/ggml-opt.h +25 -6
  28. package/cpp/ggml-quants.c +110 -16
  29. package/cpp/ggml-quants.h +6 -0
  30. package/cpp/ggml-whisper-sim.metallib +0 -0
  31. package/cpp/ggml-whisper.metallib +0 -0
  32. package/cpp/ggml.c +314 -88
  33. package/cpp/ggml.h +137 -11
  34. package/cpp/gguf.cpp +8 -1
  35. package/cpp/jsi/RNWhisperJSI.cpp +23 -6
  36. package/cpp/whisper.cpp +15 -6
  37. package/ios/RNWhisper.mm +6 -6
  38. package/ios/RNWhisperContext.mm +2 -0
  39. package/ios/RNWhisperVadContext.mm +2 -0
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  56. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
  57. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  72. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +13 -0
  73. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  74. package/lib/module/realtime-transcription/RealtimeTranscriber.js +13 -0
  75. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  76. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
  77. package/lib/typescript/realtime-transcription/types.d.ts +6 -0
  78. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
  79. package/package.json +1 -1
  80. package/src/realtime-transcription/RealtimeTranscriber.ts +17 -0
  81. package/src/realtime-transcription/types.ts +6 -0
package/cpp/ggml-alloc.c CHANGED
@@ -22,21 +22,6 @@ static bool wsp_ggml_is_view(const struct wsp_ggml_tensor * t) {
22
22
  return t->view_src != NULL;
23
23
  }
24
24
 
25
- static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const struct wsp_ggml_tensor * b) {
26
- if (a->type != b->type) {
27
- return false;
28
- }
29
- for (int i = 0; i < WSP_GGML_MAX_DIMS; i++) {
30
- if (a->ne[i] != b->ne[i]) {
31
- return false;
32
- }
33
- if (a->nb[i] != b->nb[i]) {
34
- return false;
35
- }
36
- }
37
- return true;
38
- }
39
-
40
25
  // ops that return true for this function must not use restrict pointers for their backend implementations
41
26
  static bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
42
27
  switch (op) {
@@ -44,6 +29,7 @@ static bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
44
29
  case WSP_GGML_OP_DIAG_MASK_ZERO:
45
30
  case WSP_GGML_OP_DIAG_MASK_INF:
46
31
  case WSP_GGML_OP_ADD:
32
+ case WSP_GGML_OP_ADD_ID:
47
33
  case WSP_GGML_OP_ADD1:
48
34
  case WSP_GGML_OP_SUB:
49
35
  case WSP_GGML_OP_MUL:
package/cpp/ggml-backend-reg.cpp CHANGED
@@ -45,6 +45,14 @@
45
45
  #include "ggml-vulkan.h"
46
46
  #endif
47
47
 
48
+ #ifdef WSP_GGML_USE_WEBGPU
49
+ #include "ggml-webgpu.h"
50
+ #endif
51
+
52
+ #ifdef WSP_GGML_USE_ZDNN
53
+ #include "ggml-zdnn.h"
54
+ #endif
55
+
48
56
  #ifdef WSP_GGML_USE_OPENCL
49
57
  #include "ggml-opencl.h"
50
58
  #endif
@@ -61,10 +69,6 @@
61
69
  #include "ggml-cann.h"
62
70
  #endif
63
71
 
64
- #ifdef WSP_GGML_USE_KOMPUTE
65
- #include "ggml-kompute.h"
66
- #endif
67
-
68
72
  // disable C++17 deprecation warning for std::codecvt_utf8
69
73
  #if defined(__clang__)
70
74
  # pragma clang diagnostic push
@@ -177,6 +181,12 @@ struct wsp_ggml_backend_registry {
177
181
  #ifdef WSP_GGML_USE_VULKAN
178
182
  register_backend(wsp_ggml_backend_vk_reg());
179
183
  #endif
184
+ #ifdef WSP_GGML_USE_WEBGPU
185
+ register_backend(wsp_ggml_backend_webgpu_reg());
186
+ #endif
187
+ #ifdef WSP_GGML_USE_ZDNN
188
+ register_backend(wsp_ggml_backend_zdnn_reg());
189
+ #endif
180
190
  #ifdef WSP_GGML_USE_OPENCL
181
191
  register_backend(wsp_ggml_backend_opencl_reg());
182
192
  #endif
@@ -189,9 +199,6 @@ struct wsp_ggml_backend_registry {
189
199
  #ifdef WSP_GGML_USE_RPC
190
200
  register_backend(wsp_ggml_backend_rpc_reg());
191
201
  #endif
192
- #ifdef WSP_GGML_USE_KOMPUTE
193
- register_backend(wsp_ggml_backend_kompute_reg());
194
- #endif
195
202
  #ifdef WSP_GGML_USE_CPU
196
203
  register_backend(wsp_ggml_backend_cpu_reg());
197
204
  #endif
@@ -498,6 +505,9 @@ static wsp_ggml_backend_reg_t wsp_ggml_backend_load_best(const char * name, bool
498
505
 
499
506
  std::vector<fs::path> search_paths;
500
507
  if (user_search_path == nullptr) {
508
+ #ifdef WSP_GGML_BACKEND_DIR
509
+ search_paths.push_back(fs::u8path(WSP_GGML_BACKEND_DIR));
510
+ #endif
501
511
  // default search paths: executable directory, current directory
502
512
  search_paths.push_back(get_executable_path());
503
513
  search_paths.push_back(fs::current_path());
@@ -575,7 +585,6 @@ void wsp_ggml_backend_load_all_from_path(const char * dir_path) {
575
585
  wsp_ggml_backend_load_best("cann", silent, dir_path);
576
586
  wsp_ggml_backend_load_best("cuda", silent, dir_path);
577
587
  wsp_ggml_backend_load_best("hip", silent, dir_path);
578
- wsp_ggml_backend_load_best("kompute", silent, dir_path);
579
588
  wsp_ggml_backend_load_best("metal", silent, dir_path);
580
589
  wsp_ggml_backend_load_best("rpc", silent, dir_path);
581
590
  wsp_ggml_backend_load_best("sycl", silent, dir_path);
package/cpp/ggml-backend.cpp CHANGED
@@ -352,21 +352,6 @@ wsp_ggml_backend_dev_t wsp_ggml_backend_get_device(wsp_ggml_backend_t backend) {
352
352
 
353
353
  // backend copy
354
354
 
355
- static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const struct wsp_ggml_tensor * b) {
356
- if (a->type != b->type) {
357
- return false;
358
- }
359
- for (int i = 0; i < WSP_GGML_MAX_DIMS; i++) {
360
- if (a->ne[i] != b->ne[i]) {
361
- return false;
362
- }
363
- if (a->nb[i] != b->nb[i]) {
364
- return false;
365
- }
366
- }
367
- return true;
368
- }
369
-
370
355
  void wsp_ggml_backend_tensor_copy(struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst) {
371
356
  WSP_GGML_ASSERT(wsp_ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
372
357
 
@@ -662,6 +647,7 @@ struct wsp_ggml_backend_sched {
662
647
  // pipeline parallelism support
663
648
  int n_copies;
664
649
  int cur_copy;
650
+ int next_copy;
665
651
  wsp_ggml_backend_event_t events[WSP_GGML_SCHED_MAX_BACKENDS][WSP_GGML_SCHED_MAX_COPIES];
666
652
  struct wsp_ggml_tensor * graph_inputs[WSP_GGML_SCHED_MAX_SPLIT_INPUTS];
667
653
  int n_graph_inputs;
@@ -1085,6 +1071,11 @@ static void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, s
1085
1071
  }
1086
1072
  }
1087
1073
  }
1074
+ // if the node is still unassigned, assign it to the first backend that supports it
1075
+ for (int b = 0; b < sched->n_backends && *cur_backend_id == -1; b++) {
1076
+ wsp_ggml_backend_sched_set_if_supported(sched, node, b, cur_backend_id);
1077
+ }
1078
+ WSP_GGML_ASSERT(*cur_backend_id != -1);
1088
1079
  }
1089
1080
 
1090
1081
  // pass 5: split graph, find tensors that need to be copied
@@ -1112,7 +1103,7 @@ static void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, s
1112
1103
 
1113
1104
  const int node_backend_id = tensor_backend_id(node);
1114
1105
 
1115
- assert(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
1106
+ WSP_GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
1116
1107
 
1117
1108
  // check if we should start a new split based on the sources of the current node
1118
1109
  bool need_new_split = false;
@@ -1170,7 +1161,7 @@ static void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, s
1170
1161
 
1171
1162
  size_t src_id = hash_id(src);
1172
1163
  const int src_backend_id = sched->hv_tensor_backend_ids[src_id];
1173
- assert(src_backend_id != -1); // all inputs should be assigned by now
1164
+ WSP_GGML_ASSERT(src_backend_id != -1); // all inputs should be assigned by now
1174
1165
 
1175
1166
  if (src->flags & WSP_GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
1176
1167
  if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) {
@@ -1448,8 +1439,6 @@ static enum wsp_ggml_status wsp_ggml_backend_sched_compute_splits(wsp_ggml_backe
1448
1439
  }
1449
1440
  }
1450
1441
 
1451
- sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
1452
-
1453
1442
  return WSP_GGML_STATUS_SUCCESS;
1454
1443
  }
1455
1444
 
@@ -1550,10 +1539,10 @@ void wsp_ggml_backend_sched_reset(wsp_ggml_backend_sched_t sched) {
1550
1539
  bool wsp_ggml_backend_sched_reserve(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph) {
1551
1540
  WSP_GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
1552
1541
 
1553
- wsp_ggml_backend_sched_split_graph(sched, measure_graph);
1554
-
1555
1542
  wsp_ggml_backend_sched_synchronize(sched);
1556
1543
 
1544
+ wsp_ggml_backend_sched_split_graph(sched, measure_graph);
1545
+
1557
1546
  if (!wsp_ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
1558
1547
  return false;
1559
1548
  }
@@ -1565,6 +1554,10 @@ bool wsp_ggml_backend_sched_reserve(wsp_ggml_backend_sched_t sched, struct wsp_g
1565
1554
 
1566
1555
  bool wsp_ggml_backend_sched_alloc_graph(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * graph) {
1567
1556
  WSP_GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
1557
+ WSP_GGML_ASSERT(!sched->is_alloc);
1558
+
1559
+ sched->cur_copy = sched->next_copy;
1560
+ sched->next_copy = (sched->next_copy + 1) % sched->n_copies;
1568
1561
 
1569
1562
  wsp_ggml_backend_sched_split_graph(sched, graph);
1570
1563
 
@@ -1605,7 +1598,7 @@ void wsp_ggml_backend_sched_synchronize(wsp_ggml_backend_sched_t sched) {
1605
1598
  // if the graph is not already allocated, always use copy 0 after a synchronization
1606
1599
  // this ensures that during generation the same copy is used every time,
1607
1600
  // which avoids changes in the graph that could cause CUDA or other graphs to be disabled
1608
- sched->cur_copy = 0;
1601
+ sched->next_copy = 0;
1609
1602
  }
1610
1603
  }
1611
1604
 
package/cpp/ggml-common.h CHANGED
@@ -99,6 +99,9 @@ typedef sycl::half2 wsp_ggml_half2;
99
99
  #define QI4_1 (QK4_1 / (4 * QR4_1))
100
100
  #define QR4_1 2
101
101
 
102
+ #define QI_MXFP4 (QK_MXFP4 / (4 * QR_MXFP4))
103
+ #define QR_MXFP4 2
104
+
102
105
  #define QI5_0 (QK5_0 / (4 * QR5_0))
103
106
  #define QR5_0 2
104
107
 
@@ -184,6 +187,13 @@ typedef struct {
184
187
  } block_q4_1;
185
188
  static_assert(sizeof(block_q4_1) == 2 * sizeof(wsp_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
186
189
 
190
+ #define QK_MXFP4 32
191
+ typedef struct {
192
+ uint8_t e; // E8M0
193
+ uint8_t qs[QK_MXFP4/2];
194
+ } block_mxfp4;
195
+ static_assert(sizeof(block_mxfp4) == sizeof(uint8_t) + QK_MXFP4/2, "wrong mxfp4 block size/padding");
196
+
187
197
  #define QK5_0 32
188
198
  typedef struct {
189
199
  wsp_ggml_half d; // delta
@@ -1074,10 +1084,17 @@ WSP_GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
1074
1084
  0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
1075
1085
  WSP_GGML_TABLE_END()
1076
1086
 
1087
+ // TODO: fix name to kvalues_iq4_nl
1077
1088
  WSP_GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
1078
1089
  -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
1079
1090
  WSP_GGML_TABLE_END()
1080
1091
 
1092
+ // e2m1 values (doubled)
1093
+ // ref: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
1094
+ WSP_GGML_TABLE_BEGIN(int8_t, kvalues_mxfp4, 16)
1095
+ 0, 1, 2, 3, 4, 6, 8, 12, 0, -1, -2, -3, -4, -6, -8, -12,
1096
+ WSP_GGML_TABLE_END()
1097
+
1081
1098
  #define NGRID_IQ1S 2048
1082
1099
  #define IQ1S_DELTA 0.125f
1083
1100
  #define IQ1M_DELTA 0.125f