whisper.rn 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
  2. package/android/src/main/jni.cpp +13 -0
  3. package/cpp/ggml-alloc.c +78 -26
  4. package/cpp/ggml-alloc.h +9 -0
  5. package/cpp/ggml-backend-impl.h +1 -1
  6. package/cpp/ggml-backend-reg.cpp +19 -3
  7. package/cpp/ggml-backend.cpp +72 -20
  8. package/cpp/ggml-backend.h +2 -1
  9. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  10. package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
  11. package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
  12. package/cpp/ggml-cpu/arch-fallback.h +50 -2
  13. package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
  14. package/cpp/ggml-cpu/ggml-cpu.c +139 -58
  15. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  16. package/cpp/ggml-cpu/ops.cpp +170 -18
  17. package/cpp/ggml-cpu/ops.h +1 -0
  18. package/cpp/ggml-cpu/repack.cpp +531 -5
  19. package/cpp/ggml-cpu/repack.h +14 -0
  20. package/cpp/ggml-cpu/simd-mappings.h +16 -18
  21. package/cpp/ggml-cpu/vec.cpp +41 -1
  22. package/cpp/ggml-cpu/vec.h +241 -138
  23. package/cpp/ggml-cpu.h +1 -0
  24. package/cpp/ggml-impl.h +0 -4
  25. package/cpp/ggml-metal/ggml-metal-context.m +26 -16
  26. package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
  27. package/cpp/ggml-metal/ggml-metal-device.h +87 -65
  28. package/cpp/ggml-metal/ggml-metal-device.m +263 -104
  29. package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
  30. package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
  31. package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
  32. package/cpp/ggml-metal/ggml-metal.cpp +6 -5
  33. package/cpp/ggml-metal/ggml-metal.metal +404 -34
  34. package/cpp/ggml.c +110 -31
  35. package/cpp/ggml.h +51 -12
  36. package/cpp/jsi/RNWhisperJSI.cpp +1 -0
  37. package/cpp/whisper.cpp +16 -3
  38. package/ios/CMakeLists.txt +21 -1
  39. package/ios/RNWhisperContext.mm +5 -0
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  58. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  67. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  74. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  75. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  76. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  77. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  78. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  79. package/lib/commonjs/jest-mock.js +2 -0
  80. package/lib/commonjs/jest-mock.js.map +1 -1
  81. package/lib/commonjs/version.json +1 -1
  82. package/lib/module/NativeRNWhisper.js.map +1 -1
  83. package/lib/module/jest-mock.js +2 -0
  84. package/lib/module/jest-mock.js.map +1 -1
  85. package/lib/module/version.json +1 -1
  86. package/lib/typescript/NativeRNWhisper.d.ts +1 -0
  87. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  88. package/package.json +1 -1
  89. package/src/NativeRNWhisper.ts +1 -0
  90. package/src/jest-mock.ts +2 -0
  91. package/src/version.json +1 -1
package/cpp/ggml.c CHANGED
@@ -61,13 +61,15 @@

  #define UNUSED WSP_GGML_UNUSED

+ // Needed for wsp_ggml_fp32_to_bf16_row()
+ #if defined(__AVX512BF16__)
  #if defined(_MSC_VER)
- #define m512bh(p) p
  #define m512i(p) p
  #else
- #define m512bh(p) (__m512bh)(p)
+ #include <immintrin.h>
  #define m512i(p) (__m512i)(p)
- #endif
+ #endif // defined(_MSC_VER)
+ #endif // defined(__AVX512BF16__)

  #if defined(__linux__) || \
  defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
@@ -132,6 +134,13 @@ static void wsp_ggml_print_backtrace_symbols(void) {
  int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
  backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
  }
+ #elif defined(__APPLE__)
+ #include <execinfo.h>
+ static void wsp_ggml_print_backtrace_symbols(void) {
+ void * trace[100];
+ int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
+ backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
+ }
  #else
  static void wsp_ggml_print_backtrace_symbols(void) {
  // platform not supported
@@ -143,6 +152,20 @@ void wsp_ggml_print_backtrace(void) {
  if (WSP_GGML_NO_BACKTRACE) {
  return;
  }
+ #if defined(__APPLE__)
+ // On macOS, fork+debugger attachment is problematic due to:
+ // 1. libdispatch "poisons" forked child processes
+ // 2. lldb has issues attaching to parent from forked child
+ // Use simple backtrace() instead to avoid Terminal.app crashes
+ const char * WSP_GGML_BACKTRACE_LLDB = getenv("WSP_GGML_BACKTRACE_LLDB");
+ if (!WSP_GGML_BACKTRACE_LLDB) {
+ fprintf(stderr, "WARNING: Using native backtrace. Set WSP_GGML_BACKTRACE_LLDB for more info.\n");
+ fprintf(stderr, "WARNING: WSP_GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
+ fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
+ wsp_ggml_print_backtrace_symbols();
+ return;
+ }
+ #endif
  #if defined(__linux__)
  FILE * f = fopen("/proc/self/status", "r");
  size_t size = 0;
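
The Apple branch above leaves the lldb-based backtrace reachable, but only when the WSP_GGML_BACKTRACE_LLDB environment variable is present (its value is never inspected, only its existence). A minimal sketch of opting back in from a host app, assuming a POSIX environment:

    #include <stdlib.h>

    // Opt in to the lldb-based backtrace on macOS. As the warnings above
    // note, this path may crash Terminal.app, so it stays off by default.
    setenv("WSP_GGML_BACKTRACE_LLDB", "1", 1);
    wsp_ggml_print_backtrace();
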
@@ -998,6 +1021,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
  "ARANGE",
  "TIMESTEP_EMBEDDING",
  "ARGSORT",
+ "TOP_K",
  "LEAKY_RELU",
  "TRI",
  "FILL",
@@ -1031,7 +1055,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
  "GLU",
  };

- static_assert(WSP_GGML_OP_COUNT == 94, "WSP_GGML_OP_COUNT != 94");
+ static_assert(WSP_GGML_OP_COUNT == 95, "WSP_GGML_OP_COUNT != 95");

  static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
  "none",
@@ -1106,6 +1130,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
  "arange(start, stop, step)",
  "timestep_embedding(timesteps, dim, max_period)",
  "argsort(x)",
+ "top_k(x)",
  "leaky_relu(x)",
  "tri(x)",
  "fill(x, c)",
@@ -1139,7 +1164,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
  "glu(x)",
  };

- static_assert(WSP_GGML_OP_COUNT == 94, "WSP_GGML_OP_COUNT != 94");
+ static_assert(WSP_GGML_OP_COUNT == 95, "WSP_GGML_OP_COUNT != 95");

  static_assert(WSP_GGML_OP_POOL_COUNT == 2, "WSP_GGML_OP_POOL_COUNT != 2");

@@ -4897,6 +4922,8 @@ static struct wsp_ggml_tensor * wsp_ggml_interpolate_impl(
  int64_t ne3,
  uint32_t mode) {
  WSP_GGML_ASSERT((mode & 0xFF) < WSP_GGML_SCALE_MODE_COUNT);
+ // TODO: implement antialias for modes other than bilinear
+ WSP_GGML_ASSERT(!(mode & WSP_GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == WSP_GGML_SCALE_MODE_BILINEAR);

  struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);

@@ -4951,6 +4978,18 @@ struct wsp_ggml_tensor * wsp_ggml_pad(
  return wsp_ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
  }

+ // wsp_ggml_pad_circular
+
+ struct wsp_ggml_tensor * wsp_ggml_pad_circular(
+ struct wsp_ggml_context * ctx,
+ struct wsp_ggml_tensor * a,
+ int p0,
+ int p1,
+ int p2,
+ int p3) {
+ return wsp_ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
+ }
+
  struct wsp_ggml_tensor * wsp_ggml_pad_ext(
  struct wsp_ggml_context * ctx,
  struct wsp_ggml_tensor * a,
@@ -4977,6 +5016,7 @@ struct wsp_ggml_tensor * wsp_ggml_pad_ext(
  wsp_ggml_set_op_params_i32(result, 5, rp2);
  wsp_ggml_set_op_params_i32(result, 6, lp3);
  wsp_ggml_set_op_params_i32(result, 7, rp3);
+ wsp_ggml_set_op_params_i32(result, 8, 0); // not circular by default


  result->op = WSP_GGML_OP_PAD;
@@ -4985,6 +5025,25 @@ struct wsp_ggml_tensor * wsp_ggml_pad_ext(
  return result;
  }

+ // wsp_ggml_pad_ext_circular
+
+ struct wsp_ggml_tensor * wsp_ggml_pad_ext_circular(
+ struct wsp_ggml_context * ctx,
+ struct wsp_ggml_tensor * a,
+ int lp0,
+ int rp0,
+ int lp1,
+ int rp1,
+ int lp2,
+ int rp2,
+ int lp3,
+ int rp3
+ ) {
+ struct wsp_ggml_tensor * result = wsp_ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
+ wsp_ggml_set_op_params_i32(result, 8, 1); // circular
+ return result;
+ }
+
  // wsp_ggml_pad_reflect_1d

  struct wsp_ggml_tensor * wsp_ggml_pad_reflect_1d(
@@ -5044,28 +5103,6 @@ struct wsp_ggml_tensor * wsp_ggml_roll(
  return result;
  }

- // wsp_ggml_arange
-
- struct wsp_ggml_tensor * wsp_ggml_arange(
- struct wsp_ggml_context * ctx,
- float start,
- float stop,
- float step) {
- WSP_GGML_ASSERT(stop > start);
-
- const int64_t steps = (int64_t) ceilf((stop - start) / step);
-
- struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, steps);
-
- wsp_ggml_set_op_params_f32(result, 0, start);
- wsp_ggml_set_op_params_f32(result, 1, stop);
- wsp_ggml_set_op_params_f32(result, 2, step);
-
- result->op = WSP_GGML_OP_ARANGE;
-
- return result;
- }
-
  // wsp_ggml_timestep_embedding

  struct wsp_ggml_tensor * wsp_ggml_timestep_embedding(
@@ -5147,6 +5184,7 @@ struct wsp_ggml_tensor * wsp_ggml_argsort(
  struct wsp_ggml_tensor * a,
  enum wsp_ggml_sort_order order) {
  WSP_GGML_ASSERT(a->ne[0] <= INT32_MAX);
+
  struct wsp_ggml_tensor * result = wsp_ggml_new_tensor(ctx, WSP_GGML_TYPE_I32, WSP_GGML_MAX_DIMS, a->ne);

  wsp_ggml_set_op_params_i32(result, 0, (int32_t) order);
@@ -5157,9 +5195,9 @@ struct wsp_ggml_tensor * wsp_ggml_argsort(
  return result;
  }

- // wsp_ggml_top_k
+ // wsp_ggml_argsort_top_k

- struct wsp_ggml_tensor * wsp_ggml_top_k(
+ struct wsp_ggml_tensor * wsp_ggml_argsort_top_k(
  struct wsp_ggml_context * ctx,
  struct wsp_ggml_tensor * a,
  int k) {
@@ -5175,6 +5213,44 @@ struct wsp_ggml_tensor * wsp_ggml_top_k(
  return result;
  }

+ // wsp_ggml_top_k
+
+ struct wsp_ggml_tensor * wsp_ggml_top_k(
+ struct wsp_ggml_context * ctx,
+ struct wsp_ggml_tensor * a,
+ int k) {
+ WSP_GGML_ASSERT(a->ne[0] >= k);
+
+ struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_4d(ctx, WSP_GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
+
+ result->op = WSP_GGML_OP_TOP_K;
+ result->src[0] = a;
+
+ return result;
+ }
+
+ // wsp_ggml_arange
+
+ struct wsp_ggml_tensor * wsp_ggml_arange(
+ struct wsp_ggml_context * ctx,
+ float start,
+ float stop,
+ float step) {
+ WSP_GGML_ASSERT(stop > start);
+
+ const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+ struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, steps);
+
+ wsp_ggml_set_op_params_f32(result, 0, start);
+ wsp_ggml_set_op_params_f32(result, 1, stop);
+ wsp_ggml_set_op_params_f32(result, 2, step);
+
+ result->op = WSP_GGML_OP_ARANGE;
+
+ return result;
+ }
+
  // wsp_ggml_flash_attn_ext

  struct wsp_ggml_tensor * wsp_ggml_flash_attn_ext(
@@ -5194,8 +5270,6 @@ struct wsp_ggml_tensor * wsp_ggml_flash_attn_ext(

  if (mask) {
  WSP_GGML_ASSERT(wsp_ggml_is_contiguous(mask));
- WSP_GGML_ASSERT(mask->ne[1] >= WSP_GGML_PAD(q->ne[1], WSP_GGML_KQ_MASK_PAD) &&
- "the Flash-Attention kernel requires the mask to be padded to WSP_GGML_KQ_MASK_PAD and at least n_queries big");
  //WSP_GGML_ASSERT(wsp_ggml_can_repeat_rows(mask, qk));

  WSP_GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
@@ -7502,6 +7576,11 @@ size_t wsp_ggml_wsp_quantize_chunk(

  ////////////////////////////////////////////////////////////////////////////////

+ void wsp_ggml_log_get(wsp_ggml_log_callback * log_callback, void ** user_data) {
+ *log_callback = g_logger_state.log_callback;
+ *user_data = g_logger_state.log_callback_user_data;
+ }
+
  void wsp_ggml_log_set(wsp_ggml_log_callback log_callback, void * user_data) {
  g_logger_state.log_callback = log_callback ? log_callback : wsp_ggml_log_callback_default;
  g_logger_state.log_callback_user_data = user_data;
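
The new wsp_ggml_log_get reads back whatever callback and user data were last installed with wsp_ggml_log_set, which makes it possible to save and restore the logger around a noisy call. A minimal sketch; quiet_logger is a hypothetical callback, not part of the package:

    // Save the current logger, swap in a different one, then restore it.
    wsp_ggml_log_callback prev_cb = NULL;
    void * prev_ud = NULL;
    wsp_ggml_log_get(&prev_cb, &prev_ud);

    wsp_ggml_log_set(quiet_logger, NULL); // quiet_logger: hypothetical
    // ... work whose logging should be suppressed ...
    wsp_ggml_log_set(prev_cb, prev_ud);   // restore the previous logger
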
package/cpp/ggml.h CHANGED
@@ -204,6 +204,10 @@
  # define WSP_GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
  #endif

+ #if defined(_WIN32) && !defined(_WIN32_WINNT)
+ # define _WIN32_WINNT 0x0A00
+ #endif
+
  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>
@@ -230,6 +234,11 @@

  #if UINTPTR_MAX == 0xFFFFFFFF
  #define WSP_GGML_MEM_ALIGN 4
+ #elif defined(__EMSCRIPTEN__)
+ // emscripten uses max_align_t == 8, so we need WSP_GGML_MEM_ALIGN == 8 for 64-bit wasm.
+ // (for 32-bit wasm, the first conditional is true and WSP_GGML_MEM_ALIGN stays 4.)
+ // ref: https://github.com/ggml-org/llama.cpp/pull/18628
+ #define WSP_GGML_MEM_ALIGN 8
  #else
  #define WSP_GGML_MEM_ALIGN 16
  #endif
@@ -530,6 +539,7 @@ extern "C" {
  WSP_GGML_OP_ARANGE,
  WSP_GGML_OP_TIMESTEP_EMBEDDING,
  WSP_GGML_OP_ARGSORT,
+ WSP_GGML_OP_TOP_K,
  WSP_GGML_OP_LEAKY_RELU,
  WSP_GGML_OP_TRI,
  WSP_GGML_OP_FILL,
@@ -2147,7 +2157,8 @@ extern "C" {
  };

  enum wsp_ggml_scale_flag {
- WSP_GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
+ WSP_GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8),
+ WSP_GGML_SCALE_FLAG_ANTIALIAS = (1 << 9),
  };

  // interpolate
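
The new flag is OR-ed into the same mode word as the scale mode, one bit above WSP_GGML_SCALE_FLAG_ALIGN_CORNERS. Per the assert added in wsp_ggml_interpolate_impl above, it is currently only accepted together with bilinear scaling. A sketch, assuming the usual wsp_ggml_interpolate(ctx, a, ne0..ne3, mode) entry point:

    // Bilinear resize with antialiasing. Combining the flag with any other
    // scale mode trips the new assert in wsp_ggml_interpolate_impl.
    struct wsp_ggml_tensor * out = wsp_ggml_interpolate(ctx, img,
        new_w, new_h, img->ne[2], img->ne[3],
        WSP_GGML_SCALE_MODE_BILINEAR | WSP_GGML_SCALE_FLAG_ANTIALIAS);
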
@@ -2190,6 +2201,15 @@
  int p2,
  int p3);

+ // pad each dimension with values on the other side of the torus (looping around)
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_circular(
+ struct wsp_ggml_context * ctx,
+ struct wsp_ggml_tensor * a,
+ int p0,
+ int p1,
+ int p2,
+ int p3);
+
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_ext(
  struct wsp_ggml_context * ctx,
  struct wsp_ggml_tensor * a,
@@ -2203,6 +2223,19 @@
  int rp3
  );

+ // pad each dimension with values on the other side of the torus (looping around)
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_ext_circular(
+ struct wsp_ggml_context * ctx,
+ struct wsp_ggml_tensor * a,
+ int lp0,
+ int rp0,
+ int lp1,
+ int rp1,
+ int lp2,
+ int rp2,
+ int lp3,
+ int rp3);
+
  // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_reflect_1d(
  struct wsp_ggml_context * ctx,
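
Circular padding wraps values around instead of reflecting them: in the notation of the reflection comment above, [a, b, c, d] padded by one element on each side of dimension 0 becomes [d, a, b, c, d, a]. A minimal sketch of the extended API:

    // x: [a, b, c, d] (1-D F32 tensor)
    // wrap one element on each side of dim 0 -> [d, a, b, c, d, a]
    struct wsp_ggml_tensor * y = wsp_ggml_pad_ext_circular(ctx, x,
        1, 1,  // lp0, rp0
        0, 0,  // lp1, rp1
        0, 0,  // lp2, rp2
        0, 0); // lp3, rp3

wsp_ggml_pad_circular is the right-side-only shorthand, mirroring the relationship between wsp_ggml_pad and wsp_ggml_pad_ext.
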
@@ -2258,25 +2291,30 @@
  struct wsp_ggml_tensor * a,
  enum wsp_ggml_sort_order order);

- WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_arange(
+ // similar to wsp_ggml_top_k but implemented as `argsort` + `view`
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_argsort_top_k(
  struct wsp_ggml_context * ctx,
- float start,
- float stop,
- float step);
+ struct wsp_ggml_tensor * a,
+ int k);

  // top k elements per row
+ // note: the resulting top k indices are in no particular order
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_top_k(
  struct wsp_ggml_context * ctx,
  struct wsp_ggml_tensor * a,
  int k);

- #define WSP_GGML_KQ_MASK_PAD 64
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_arange(
+ struct wsp_ggml_context * ctx,
+ float start,
+ float stop,
+ float step);

- // q: [n_embd_k, n_batch, n_head, ne3 ]
- // k: [n_embd_k, n_kv, n_head_kv, ne3 ]
- // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
- // mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = WSP_GGML_PAD(n_batch, WSP_GGML_KQ_MASK_PAD) !!
- // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
+ // q: [n_embd_k, n_batch, n_head, ne3 ]
+ // k: [n_embd_k, n_kv, n_head_kv, ne3 ]
+ // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
+ // mask: [n_kv, n_batch, ne32, ne33]
+ // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
  //
  // broadcast:
  // n_head % n_head_kv == 0
@@ -2582,7 +2620,8 @@ extern "C" {

  // Set callback for all future logging events.
  // If this is not called, or NULL is supplied, everything is output on stderr.
- WSP_GGML_API void wsp_ggml_log_set(wsp_ggml_log_callback log_callback, void * user_data);
+ WSP_GGML_API void wsp_ggml_log_get(wsp_ggml_log_callback * log_callback, void ** user_data);
+ WSP_GGML_API void wsp_ggml_log_set(wsp_ggml_log_callback log_callback, void * user_data);

  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_zero(struct wsp_ggml_tensor * tensor);

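The reshuffling above splits top-k into two entry points: wsp_ggml_top_k now lowers to the new WSP_GGML_OP_TOP_K op and returns indices in no particular order, while wsp_ggml_argsort_top_k keeps the argsort-plus-view formulation for callers that need sorted output. A minimal sketch of choosing between them:

    // logits: [n_vocab, n_rows] F32 tensor
    // Unordered top-10 indices per row (new WSP_GGML_OP_TOP_K op):
    struct wsp_ggml_tensor * top = wsp_ggml_top_k(ctx, logits, 10);

    // Ordered variant, implemented as a full argsort followed by a view:
    struct wsp_ggml_tensor * top_sorted = wsp_ggml_argsort_top_k(ctx, logits, 10);
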
package/cpp/jsi/RNWhisperJSI.cpp CHANGED
@@ -574,6 +574,7 @@ void installJSIBindings(
  if (code == 0) {
  auto resultObj = Object(runtime);
  resultObj.setProperty(runtime, "code", Value(code));
+ resultObj.setProperty(runtime, "language", String::createFromUtf8(runtime, whisper_lang_str(whisper_full_lang_id(context))));
  resultObj.setProperty(runtime, "result", String::createFromUtf8(runtime, createFullTextFromSegments(context, 0)));
  resultObj.setProperty(runtime, "segments", createSegmentsArray(runtime, context, 0));
  resolvePtr->call(runtime, resultObj);
package/cpp/whisper.cpp CHANGED
@@ -2505,7 +2505,7 @@ static struct wsp_ggml_cgraph * whisper_build_graph_decoder(

  const float KQscale = pow(float(n_state_head), -0.25);

- struct wsp_ggml_tensor * KQ_mask = wsp_ggml_new_tensor_3d(ctx0, WSP_GGML_TYPE_F32, n_kv, WSP_GGML_PAD(n_tokens, WSP_GGML_KQ_MASK_PAD), 1);
+ struct wsp_ggml_tensor * KQ_mask = wsp_ggml_new_tensor_3d(ctx0, WSP_GGML_TYPE_F32, n_kv, n_tokens, 1);
  wsp_ggml_set_name(KQ_mask, "KQ_mask");
  wsp_ggml_set_input(KQ_mask);

@@ -2929,7 +2929,7 @@ static bool whisper_decode_internal(
  }
  }

- for (int i = n_tokens; i < WSP_GGML_PAD(n_tokens, WSP_GGML_KQ_MASK_PAD); ++i) {
+ for (int i = n_tokens; i < n_tokens; ++i) {
  for (int j = 0; j < n_kv; ++j) {
  data[h*(n_kv*n_tokens) + i*n_kv + j] = -INFINITY;
  }
@@ -6030,6 +6030,19 @@ static inline bool should_split_on_word(const char * txt, bool split_on_word) {
  return txt[0] == ' ';
  }

+ // Count UTF-8 characters (not bytes) in a string
+ static int utf8_len(const char * str) {
+ int count = 0;
+ while (*str) {
+ // Skip continuation bytes (10xxxxxx)
+ if ((*str & 0xC0) != 0x80) {
+ count++;
+ }
+ str++;
+ }
+ return count;
+ }
+
  static void whisper_exp_compute_token_level_timestamps_dtw(
  struct whisper_context * ctx,
  struct whisper_state * state,
@@ -6058,7 +6071,7 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
  }

  const auto txt = whisper_token_to_str(&ctx, token.id);
- const int cur = strlen(txt);
+ const int cur = utf8_len(txt); // Use UTF-8 character count instead of byte count

  if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
  state.result_all.back().text = std::move(text);
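
A quick worked example of the byte-versus-character distinction the helper fixes, assuming UTF-8 source text:

    // "héllo": 'é' is two bytes (0xC3 0xA9), the other letters one byte each.
    // strlen("héllo")   == 6   (bytes; the old max_len comparison)
    // utf8_len("héllo") == 5   (characters; continuation byte 0xA9 is skipped)

so multi-byte text no longer hits max_len early when segments are wrapped.
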
package/ios/CMakeLists.txt CHANGED
@@ -16,6 +16,8 @@ add_definitions(
  -DWSP_GGML_USE_ACCELERATE
  -DWSP_GGML_USE_METAL
  -DWSP_GGML_METAL_USE_BF16
+ -DWHISPER_USE_COREML
+ -DWHISPER_COREML_ALLOW_FALLBACK
  )

  if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64;x86_64")
@@ -31,6 +33,14 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
  )
  endif ()

+ # CoreML source files
+ set(SOURCE_FILES_COREML
+ ${SOURCE_DIR}/coreml/whisper-encoder.mm
+ ${SOURCE_DIR}/coreml/whisper-encoder-impl.m
+ ${SOURCE_DIR}/coreml/whisper-decoder-impl.m
+ ${SOURCE_DIR}/coreml/whisper-compat.m
+ )
+
  # Define public headers
  set(PUBLIC_HEADERS
  ${SOURCE_DIR}/rn-whisper.h
@@ -69,6 +79,7 @@ add_library(rnwhisper SHARED
  ${SOURCE_DIR}/rn-whisper.cpp
  ${SOURCE_DIR}/rn-audioutils.cpp
  ${SOURCE_FILES_ARCH}
+ ${SOURCE_FILES_COREML}
  )

  # Setup include directories
@@ -76,6 +87,7 @@ target_include_directories(rnwhisper
  PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp>
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/ggml-cpu>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/coreml>
  $<INSTALL_INTERFACE:include>
  )

@@ -85,6 +97,7 @@ target_link_libraries(rnwhisper PRIVATE
  "-framework Foundation"
  "-framework Metal"
  "-framework MetalKit"
+ "-framework CoreML"
  )

  # Set properties for framework
@@ -96,5 +109,12 @@ set_target_properties(rnwhisper PROPERTIES
  FRAMEWORK_VERSION 1.0.0
  VERSION 1.0.0
  PUBLIC_HEADER "${PUBLIC_HEADERS}"
- XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC NO
+ XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES
+ )
+
+ # Disable ARC for Metal Objective-C files (they don't support ARC)
+ set_source_files_properties(
+ ${SOURCE_DIR}/ggml-metal/ggml-metal-context.m
+ ${SOURCE_DIR}/ggml-metal/ggml-metal-device.m
+ PROPERTIES COMPILE_FLAGS "-fno-objc-arc"
  )
package/ios/RNWhisperContext.mm CHANGED
@@ -612,6 +612,11 @@ struct rnwhisper_segments_callback_data {
  NSMutableDictionary *result = [[NSMutableDictionary alloc] init];
  result[@"result"] = text;
  result[@"segments"] = segments;
+ int lang_id = whisper_full_lang_id(self->ctx);
+ const char *lang_str = whisper_lang_str(lang_id);
+ if (lang_str != nullptr) {
+ result[@"language"] = [NSString stringWithUTF8String:lang_str];
+ }
  return result;
  }

package/cpp/ggml-alloc.h CHANGED
@@ -53,7 +53,14 @@ WSP_GGML_API void wsp_ggml_gallocr_free(wsp_ggml_gallocr_t galloc);
  // call with a worst-case graph to avoid buffer reallocations
  // not strictly required for single buffer usage: wsp_ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
  // returns false if the buffer allocation failed
+ // wsp_ggml_gallocr_resrve_n_size writes the buffer sizes per galloc buffer that would be allocated by wsp_ggml_gallocr_reserve_n to sizes
  WSP_GGML_API bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
+ WSP_GGML_API void wsp_ggml_gallocr_reserve_n_size(
+ wsp_ggml_gallocr_t galloc,
+ struct wsp_ggml_cgraph * graph,
+ const int * node_buffer_ids,
+ const int * leaf_buffer_ids,
+ size_t * sizes);
  WSP_GGML_API bool wsp_ggml_gallocr_reserve_n(
  wsp_ggml_gallocr_t galloc,
  struct wsp_ggml_cgraph * graph,
@@ -68,6 +75,8 @@ WSP_GGML_API size_t wsp_ggml_gallocr_get_buffer_size(wsp_ggml_gallocr_t galloc,

  // Utils
  // Create a buffer and allocate all the tensors in a wsp_ggml_context
+ // wsp_ggml_backend_alloc_ctx_tensors_from_buft_size returns the size of the buffer that would be allocated by wsp_ggml_backend_alloc_ctx_tensors_from_buft
+ WSP_GGML_API size_t wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
  WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
  WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend);

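Both *_size additions let callers estimate buffer sizes without committing to an allocation. A hedged sketch for the context-tensor variant, assuming ctx already holds the tensors and using the CPU buffer type:

    // Ask how large the buffer would be, then allocate it for real.
    wsp_ggml_backend_buffer_type_t buft = wsp_ggml_backend_cpu_buffer_type();
    size_t need = wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);
    fprintf(stderr, "ctx tensors would need %zu bytes\n", need);

    struct wsp_ggml_backend_buffer * buf =
        wsp_ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
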
package/cpp/ggml-backend-impl.h CHANGED
@@ -144,7 +144,7 @@ extern "C" {
  // device description: short informative description of the device, could be the model name
  const char * (*get_description)(wsp_ggml_backend_dev_t dev);

- // device memory in bytes
+ // device memory in bytes: 0 bytes to indicate no memory to report
  void (*get_memory)(wsp_ggml_backend_dev_t dev, size_t * free, size_t * total);

  // device type
package/cpp/ggml-backend.h CHANGED
@@ -307,6 +307,7 @@ extern "C" {
  WSP_GGML_API void wsp_ggml_backend_sched_free(wsp_ggml_backend_sched_t sched);

  // Initialize backend buffers from a measure graph
+ WSP_GGML_API void wsp_ggml_backend_sched_reserve_size(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph, size_t * sizes);
  WSP_GGML_API bool wsp_ggml_backend_sched_reserve(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph); // returns success

  WSP_GGML_API int wsp_ggml_backend_sched_get_n_backends(wsp_ggml_backend_sched_t sched);
@@ -357,7 +358,7 @@ extern "C" {
  typedef bool (*wsp_ggml_backend_eval_callback)(int node_index, struct wsp_ggml_tensor * t1, struct wsp_ggml_tensor * t2, void * user_data);

  // Compare the output of two backends
- WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node);
+ WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor const * const * test_nodes, size_t num_test_nodes);

  // Tensor initialization
  WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
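
A hedged sketch of the new scheduler size query above; the sizes array is assumed to hold one entry per backend (mirroring the per-buffer semantics of wsp_ggml_gallocr_reserve_n_size), which the header comment does not spell out:

    // Query reservation sizes for a worst-case graph before reserving.
    int n = wsp_ggml_backend_sched_get_n_backends(sched);
    size_t * sizes = calloc(n, sizeof(size_t)); // assumption: one per backend
    wsp_ggml_backend_sched_reserve_size(sched, measure_graph, sizes);
    for (int i = 0; i < n; i++) {
        fprintf(stderr, "backend %d would reserve %zu bytes\n", i, sizes[i]);
    }
    free(sizes);

    wsp_ggml_backend_sched_reserve(sched, measure_graph); // then reserve
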
package/cpp/ggml-cpu.h CHANGED
@@ -99,6 +99,7 @@ extern "C" {
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_sme (void);
  // other
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_riscv_v (void);
+ WSP_GGML_BACKEND_API int wsp_ggml_cpu_get_rvv_vlen (void); // risc-v vector length in bytes
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vsx (void);
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vxe (void);
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_wasm_simd (void);
package/cpp/ggml-impl.h CHANGED
@@ -24,10 +24,6 @@
  #include <arm_neon.h>
  #endif

- #if defined(__F16C__)
- #include <immintrin.h>
- #endif
-
  #ifdef __cplusplus
  extern "C" {
  #endif