whisper.rn 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/android/build.gradle +2 -1
  2. package/android/gradle.properties +1 -1
  3. package/android/src/main/jni.cpp +12 -3
  4. package/cpp/ggml-alloc.c +292 -130
  5. package/cpp/ggml-backend-impl.h +4 -4
  6. package/cpp/ggml-backend-reg.cpp +13 -5
  7. package/cpp/ggml-backend.cpp +207 -17
  8. package/cpp/ggml-backend.h +19 -1
  9. package/cpp/ggml-cpu/amx/amx.cpp +5 -2
  10. package/cpp/ggml-cpu/arch/x86/repack.cpp +2 -2
  11. package/cpp/ggml-cpu/arch-fallback.h +0 -4
  12. package/cpp/ggml-cpu/common.h +14 -0
  13. package/cpp/ggml-cpu/ggml-cpu-impl.h +14 -7
  14. package/cpp/ggml-cpu/ggml-cpu.c +65 -44
  15. package/cpp/ggml-cpu/ggml-cpu.cpp +14 -4
  16. package/cpp/ggml-cpu/ops.cpp +542 -775
  17. package/cpp/ggml-cpu/ops.h +2 -0
  18. package/cpp/ggml-cpu/simd-mappings.h +88 -59
  19. package/cpp/ggml-cpu/unary-ops.cpp +135 -0
  20. package/cpp/ggml-cpu/unary-ops.h +5 -0
  21. package/cpp/ggml-cpu/vec.cpp +227 -20
  22. package/cpp/ggml-cpu/vec.h +407 -56
  23. package/cpp/ggml-cpu.h +1 -1
  24. package/cpp/ggml-impl.h +94 -12
  25. package/cpp/ggml-metal/ggml-metal-common.cpp +446 -0
  26. package/cpp/ggml-metal/ggml-metal-common.h +52 -0
  27. package/cpp/ggml-metal/ggml-metal-context.h +33 -0
  28. package/cpp/ggml-metal/ggml-metal-context.m +600 -0
  29. package/cpp/ggml-metal/ggml-metal-device.cpp +1565 -0
  30. package/cpp/ggml-metal/ggml-metal-device.h +244 -0
  31. package/cpp/ggml-metal/ggml-metal-device.m +1325 -0
  32. package/cpp/ggml-metal/ggml-metal-impl.h +802 -0
  33. package/cpp/ggml-metal/ggml-metal-ops.cpp +3583 -0
  34. package/cpp/ggml-metal/ggml-metal-ops.h +88 -0
  35. package/cpp/ggml-metal/ggml-metal.cpp +718 -0
  36. package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
  37. package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
  38. package/cpp/ggml-metal-impl.h +40 -40
  39. package/cpp/ggml-metal.h +1 -6
  40. package/cpp/ggml-quants.c +1 -0
  41. package/cpp/ggml.c +341 -15
  42. package/cpp/ggml.h +150 -5
  43. package/cpp/jsi/RNWhisperJSI.cpp +9 -2
  44. package/cpp/jsi/ThreadPool.h +3 -3
  45. package/cpp/rn-whisper.h +1 -0
  46. package/cpp/whisper.cpp +89 -72
  47. package/cpp/whisper.h +1 -0
  48. package/ios/CMakeLists.txt +6 -1
  49. package/ios/RNWhisperContext.mm +3 -1
  50. package/ios/RNWhisperVadContext.mm +14 -13
  51. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
  52. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +19 -1
  53. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  54. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +94 -12
  55. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
  56. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  57. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +150 -5
  58. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  59. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +1 -0
  60. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  61. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  62. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  63. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
  64. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +19 -1
  65. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  66. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +94 -12
  67. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
  68. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  69. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +150 -5
  70. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  71. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +1 -0
  72. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  73. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  74. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  75. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  76. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
  77. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +19 -1
  78. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  79. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +94 -12
  80. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
  81. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  82. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +150 -5
  83. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  84. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +1 -0
  85. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  86. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  87. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  88. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
  89. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +19 -1
  90. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  91. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +94 -12
  92. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
  93. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  94. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +150 -5
  95. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  96. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +1 -0
  97. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  98. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  99. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  100. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  101. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  102. package/lib/commonjs/version.json +1 -1
  103. package/lib/module/NativeRNWhisper.js.map +1 -1
  104. package/lib/module/version.json +1 -1
  105. package/lib/typescript/NativeRNWhisper.d.ts +2 -0
  106. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  107. package/package.json +1 -1
  108. package/src/NativeRNWhisper.ts +2 -0
  109. package/src/version.json +1 -1
  110. package/whisper-rn.podspec +8 -9
  111. package/cpp/ggml-metal.m +0 -6779
  112. package/cpp/ggml-whisper-sim.metallib +0 -0
  113. package/cpp/ggml-whisper.metallib +0 -0
package/cpp/ggml.h CHANGED
@@ -237,6 +237,8 @@
237
237
  #define WSP_GGML_EXIT_SUCCESS 0
238
238
  #define WSP_GGML_EXIT_ABORTED 1
239
239
 
240
+ // TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
241
+ #define WSP_GGML_ROPE_TYPE_NORMAL 0
240
242
  #define WSP_GGML_ROPE_TYPE_NEOX 2
241
243
  #define WSP_GGML_ROPE_TYPE_MROPE 8
242
244
  #define WSP_GGML_ROPE_TYPE_VISION 24
@@ -244,6 +246,13 @@
244
246
  #define WSP_GGML_MROPE_SECTIONS 4
245
247
 
246
248
  #define WSP_GGML_UNUSED(x) (void)(x)
249
+ #ifdef __CUDACC__
250
+ template<typename... Args>
251
+ __host__ __device__ constexpr inline void wsp_ggml_unused_vars_impl(Args&&...) noexcept {}
252
+ #define WSP_GGML_UNUSED_VARS(...) wsp_ggml_unused_vars_impl(__VA_ARGS__)
253
+ #else
254
+ #define WSP_GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0)
255
+ #endif // __CUDACC__
247
256
 
248
257
  #define WSP_GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
249
258
 
@@ -277,19 +286,19 @@
277
286
  // WSP_GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
278
287
  //
279
288
  #define WSP_GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
280
- const type prefix##0 = (pointer)->array[0]; \
289
+ const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \
281
290
  WSP_GGML_UNUSED(prefix##0);
282
291
  #define WSP_GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
283
292
  WSP_GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
284
- const type prefix##1 = (pointer)->array[1]; \
293
+ const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \
285
294
  WSP_GGML_UNUSED(prefix##1);
286
295
  #define WSP_GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
287
296
  WSP_GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
288
- const type prefix##2 = (pointer)->array[2]; \
297
+ const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \
289
298
  WSP_GGML_UNUSED(prefix##2);
290
299
  #define WSP_GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
291
300
  WSP_GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
292
- const type prefix##3 = (pointer)->array[3]; \
301
+ const type prefix##3 = (pointer) ? (pointer)->array[3] : 0; \
293
302
  WSP_GGML_UNUSED(prefix##3);
294
303
 
295
304
  #define WSP_GGML_TENSOR_UNARY_OP_LOCALS \
@@ -504,7 +513,9 @@ extern "C" {
504
513
  WSP_GGML_OP_CONV_TRANSPOSE_1D,
505
514
  WSP_GGML_OP_IM2COL,
506
515
  WSP_GGML_OP_IM2COL_BACK,
516
+ WSP_GGML_OP_IM2COL_3D,
507
517
  WSP_GGML_OP_CONV_2D,
518
+ WSP_GGML_OP_CONV_3D,
508
519
  WSP_GGML_OP_CONV_2D_DW,
509
520
  WSP_GGML_OP_CONV_TRANSPOSE_2D,
510
521
  WSP_GGML_OP_POOL_1D,
@@ -565,6 +576,11 @@ extern "C" {
565
576
  WSP_GGML_UNARY_OP_HARDSIGMOID,
566
577
  WSP_GGML_UNARY_OP_EXP,
567
578
  WSP_GGML_UNARY_OP_GELU_ERF,
579
+ WSP_GGML_UNARY_OP_XIELU,
580
+ WSP_GGML_UNARY_OP_FLOOR,
581
+ WSP_GGML_UNARY_OP_CEIL,
582
+ WSP_GGML_UNARY_OP_ROUND,
583
+ WSP_GGML_UNARY_OP_TRUNC,
568
584
 
569
585
  WSP_GGML_UNARY_OP_COUNT,
570
586
  };
@@ -1139,6 +1155,58 @@ extern "C" {
1139
1155
  struct wsp_ggml_context * ctx,
1140
1156
  struct wsp_ggml_tensor * a);
1141
1157
 
1158
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
1159
+ struct wsp_ggml_context * ctx,
1160
+ struct wsp_ggml_tensor * a);
1161
+
1162
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
1163
+ struct wsp_ggml_context * ctx,
1164
+ struct wsp_ggml_tensor * a);
1165
+
1166
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
1167
+ struct wsp_ggml_context * ctx,
1168
+ struct wsp_ggml_tensor * a);
1169
+
1170
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
1171
+ struct wsp_ggml_context * ctx,
1172
+ struct wsp_ggml_tensor * a);
1173
+
1174
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
1175
+ struct wsp_ggml_context * ctx,
1176
+ struct wsp_ggml_tensor * a);
1177
+
1178
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
1179
+ struct wsp_ggml_context * ctx,
1180
+ struct wsp_ggml_tensor * a);
1181
+
1182
+ /**
1183
+ * Truncates the fractional part of each element in the tensor (towards zero).
1184
+ * For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
1185
+ * Similar to std::trunc in C/C++.
1186
+ */
1187
+
1188
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
1189
+ struct wsp_ggml_context * ctx,
1190
+ struct wsp_ggml_tensor * a);
1191
+
1192
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
1193
+ struct wsp_ggml_context * ctx,
1194
+ struct wsp_ggml_tensor * a);
1195
+
1196
+
1197
+
1198
+ // xIELU activation function
1199
+ // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
1200
+ // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
1201
+ // that constrain the positive and negative source alpha values respectively
1202
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
1203
+ struct wsp_ggml_context * ctx,
1204
+ struct wsp_ggml_tensor * a,
1205
+ float alpha_n,
1206
+ float alpha_p,
1207
+ float beta,
1208
+ float eps);
1209
+
1142
1210
  // gated linear unit ops
1143
1211
  // A: n columns, r rows,
1144
1212
  // result is n / 2 columns, r rows,
@@ -1395,6 +1463,7 @@ extern "C" {
1395
1463
  struct wsp_ggml_tensor * a,
1396
1464
  struct wsp_ggml_tensor * b);
1397
1465
 
1466
+ // note: casting from f32 to i32 will discard the fractional part
1398
1467
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cast(
1399
1468
  struct wsp_ggml_context * ctx,
1400
1469
  struct wsp_ggml_tensor * a,
@@ -1519,7 +1588,11 @@ extern "C" {
1519
1588
  struct wsp_ggml_context * ctx,
1520
1589
  struct wsp_ggml_tensor * a);
1521
1590
 
1522
- // supports 3D: a->ne[2] == b->ne[1]
1591
+ // supports 4D a:
1592
+ // a [n_embd, ne1, ne2, ne3]
1593
+ // b I32 [n_rows, ne2, ne3, 1]
1594
+ //
1595
+ // return [n_embd, n_rows, ne2, ne3]
1523
1596
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_get_rows(
1524
1597
  struct wsp_ggml_context * ctx,
1525
1598
  struct wsp_ggml_tensor * a, // data
@@ -1601,6 +1674,13 @@ extern "C" {
1601
1674
  float scale,
1602
1675
  float max_bias);
1603
1676
 
1677
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
1678
+ struct wsp_ggml_context * ctx,
1679
+ struct wsp_ggml_tensor * a,
1680
+ struct wsp_ggml_tensor * mask,
1681
+ float scale,
1682
+ float max_bias);
1683
+
1604
1684
  WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
1605
1685
  struct wsp_ggml_tensor * a,
1606
1686
  struct wsp_ggml_tensor * sinks);
@@ -1862,6 +1942,41 @@ extern "C" {
1862
1942
  int d0, // dilation dimension 0
1863
1943
  int d1); // dilation dimension 1
1864
1944
 
1945
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_im2col_3d(
1946
+ struct wsp_ggml_context * ctx,
1947
+ struct wsp_ggml_tensor * a,
1948
+ struct wsp_ggml_tensor * b,
1949
+ int64_t IC,
1950
+ int s0, // stride width
1951
+ int s1, // stride height
1952
+ int s2, // stride depth
1953
+ int p0, // padding width
1954
+ int p1, // padding height
1955
+ int p2, // padding depth
1956
+ int d0, // dilation width
1957
+ int d1, // dilation height
1958
+ int d2, // dilation depth
1959
+ enum wsp_ggml_type dst_type);
1960
+
1961
+ // a: [OC*IC, KD, KH, KW]
1962
+ // b: [N*IC, ID, IH, IW]
1963
+ // result: [N*OC, OD, OH, OW]
1964
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_3d(
1965
+ struct wsp_ggml_context * ctx,
1966
+ struct wsp_ggml_tensor * a,
1967
+ struct wsp_ggml_tensor * b,
1968
+ int64_t IC,
1969
+ int s0, // stride width
1970
+ int s1, // stride height
1971
+ int s2, // stride depth
1972
+ int p0, // padding width
1973
+ int p1, // padding height
1974
+ int p2, // padding depth
1975
+ int d0, // dilation width
1976
+ int d1, // dilation height
1977
+ int d2 // dilation depth
1978
+ );
1979
+
1865
1980
  // kernel size is a->ne[0] x a->ne[1]
1866
1981
  // stride is equal to kernel size
1867
1982
  // padding is zero
@@ -1933,6 +2048,23 @@ extern "C" {
1933
2048
  int d0, // dilation dimension 0
1934
2049
  int d1); // dilation dimension 1
1935
2050
 
2051
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_3d_direct(
2052
+ struct wsp_ggml_context * ctx,
2053
+ struct wsp_ggml_tensor * a, // kernel [KW, KH, KD, IC * OC]
2054
+ struct wsp_ggml_tensor * b, // input [W, H, D, C * N]
2055
+ int s0, // stride
2056
+ int s1,
2057
+ int s2,
2058
+ int p0, // padding
2059
+ int p1,
2060
+ int p2,
2061
+ int d0, // dilation
2062
+ int d1,
2063
+ int d2,
2064
+ int n_channels,
2065
+ int n_batch,
2066
+ int n_channels_out);
2067
+
1936
2068
  enum wsp_ggml_op_pool {
1937
2069
  WSP_GGML_OP_POOL_MAX,
1938
2070
  WSP_GGML_OP_POOL_AVG,
@@ -2023,6 +2155,19 @@ extern "C" {
2023
2155
  int p2,
2024
2156
  int p3);
2025
2157
 
2158
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_ext(
2159
+ struct wsp_ggml_context * ctx,
2160
+ struct wsp_ggml_tensor * a,
2161
+ int lp0,
2162
+ int rp0,
2163
+ int lp1,
2164
+ int rp1,
2165
+ int lp2,
2166
+ int rp2,
2167
+ int lp3,
2168
+ int rp3
2169
+ );
2170
+
2026
2171
  // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
2027
2172
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_reflect_1d(
2028
2173
  struct wsp_ggml_context * ctx,
package/cpp/jsi/RNWhisperJSI.cpp CHANGED
@@ -17,6 +17,8 @@ using namespace facebook::jsi;
17
17
 
18
18
  namespace rnwhisper_jsi {
19
19
 
20
+ using namespace facebook::jsi;
21
+
20
22
  // Consolidated logging function
21
23
  enum class LogLevel { LOG_DEBUG, LOG_INFO, LOG_ERROR };
22
24
 
@@ -267,11 +269,13 @@ struct CallbackInfo {
267
269
  std::shared_ptr<Function> onProgressCallback;
268
270
  std::shared_ptr<Function> onNewSegmentsCallback;
269
271
  int jobId;
272
+ int nProcessors;
270
273
  };
271
274
 
272
275
  CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
273
276
  CallbackInfo info;
274
277
  info.jobId = rand(); // Default fallback jobId
278
+ info.nProcessors = 1; // Default to 1 processor
275
279
 
276
280
  try {
277
281
  auto propNames = optionsObj.getPropertyNames(runtime);
@@ -286,6 +290,8 @@ CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
286
290
  info.onNewSegmentsCallback = std::make_shared<Function>(propValue.getObject(runtime).getFunction(runtime));
287
291
  } else if (propName == "jobId" && propValue.isNumber()) {
288
292
  info.jobId = (int)propValue.getNumber();
293
+ } else if (propName == "nProcessors" && propValue.isNumber()) {
294
+ info.nProcessors = (int)propValue.getNumber();
289
295
  }
290
296
  }
291
297
  } catch (...) {
@@ -549,12 +555,13 @@ void installJSIBindings(
549
555
  code = -2;
550
556
  } else {
551
557
  try {
552
- code = whisper_full(context, job->params, audioResult.data.data(), audioResult.count);
558
+ job->n_processors = callbackInfo.nProcessors;
559
+ code = whisper_full_parallel(context, job->params, audioResult.data.data(), audioResult.count, job->n_processors);
553
560
  if (job->is_aborted()) {
554
561
  code = -999;
555
562
  }
556
563
  } catch (...) {
557
- logError("Exception during whisper_full transcription");
564
+ logError("Exception during whisper_full_parallel transcription");
558
565
  code = -3;
559
566
  }
560
567
  rnwhisper::job_remove(callbackInfo.jobId);
package/cpp/jsi/ThreadPool.h CHANGED
@@ -18,7 +18,7 @@ public:
18
18
  ThreadPool(size_t);
19
19
  template<class F, class... Args>
20
20
  auto enqueue(F&& f, Args&&... args)
21
- -> std::future<typename std::result_of<F(Args...)>::type>;
21
+ -> std::future<std::invoke_result_t<F, Args...>>;
22
22
  ~ThreadPool();
23
23
  private:
24
24
  // need to keep track of threads so we can join them
@@ -63,9 +63,9 @@ inline ThreadPool::ThreadPool(size_t threads)
63
63
  // add new work item to the pool
64
64
  template<class F, class... Args>
65
65
  auto ThreadPool::enqueue(F&& f, Args&&... args)
66
- -> std::future<typename std::result_of<F(Args...)>::type>
66
+ -> std::future<std::invoke_result_t<F, Args...>>
67
67
  {
68
- using return_type = typename std::result_of<F(Args...)>::type;
68
+ using return_type = std::invoke_result_t<F, Args...>;
69
69
 
70
70
  auto task = std::make_shared< std::packaged_task<return_type()> >(
71
71
  std::bind(std::forward<F>(f), std::forward<Args>(args)...)
package/cpp/rn-whisper.h CHANGED
@@ -24,6 +24,7 @@ struct job {
24
24
  int job_id;
25
25
  bool aborted = false;
26
26
  whisper_full_params params;
27
+ int n_processors = 1;
27
28
 
28
29
  ~job();
29
30
  bool is_aborted();
package/cpp/whisper.cpp CHANGED
@@ -21,14 +21,12 @@
21
21
  #define _USE_MATH_DEFINES
22
22
  #include <cmath>
23
23
  #include <climits>
24
- #include <codecvt>
25
24
  #include <cstdarg>
26
25
  #include <cstdio>
27
26
  #include <cstring>
28
27
  #include <fstream>
29
28
  #include <functional>
30
29
  #include <map>
31
- #include <mutex>
32
30
  #include <random>
33
31
  #include <regex>
34
32
  #include <set>
@@ -36,6 +34,10 @@
36
34
  #include <thread>
37
35
  #include <vector>
38
36
 
37
+ #ifdef _MSC_VER
38
+ #include <codecvt>
39
+ #endif
40
+
39
41
  #if defined(WHISPER_BIG_ENDIAN)
40
42
  template<typename T>
41
43
  static T byteswap(T value) {
@@ -138,6 +140,10 @@ static void whisper_log_callback_default(wsp_ggml_log_level level, const char *
138
140
  } while (0)
139
141
 
140
142
  #define WHISPER_MAX_DECODERS 8
143
+
144
+ // temperature below which we condition on past text history
145
+ static constexpr float WHISPER_HISTORY_CONDITIONING_TEMP_CUTOFF = 0.5f;
146
+
141
147
  #define WHISPER_MAX_NODES 4096
142
148
 
143
149
  static std::string format(const char * fmt, ...) {
@@ -252,45 +258,6 @@ static void whisper_set_i32_nd(struct wsp_ggml_tensor * t, int64_t i0, int64_t i
252
258
  *(int32_t *) data = v;
253
259
  }
254
260
 
255
- // faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
256
- // the idea is to represent the original matrix multiplication:
257
- //
258
- // Z = X @ Y
259
- //
260
- // with the sum of two matrix multiplications:
261
- //
262
- // Z = (X_0 @ Y_0) + (X_1 @ Y_1)
263
- //
264
- // here X_0 and Y_0 are views of X and Y that have dimension 0 divisible by "pad"
265
- // and X_1 and Y_1 are the remaining views. X_1 and Y_1 end up being small matrices that can be processed with more
266
- // general-purpose kernels
267
- //
268
- static struct wsp_ggml_tensor * wsp_ggml_mul_mat_pad(struct wsp_ggml_context * ctx, struct wsp_ggml_tensor * x, struct wsp_ggml_tensor * y, int pad = 32) {
269
- // use padding only if dimension 0 is at least 8 times larger than the padding
270
- // else we won't get much benefit from the optimization
271
- const int n_pad_req = 8;
272
-
273
- if (x->ne[0] % pad == 0 || x->ne[0] / pad < n_pad_req) {
274
- return wsp_ggml_mul_mat(ctx, x, y);
275
- }
276
-
277
- struct wsp_ggml_tensor * x_0 = wsp_ggml_view_3d(ctx, x, (x->ne[0]/pad)*pad, x->ne[1], x->ne[2], x->nb[1], x->nb[2], 0);
278
- struct wsp_ggml_tensor * x_1 = wsp_ggml_view_3d(ctx, x, x->ne[0]%pad, x->ne[1], x->ne[2], x->nb[1], x->nb[2], x_0->ne[0]*x_0->nb[0]);
279
-
280
- struct wsp_ggml_tensor * y_0 = wsp_ggml_view_3d(ctx, y, (y->ne[0]/pad)*pad, y->ne[1], y->ne[2], y->nb[1], y->nb[2], 0);
281
- struct wsp_ggml_tensor * y_1 = wsp_ggml_view_3d(ctx, y, y->ne[0]%pad, y->ne[1], y->ne[2], y->nb[1], y->nb[2], y_0->ne[0]*y_0->nb[0]);
282
-
283
- return wsp_ggml_add(ctx,
284
- wsp_ggml_mul_mat(ctx, x_0, y_0),
285
- wsp_ggml_mul_mat(ctx, x_1, y_1));
286
- }
287
-
288
- // TODO: check if other platforms can benefit from this optimization
289
- // TODO: CUDA is currently broken - seems wsp_ggml_mul_mat does not handle views correctly
290
- #if defined(WSP_GGML_USE_METAL)
291
- #define wsp_ggml_mul_mat wsp_ggml_mul_mat_pad
292
- #endif
293
-
294
261
  // available whisper models
295
262
  enum e_model {
296
263
  MODEL_UNKNOWN,
@@ -919,7 +886,10 @@ struct whisper_state {
919
886
  std::vector<float> logits;
920
887
 
921
888
  std::vector<whisper_segment> result_all;
922
- std::vector<whisper_token> prompt_past;
889
+
890
+ // prompt history split into static prefix (prompt_past0) and dynamic rolling context (prompt_past1)
891
+ std::vector<whisper_token> prompt_past0; // static carried initial prompt (if enabled)
892
+ std::vector<whisper_token> prompt_past1; // dynamic context from decoded output
923
893
 
924
894
  int lang_id = 0; // english by default
925
895
 
@@ -1326,7 +1296,7 @@ static wsp_ggml_backend_t whisper_backend_init_gpu(const whisper_context_params
1326
1296
  if (params.use_gpu) {
1327
1297
  for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
1328
1298
  wsp_ggml_backend_dev_t dev_cur = wsp_ggml_backend_dev_get(i);
1329
- if (wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
1299
+ if (wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
1330
1300
  if (cnt == params.gpu_device) {
1331
1301
  dev = dev_cur;
1332
1302
  }
@@ -1395,7 +1365,7 @@ static buft_list_t make_buft_list(whisper_context_params & params) {
1395
1365
  int cnt = 0;
1396
1366
  for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
1397
1367
  wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
1398
- if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
1368
+ if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
1399
1369
  if (cnt == params.gpu_device) {
1400
1370
  auto * buft = wsp_ggml_backend_dev_buffer_type(dev);
1401
1371
  if (buft) {
@@ -1433,6 +1403,7 @@ static bool weight_buft_supported(const whisper_hparams & hparams, wsp_ggml_tens
1433
1403
  bool op_supported = true;
1434
1404
 
1435
1405
  if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
1406
+ wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
1436
1407
  (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
1437
1408
  // GPU and default CPU backend support all operators
1438
1409
  op_supported = true;
@@ -3635,7 +3606,7 @@ struct whisper_context_params whisper_context_default_params() {
3635
3606
  struct whisper_context_params result = {
3636
3607
  /*.use_gpu =*/ true,
3637
3608
  /*.use_coreml =*/ false,
3638
- /*.flash_attn =*/ false,
3609
+ /*.flash_attn =*/ true,
3639
3610
  /*.gpu_device =*/ 0,
3640
3611
 
3641
3612
  /*.dtw_token_timestamps =*/ false,
@@ -4489,6 +4460,7 @@ static bool weight_buft_supported(const whisper_vad_hparams & hparams, wsp_ggml_
4489
4460
  bool op_supported = true;
4490
4461
 
4491
4462
  if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
4463
+ wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
4492
4464
  (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
4493
4465
  // GPU and default CPU backend support all operators
4494
4466
  op_supported = true;
@@ -4719,6 +4691,7 @@ static bool whisper_vad_init_context(whisper_vad_context * vctx) {
4719
4691
  wsp_ggml_set_name(vctx->c_state, "c_state");
4720
4692
 
4721
4693
  vctx->buffer = wsp_ggml_backend_alloc_ctx_tensors(ctx, vctx->backends[0]);
4694
+ wsp_ggml_free(ctx);
4722
4695
  if (!vctx->buffer) {
4723
4696
  WHISPER_LOG_ERROR("%s: failed to allocate memory for the VAD state\n", __func__);
4724
4697
  return false;
@@ -5463,6 +5436,9 @@ struct whisper_vad_segments * whisper_vad_segments_from_samples(
5463
5436
 
5464
5437
  void whisper_vad_free(whisper_vad_context * ctx) {
5465
5438
  if (ctx) {
5439
+ if (ctx->buffer) {
5440
+ wsp_ggml_backend_buffer_free(ctx->buffer);
5441
+ }
5466
5442
  for (wsp_ggml_context * context : ctx->model.ctxs) {
5467
5443
  wsp_ggml_free(context);
5468
5444
  }
@@ -5477,6 +5453,9 @@ void whisper_vad_free(whisper_vad_context * ctx) {
5477
5453
  wsp_ggml_backend_free(backend);
5478
5454
  }
5479
5455
 
5456
+ delete[] ctx->model.hparams.encoder_in_channels;
5457
+ delete[] ctx->model.hparams.encoder_out_channels;
5458
+ delete[] ctx->model.hparams.kernel_sizes;
5480
5459
 
5481
5460
  delete ctx;
5482
5461
  }
@@ -5956,9 +5935,10 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
5956
5935
 
5957
5936
  /* suppress_regex =*/ nullptr,
5958
5937
 
5959
- /*.initial_prompt =*/ nullptr,
5960
- /*.prompt_tokens =*/ nullptr,
5961
- /*.prompt_n_tokens =*/ 0,
5938
+ /*.initial_prompt =*/ nullptr,
5939
+ /*.carry_initial_prompt =*/ false,
5940
+ /*.prompt_tokens =*/ nullptr,
5941
+ /*.prompt_n_tokens =*/ 0,
5962
5942
 
5963
5943
  /*.language =*/ "en",
5964
5944
  /*.detect_language =*/ false,
@@ -6654,6 +6634,10 @@ static bool whisper_vad(
6654
6634
 
6655
6635
  whisper_vad_segments * vad_segments = whisper_vad_segments_from_samples(vctx, vad_params, samples, n_samples);
6656
6636
 
6637
+ if (!vad_segments) {
6638
+ return false;
6639
+ }
6640
+
6657
6641
  if (vad_segments->data.size() > 0) {
6658
6642
  state->has_vad_segments = true;
6659
6643
  ctx->state->vad_segments.clear();
@@ -6696,7 +6680,6 @@ static bool whisper_vad(
6696
6680
  } catch (const std::bad_alloc & /* e */) {
6697
6681
  WHISPER_LOG_ERROR("%s: failed to allocate memory for filtered samples\n", __func__);
6698
6682
  whisper_vad_free_segments(vad_segments);
6699
- whisper_vad_free(vctx);
6700
6683
  return false;
6701
6684
  }
6702
6685
 
@@ -6802,6 +6785,7 @@ static bool whisper_vad(
6802
6785
  __func__, n_samples, filtered_n_samples, 100.0f * (1.0f - (float)filtered_n_samples / n_samples));
6803
6786
  }
6804
6787
 
6788
+ whisper_vad_free_segments(vad_segments);
6805
6789
  return true;
6806
6790
  }
6807
6791
 
@@ -6910,17 +6894,22 @@ int whisper_full_with_state(
6910
6894
  decoder.rng = std::mt19937(j);
6911
6895
  }
6912
6896
 
6913
- // the accumulated text context so far
6914
- auto & prompt_past = state->prompt_past;
6897
+ // the accumulated text context split into static (prompt_past0) and dynamic (prompt_past1)
6898
+ auto & prompt_past0 = state->prompt_past0;
6899
+ auto & prompt_past1 = state->prompt_past1;
6915
6900
  if (params.no_context) {
6916
- prompt_past.clear();
6901
+ prompt_past0.clear();
6902
+ prompt_past1.clear();
6917
6903
  }
6918
6904
 
6905
+ // calculate the maximum context budget for prompt history
6906
+ const int max_prompt_ctx = std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2);
6907
+
6919
6908
  // prepare prompt
6920
6909
  {
6921
6910
  std::vector<whisper_token> prompt_tokens;
6922
6911
 
6923
- // initial prompt
6912
+ // tokenize the initial prompt
6924
6913
  if (!params.prompt_tokens && params.initial_prompt) {
6925
6914
  prompt_tokens.resize(1024);
6926
6915
  int n_needed = whisper_tokenize(ctx, params.initial_prompt, prompt_tokens.data(), prompt_tokens.size());
@@ -6932,14 +6921,25 @@ int whisper_full_with_state(
6932
6921
  params.prompt_tokens = prompt_tokens.data();
6933
6922
  params.prompt_n_tokens = prompt_tokens.size();
6934
6923
  }
6935
-
6936
- // prepend the prompt tokens to the prompt_past
6937
6924
  if (params.prompt_tokens && params.prompt_n_tokens > 0) {
6938
- // parse tokens from the pointer
6939
- for (int i = 0; i < params.prompt_n_tokens; i++) {
6940
- prompt_past.push_back(params.prompt_tokens[i]);
6925
+ if (params.carry_initial_prompt) {
6926
+ if (prompt_past0.empty()) {
6927
+ const int max_tokens = std::max(1, max_prompt_ctx - 1);
6928
+
6929
+ if (params.prompt_n_tokens > max_tokens) {
6930
+ WHISPER_LOG_WARN("%s: initial prompt is too long (%d tokens), will use only the last %d tokens\n",
6931
+ __func__, params.prompt_n_tokens, max_tokens);
6932
+ }
6933
+
6934
+ const int n_tokens = std::min(params.prompt_n_tokens, max_tokens);
6935
+ prompt_past0.assign(params.prompt_tokens + (params.prompt_n_tokens - n_tokens), params.prompt_tokens + params.prompt_n_tokens);
6936
+ }
6937
+ } else {
6938
+ for (int i = 0; i < params.prompt_n_tokens; ++i) {
6939
+ prompt_past1.push_back(params.prompt_tokens[i]);
6940
+ }
6941
+ std::rotate(prompt_past1.begin(), prompt_past1.end() - params.prompt_n_tokens, prompt_past1.end());
6941
6942
  }
6942
- std::rotate(prompt_past.begin(), prompt_past.end() - params.prompt_n_tokens, prompt_past.end());
6943
6943
  }
6944
6944
  }
6945
6945
 
@@ -7025,7 +7025,8 @@ int whisper_full_with_state(
7025
7025
  // if there is a very short audio segment left to process, we remove any past prompt since it tends
7026
7026
  // to confuse the decoder and often make it repeat or hallucinate stuff
7027
7027
  if (seek > seek_start && seek + 500 >= seek_end) {
7028
- prompt_past.clear();
7028
+ prompt_past0.clear();
7029
+ prompt_past1.clear();
7029
7030
  }
7030
7031
 
7031
7032
  int best_decoder_id = 0;
@@ -7086,12 +7087,25 @@ int whisper_full_with_state(
7086
7087
  {
7087
7088
  prompt.clear();
7088
7089
 
7089
- // if we have already generated some text, use it as a prompt to condition the next generation
7090
- if (!prompt_past.empty() && t_cur < 0.5f && params.n_max_text_ctx > 0) {
7091
- int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));
7090
+ if (params.n_max_text_ctx > 0 && t_cur < WHISPER_HISTORY_CONDITIONING_TEMP_CUTOFF) {
7091
+ const bool can_take0 = params.carry_initial_prompt && !prompt_past0.empty();
7092
+ const bool can_take1 = !prompt_past1.empty();
7092
7093
 
7093
- prompt = { whisper_token_prev(ctx) };
7094
- prompt.insert(prompt.begin() + 1, prompt_past.end() - n_take, prompt_past.end());
7094
+ if (max_prompt_ctx > 0 && (can_take0 || can_take1)) {
7095
+ // Always start with previous token marker to connect continuity
7096
+ prompt.push_back(whisper_token_prev(ctx));
7097
+
7098
+ // Take static tokens (initial prompt) first
7099
+ int n_take0 = 0;
7100
+ if (can_take0) {
7101
+ n_take0 = prompt_past0.size();
7102
+ prompt.insert(prompt.end(), prompt_past0.end() - n_take0, prompt_past0.end());
7103
+ }
7104
+
7105
+ // Fill remaining budget with dynamic tokens (rolling context)
7106
+ const int n_take1 = std::min<int>(max_prompt_ctx - n_take0 - 1, prompt_past1.size());
7107
+ prompt.insert(prompt.end(), prompt_past1.end() - n_take1, prompt_past1.end());
7108
+ }
7095
7109
  }
7096
7110
 
7097
7111
  // init new transcription with sot, language (opt) and task tokens
@@ -7573,14 +7587,17 @@ int whisper_full_with_state(
7573
7587
 
7574
7588
  //WHISPER_LOG_DEBUG("prompt_init.size() = %d, prompt.size() = %d, result_len = %d, seek_delta = %d\n", prompt_init.size(), prompt.size(), result_len, seek_delta);
7575
7589
 
7576
- // update prompt_past
7577
- prompt_past.clear();
7578
- if (prompt.front() == whisper_token_prev(ctx)) {
7579
- prompt_past.insert(prompt_past.end(), prompt.begin() + 1, prompt.end() - prompt_init.size());
7590
+ // update prompt_past1
7591
+ prompt_past1.clear();
7592
+ if (!params.carry_initial_prompt && !prompt.empty() && prompt.front() == whisper_token_prev(ctx)) {
7593
+ prompt_past1.insert(prompt_past1.end(), prompt.begin() + 1, prompt.end() - prompt_init.size());
7580
7594
  }
7581
7595
 
7582
- for (int i = 0; i < result_len && !is_no_speech; ++i) {
7583
- prompt_past.push_back(tokens_cur[i].id);
7596
+ // Add newly decoded tokens to the rolling context
7597
+ if (!is_no_speech) {
7598
+ for (int i = 0; i < result_len; ++i) {
7599
+ prompt_past1.push_back(tokens_cur[i].id);
7600
+ }
7584
7601
  }
7585
7602
 
7586
7603
  if (!tokens_cur.empty() && ctx->model.n_loaded > 0 && !is_no_speech) {
@@ -8952,7 +8969,7 @@ void whisper_log_set(wsp_ggml_log_callback log_callback, void * user_data) {
8952
8969
  }
8953
8970
 
8954
8971
  const char * whisper_version(void) {
8955
- return "1.7.6";
8972
+ return "1.8.0";
8956
8973
  }
8957
8974
 
8958
8975
  WSP_GGML_ATTRIBUTE_FORMAT(2, 3)
package/cpp/whisper.h CHANGED
@@ -526,6 +526,7 @@ extern "C" {
526
526
  // use whisper_tokenize() to convert text to tokens
527
527
  // maximum of whisper_n_text_ctx()/2 tokens are used (typically 224)
528
528
  const char * initial_prompt;
529
+ bool carry_initial_prompt; // if true, always prepend initial_prompt to every decode window (may reduce conditioning on previous text)
529
530
  const whisper_token * prompt_tokens;
530
531
  int prompt_n_tokens;
531
532