whisper.rn 0.5.2 → 0.5.4

This diff shows the content of publicly available package versions as published to their respective public registries, and is provided for informational purposes only.
Files changed (68)
  1. package/README.md +1 -1
  2. package/cpp/ggml-alloc.c +11 -4
  3. package/cpp/ggml-backend-reg.cpp +8 -0
  4. package/cpp/ggml-backend.cpp +0 -2
  5. package/cpp/ggml-cpu/arch/arm/quants.c +428 -26
  6. package/cpp/ggml-cpu/ggml-cpu-impl.h +3 -1
  7. package/cpp/ggml-cpu/ggml-cpu.c +50 -21
  8. package/cpp/ggml-cpu/ops.cpp +458 -349
  9. package/cpp/ggml-cpu/ops.h +4 -4
  10. package/cpp/ggml-cpu/repack.cpp +143 -29
  11. package/cpp/ggml-cpu/simd-mappings.h +25 -25
  12. package/cpp/ggml-cpu/unary-ops.cpp +16 -0
  13. package/cpp/ggml-cpu/unary-ops.h +2 -0
  14. package/cpp/ggml-cpu/vec.cpp +17 -0
  15. package/cpp/ggml-cpu/vec.h +10 -0
  16. package/cpp/ggml-impl.h +17 -1
  17. package/cpp/ggml-metal/ggml-metal-context.m +5 -6
  18. package/cpp/ggml-metal/ggml-metal-device.cpp +101 -4
  19. package/cpp/ggml-metal/ggml-metal-device.h +8 -1
  20. package/cpp/ggml-metal/ggml-metal-device.m +216 -14
  21. package/cpp/ggml-metal/ggml-metal-impl.h +90 -2
  22. package/cpp/ggml-metal/ggml-metal-ops.cpp +346 -85
  23. package/cpp/ggml-metal/ggml-metal-ops.h +2 -0
  24. package/cpp/ggml-metal/ggml-metal.cpp +5 -0
  25. package/cpp/ggml-metal/ggml-metal.metal +12436 -0
  26. package/cpp/ggml.c +154 -5
  27. package/cpp/ggml.h +73 -0
  28. package/cpp/whisper.cpp +6 -2
  29. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +17 -1
  30. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +73 -0
  31. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  32. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
  33. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  34. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +17 -1
  35. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +73 -0
  36. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  37. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  38. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
  39. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  40. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +17 -1
  41. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +73 -0
  42. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  43. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
  44. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  45. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +17 -1
  46. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +73 -0
  47. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  48. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  49. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
  50. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  51. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +156 -12
  52. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  53. package/lib/module/realtime-transcription/RealtimeTranscriber.js +155 -12
  54. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  55. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +29 -0
  56. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
  57. package/lib/typescript/realtime-transcription/types.d.ts +7 -0
  58. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
  59. package/package.json +1 -1
  60. package/src/realtime-transcription/RealtimeTranscriber.ts +179 -9
  61. package/src/realtime-transcription/types.ts +9 -0
  62. package/whisper-rn.podspec +1 -1
  63. package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
  64. package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
  65. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  66. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  67. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
package/cpp/ggml.c CHANGED
@@ -943,6 +943,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "COS",
     "SUM",
     "SUM_ROWS",
+    "CUMSUM",
     "MEAN",
     "ARGMAX",
     "COUNT_EQUAL",
@@ -998,6 +999,8 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
     "LEAKY_RELU",
+    "TRI",
+    "FILL",
 
     "FLASH_ATTN_EXT",
     "FLASH_ATTN_BACK",
@@ -1010,6 +1013,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "RWKV_WKV6",
     "GATED_LINEAR_ATTN",
     "RWKV_WKV7",
+    "SOLVE_TRI",
 
     "UNARY",
 
@@ -1027,7 +1031,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "GLU",
 };
 
-static_assert(WSP_GGML_OP_COUNT == 90, "WSP_GGML_OP_COUNT != 90");
+static_assert(WSP_GGML_OP_COUNT == 94, "WSP_GGML_OP_COUNT != 94");
 
 static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "none",
@@ -1047,6 +1051,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "cos(x)",
     "Σx",
     "Σx_k",
+    "cumsum(x)",
     "Σx/n",
     "argmax(x)",
     "count_equal(x)",
@@ -1102,6 +1107,8 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
     "leaky_relu(x)",
+    "tri(x)",
+    "fill(x, c)",
 
     "flash_attn_ext(x)",
     "flash_attn_back(x)",
@@ -1114,6 +1121,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "rwkv_wkv6(k, v, r, tf, td, s)",
     "gated_linear_attn(k, v, q, gate, s)",
     "rwkv_wkv7(r, w, k, v, a, b, s)",
+    "A X = B, A triangular, solve X",
 
     "unary(x)",
 
@@ -1131,7 +1139,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "glu(x)",
 };
 
-static_assert(WSP_GGML_OP_COUNT == 90, "WSP_GGML_OP_COUNT != 90");
+static_assert(WSP_GGML_OP_COUNT == 94, "WSP_GGML_OP_COUNT != 94");
 
 static_assert(WSP_GGML_OP_POOL_COUNT == 2, "WSP_GGML_OP_POOL_COUNT != 2");
 
@@ -1150,6 +1158,8 @@ static const char * WSP_GGML_UNARY_OP_NAME[WSP_GGML_UNARY_OP_COUNT] = {
     "HARDSWISH",
     "HARDSIGMOID",
     "EXP",
+    "EXPM1",
+    "SOFTPLUS",
     "GELU_ERF",
     "XIELU",
     "FLOOR",
@@ -1158,7 +1168,7 @@ static const char * WSP_GGML_UNARY_OP_NAME[WSP_GGML_UNARY_OP_COUNT] = {
     "TRUNC",
 };
 
-static_assert(WSP_GGML_UNARY_OP_COUNT == 20, "WSP_GGML_UNARY_OP_COUNT != 20");
+static_assert(WSP_GGML_UNARY_OP_COUNT == 22, "WSP_GGML_UNARY_OP_COUNT != 22");
 
 static const char * WSP_GGML_GLU_OP_NAME[WSP_GGML_GLU_OP_COUNT] = {
     "REGLU",
@@ -2266,6 +2276,30 @@ struct wsp_ggml_tensor * wsp_ggml_log_inplace(
     return wsp_ggml_log_impl(ctx, a, true);
 }
 
+struct wsp_ggml_tensor * wsp_ggml_expm1(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_EXPM1);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_expm1_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_EXPM1);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_softplus(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_SOFTPLUS);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_softplus_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_SOFTPLUS);
+}
+
 // wsp_ggml_sin
 
 static struct wsp_ggml_tensor * wsp_ggml_sin_impl(
@@ -2349,6 +2383,21 @@ struct wsp_ggml_tensor * wsp_ggml_sum_rows(
     return result;
 }
 
+// wsp_ggml_cumsum
+
+struct wsp_ggml_tensor * wsp_ggml_cumsum(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    WSP_GGML_ASSERT(a->type == WSP_GGML_TYPE_F32);
+
+    struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
+
+    result->op     = WSP_GGML_OP_CUMSUM;
+    result->src[0] = a;
+
+    return result;
+}
+
 // wsp_ggml_mean
 
 struct wsp_ggml_tensor * wsp_ggml_mean(
@@ -2676,8 +2725,8 @@ struct wsp_ggml_tensor * wsp_ggml_xielu(
     struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
 
     wsp_ggml_set_op_params_i32(result, 0, (int32_t) WSP_GGML_UNARY_OP_XIELU);
-    wsp_ggml_set_op_params_f32(result, 1, beta + wsp_ggml_softplus(alpha_n));
-    wsp_ggml_set_op_params_f32(result, 2, wsp_ggml_softplus(alpha_p));
+    wsp_ggml_set_op_params_f32(result, 1, beta + wsp_ggml_compute_softplus_f32(alpha_n));
+    wsp_ggml_set_op_params_f32(result, 2, wsp_ggml_compute_softplus_f32(alpha_p));
     wsp_ggml_set_op_params_f32(result, 3, beta);
     wsp_ggml_set_op_params_f32(result, 4, eps);
 
@@ -5036,6 +5085,61 @@ struct wsp_ggml_tensor * wsp_ggml_timestep_embedding(
     return result;
 }
 
+// wsp_ggml_tri
+
+struct wsp_ggml_tensor * wsp_ggml_tri(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        enum wsp_ggml_tri_type    type) {
+    WSP_GGML_ASSERT(a->type == WSP_GGML_TYPE_F32);
+
+    WSP_GGML_ASSERT(wsp_ggml_is_contiguous(a));
+    WSP_GGML_ASSERT(a->ne[0] == a->ne[1]);
+
+    struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
+
+    wsp_ggml_set_op_params_i32(result, 0, type);
+
+    result->op     = WSP_GGML_OP_TRI;
+    result->src[0] = a;
+
+    return result;
+}
+
+// wsp_ggml_fill
+
+static struct wsp_ggml_tensor * wsp_ggml_fill_impl(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float                     c,
+        bool                      inplace) {
+    WSP_GGML_ASSERT(a->type == WSP_GGML_TYPE_F32);
+    WSP_GGML_ASSERT(wsp_ggml_is_contiguous(a));
+
+    struct wsp_ggml_tensor * result = inplace ? wsp_ggml_view_tensor(ctx, a) : wsp_ggml_dup_tensor(ctx, a);
+
+    wsp_ggml_set_op_params_f32(result, 0, c);
+
+    result->op     = WSP_GGML_OP_FILL;
+    result->src[0] = a;
+
+    return result;
+}
+
+struct wsp_ggml_tensor * wsp_ggml_fill(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float                     c) {
+    return wsp_ggml_fill_impl(ctx, a, c, false);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_fill_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float                     c) {
+    return wsp_ggml_fill_impl(ctx, a, c, true);
+}
+
 // wsp_ggml_argsort
 
 struct wsp_ggml_tensor * wsp_ggml_argsort(
@@ -5890,6 +5994,41 @@ struct wsp_ggml_tensor * wsp_ggml_opt_step_sgd(
     return result;
 }
 
+// solve_tri
+
+struct wsp_ggml_tensor * wsp_ggml_solve_tri(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        struct wsp_ggml_tensor  * b,
+        bool                      left,
+        bool                      lower,
+        bool                      uni) {
+    WSP_GGML_ASSERT(a->type == WSP_GGML_TYPE_F32);
+    WSP_GGML_ASSERT(b->type == WSP_GGML_TYPE_F32);
+
+    // A must be square and lower diagonal
+    WSP_GGML_ASSERT(a->ne[0] == a->ne[1]);
+    // B must have same outer dimension as A
+    WSP_GGML_ASSERT(a->ne[1] == b->ne[1]);
+
+    // batch dimensions must be equal
+    WSP_GGML_ASSERT(a->ne[2] == b->ne[2]);
+    WSP_GGML_ASSERT(a->ne[3] == b->ne[3]);
+
+    WSP_GGML_ASSERT(wsp_ggml_is_contiguous(a));
+    WSP_GGML_ASSERT(wsp_ggml_is_contiguous(b));
+
+    WSP_GGML_ASSERT(lower && left && !uni); // TODO: support other variants
+
+    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_4d(ctx, WSP_GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
+
+    result->op     = WSP_GGML_OP_SOLVE_TRI;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 struct wsp_ggml_hash_set wsp_ggml_hash_set_new(size_t size) {
@@ -6462,6 +6601,16 @@ static void wsp_ggml_compute_backward(
                     wsp_ggml_add_or_set(ctx, cgraph, isrc0, wsp_ggml_mul(ctx, tensor, grad));
                 }
             } break;
+            case WSP_GGML_UNARY_OP_EXPM1: {
+                if (src0_needs_grads) {
+                    wsp_ggml_add_or_set(ctx, cgraph, isrc0, wsp_ggml_mul(ctx, grad, wsp_ggml_exp(ctx, src0)));
+                }
+            } break;
+            case WSP_GGML_UNARY_OP_SOFTPLUS: {
+                if (src0_needs_grads) {
+                    wsp_ggml_add_or_set(ctx, cgraph, isrc0, wsp_ggml_mul(ctx, grad, wsp_ggml_sigmoid(ctx, src0)));
+                }
+            } break;
             default: {
                 fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
                     __func__, wsp_ggml_unary_op_name(wsp_ggml_get_unary_op(tensor)));
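
The two new backward cases encode the textbook derivatives of the added unary ops:

$$\frac{d}{dx}\left(e^x - 1\right) = e^x, \qquad \frac{d}{dx}\log\left(1 + e^x\right) = \frac{e^x}{1 + e^x} = \sigma(x),$$

which is why the gradient contributions above are grad · exp(src0) for EXPM1 and grad · sigmoid(src0) for SOFTPLUS.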
package/cpp/ggml.h CHANGED
@@ -242,6 +242,7 @@
 #define WSP_GGML_ROPE_TYPE_NEOX   2
 #define WSP_GGML_ROPE_TYPE_MROPE  8
 #define WSP_GGML_ROPE_TYPE_VISION 24
+#define WSP_GGML_ROPE_TYPE_IMROPE 40 // binary: 101000
 
 #define WSP_GGML_MROPE_SECTIONS 4
 
@@ -474,6 +475,7 @@ extern "C" {
         WSP_GGML_OP_COS,
         WSP_GGML_OP_SUM,
         WSP_GGML_OP_SUM_ROWS,
+        WSP_GGML_OP_CUMSUM,
         WSP_GGML_OP_MEAN,
         WSP_GGML_OP_ARGMAX,
         WSP_GGML_OP_COUNT_EQUAL,
@@ -529,6 +531,8 @@ extern "C" {
         WSP_GGML_OP_TIMESTEP_EMBEDDING,
         WSP_GGML_OP_ARGSORT,
         WSP_GGML_OP_LEAKY_RELU,
+        WSP_GGML_OP_TRI,
+        WSP_GGML_OP_FILL,
 
         WSP_GGML_OP_FLASH_ATTN_EXT,
         WSP_GGML_OP_FLASH_ATTN_BACK,
@@ -541,6 +545,7 @@ extern "C" {
         WSP_GGML_OP_RWKV_WKV6,
         WSP_GGML_OP_GATED_LINEAR_ATTN,
         WSP_GGML_OP_RWKV_WKV7,
+        WSP_GGML_OP_SOLVE_TRI,
 
         WSP_GGML_OP_UNARY,
 
@@ -575,6 +580,8 @@ extern "C" {
         WSP_GGML_UNARY_OP_HARDSWISH,
         WSP_GGML_UNARY_OP_HARDSIGMOID,
         WSP_GGML_UNARY_OP_EXP,
+        WSP_GGML_UNARY_OP_EXPM1,
+        WSP_GGML_UNARY_OP_SOFTPLUS,
         WSP_GGML_UNARY_OP_GELU_ERF,
         WSP_GGML_UNARY_OP_XIELU,
         WSP_GGML_UNARY_OP_FLOOR,
@@ -619,6 +626,13 @@ extern "C" {
         WSP_GGML_TENSOR_FLAG_LOSS   =  8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };
 
+    enum wsp_ggml_tri_type {
+        WSP_GGML_TRI_TYPE_UPPER_DIAG = 0,
+        WSP_GGML_TRI_TYPE_UPPER      = 1,
+        WSP_GGML_TRI_TYPE_LOWER_DIAG = 2,
+        WSP_GGML_TRI_TYPE_LOWER      = 3
+    };
+
     struct wsp_ggml_init_params {
         // memory pool
         size_t mem_size;   // bytes
@@ -956,6 +970,22 @@ extern "C" {
             struct wsp_ggml_context * ctx,
            struct wsp_ggml_tensor  * a);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_expm1(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_expm1_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_softplus(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_softplus_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sin(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
@@ -982,6 +1012,10 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cumsum(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
     // mean along rows
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mean(
             struct wsp_ggml_context * ctx,
@@ -2107,6 +2141,7 @@ extern "C" {
     enum wsp_ggml_scale_mode {
         WSP_GGML_SCALE_MODE_NEAREST  = 0,
         WSP_GGML_SCALE_MODE_BILINEAR = 1,
+        WSP_GGML_SCALE_MODE_BICUBIC  = 2,
 
         WSP_GGML_SCALE_MODE_COUNT
     };
@@ -2185,6 +2220,23 @@ extern "C" {
             int                   shift2,
             int                   shift3);
 
+    // Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
+    // zeroes everywhere outside the masked area
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tri(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            enum wsp_ggml_tri_type    type);
+
+    // Fill tensor a with constant c
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_fill(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float                     c);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_fill_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float                     c);
 
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
@@ -2354,6 +2406,27 @@ extern "C" {
             struct wsp_ggml_tensor * b,
             struct wsp_ggml_tensor * state);
 
+    /* Solves a specific equation of the form Ax=B, where A is a triangular matrix
+     * without zeroes on the diagonal (i.e. invertible).
+     * B can have any number of columns, but must have the same number of rows as A
+     * If A is [n, n] and B is [n, m], then the result will be [n, m] as well
+     * Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
+     * where n > 100 sparingly, pre-chunk if necessary.
+     *
+     * If left = false, solves xA=B instead
+     * If lower = false, assumes upper triangular instead
+     * If uni = true, assumes diagonal of A to be all ones (will override actual values)
+     *
+     * TODO: currently only lower, right, non-unitriangular variant is implemented
+     */
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_solve_tri(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
+            bool                      left,
+            bool                      lower,
+            bool                      uni);
+
     // custom operators
 
     typedef void (*wsp_ggml_custom1_op_t)(struct wsp_ggml_tensor * dst , const struct wsp_ggml_tensor * a, int ith, int nth, void * userdata);
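
For the variant wsp_ggml_solve_tri currently accepts (the assertion in ggml.c requires lower && left && !uni, so the header's TODO saying "right" appears to be a typo for "left"), each right-hand-side column is recovered by forward substitution,

$$x_i = \frac{1}{a_{ii}}\Big(b_i - \sum_{j<i} a_{ij}\,x_j\Big),$$

which costs O(n^2) per column and hence the O(n^3) the comment warns about when the number of columns is on the order of n. Below is a minimal, construction-only sketch of how the new public ops fit together. It only builds graph nodes (backend and compute wiring is omitted), and the header name and memory size are assumptions, not taken from the package:

#include "ggml.h"

void sketch_new_ops(void) {
    struct wsp_ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct wsp_ggml_context * ctx = wsp_ggml_init(params);

    // n = 4 unknowns, m = 2 right-hand sides (ne[1] of b must match ne[1] of a)
    struct wsp_ggml_tensor * a = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, 4, 4);
    struct wsp_ggml_tensor * b = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, 2, 4);

    // mask a down to its lower triangle, diagonal included
    struct wsp_ggml_tensor * l = wsp_ggml_tri(ctx, a, WSP_GGML_TRI_TYPE_LOWER_DIAG);

    // the only variant currently accepted: left = true, lower = true, uni = false
    struct wsp_ggml_tensor * x = wsp_ggml_solve_tri(ctx, l, b, true, true, false);

    // running sums along rows, e.g. [1, 2, 3] -> [1, 3, 6]
    struct wsp_ggml_tensor * cs = wsp_ggml_cumsum(ctx, b);

    // a tensor of the same shape as a, filled with the constant 1.0f
    struct wsp_ggml_tensor * ones = wsp_ggml_fill(ctx, a, 1.0f);

    (void) x; (void) cs; (void) ones;
    wsp_ggml_free(ctx);
}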
package/cpp/whisper.cpp CHANGED
@@ -1296,7 +1296,11 @@ static wsp_ggml_backend_t whisper_backend_init_gpu(const whisper_context_params
     if (params.use_gpu) {
         for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
             wsp_ggml_backend_dev_t dev_cur = wsp_ggml_backend_dev_get(i);
-            if (wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
+            enum wsp_ggml_backend_dev_type dev_type = wsp_ggml_backend_dev_type(dev_cur);
+            const char * dev_name = wsp_ggml_backend_dev_name(dev_cur);
+            WHISPER_LOG_INFO("%s: device %zu: %s (type: %d)\n", __func__, i, dev_name, dev_type);
+            if (dev_type == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || dev_type == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
+                WHISPER_LOG_INFO("%s: found GPU device %zu: %s (type: %d, cnt: %d)\n", __func__, i, dev_name, dev_type, cnt);
                 if (cnt == params.gpu_device) {
                     dev = dev_cur;
                 }
@@ -6693,7 +6697,7 @@ static bool whisper_vad(
         }
 
         segment_start_samples = std::min(segment_start_samples, n_samples - 1);
-        segment_end_samples   = std::min(segment_end_samples, n_samples);
+        segment_end_samples   = std::min(segment_end_samples, n_samples - 1);
         int segment_length = segment_end_samples - segment_start_samples;
         if (segment_length > 0) {
             whisper_state::vad_segment_info segment;
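
The VAD change is a one-sample clamp fix: valid sample indices run from 0 to n_samples - 1, and segment_end_samples was previously allowed to equal n_samples, one past the last valid index. Clamping it to n_samples - 1 matches the clamp already applied to segment_start_samples on the line above.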
package/cpp/ggml-impl.h CHANGED
@@ -102,7 +102,7 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
     }
 }
 
-static inline float wsp_ggml_softplus(float input) {
+static inline float wsp_ggml_compute_softplus_f32(float input) {
     return (input > 20.0f) ? input : logf(1 + expf(input));
 }
 //
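
The renamed helper keeps the usual numerically stable softplus. For large inputs,

$$\log(1 + e^x) = x + \log(1 + e^{-x}), \qquad \log(1 + e^{-20}) \approx 2.06 \times 10^{-9},$$

so at the 20.0f cutoff the dropped term is far below single-precision resolution around 20 and returning the input unchanged is exact in float, while also avoiding overflow of expf (which saturates past x ≈ 88).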
@@ -682,6 +682,7 @@ static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgr
 #endif
 
 #ifdef __cplusplus
+#include <array>
 #include <initializer_list>
 #include <vector>
 
@@ -697,6 +698,21 @@ inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * c
     return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
 }
 
+// Return true if the edges in the graph match expectations.
+inline bool wsp_ggml_check_edges(const struct wsp_ggml_cgraph * cgraph,
+                                 int start_idx,
+                                 std::initializer_list<std::array<int, 3>> edges) {
+    for (const auto & edge : edges) {
+        int dst_node = edge[0];
+        int src_idx  = edge[1];
+        int src_node = edge[2];
+        if (cgraph->nodes[start_idx + dst_node]->src[src_idx] != cgraph->nodes[start_idx + src_node]) {
+            return false;
+        }
+    }
+    return true;
+}
+
 // expose GGUF internals for test code
 WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
 WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
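
The new wsp_ggml_check_edges helper supports fusion checks: each edge triple {dst_node, src_idx, src_node} asserts that one node in a graph window feeds a specific source slot of another. A hypothetical usage sketch (the function name and node indices are illustrative, not from the package):

#include "ggml-impl.h" // provides wsp_ggml_check_edges in the C++ section

// Verify a 3-node window forms the expected chain before fusing it; node
// indices inside the edge triples are relative to start_idx.
static bool can_fuse_window(const struct wsp_ggml_cgraph * cgraph, int start_idx) {
    return wsp_ggml_check_edges(cgraph, start_idx, {
        { 1, 0, 0 },  // node[start_idx + 1]->src[0] must be node[start_idx + 0]
        { 2, 1, 1 },  // node[start_idx + 2]->src[1] must be node[start_idx + 1]
    });
}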