whisper.rn 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/android/src/main/jni.cpp +12 -3
  2. package/cpp/ggml-alloc.c +38 -14
  3. package/cpp/ggml-backend-impl.h +0 -3
  4. package/cpp/ggml-backend.h +2 -0
  5. package/cpp/ggml-cpu/amx/amx.cpp +1 -0
  6. package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
  7. package/cpp/ggml-cpu/ggml-cpu.c +17 -3
  8. package/cpp/ggml-cpu/ops.cpp +33 -17
  9. package/cpp/ggml-cpu/unary-ops.cpp +135 -0
  10. package/cpp/ggml-cpu/unary-ops.h +5 -0
  11. package/cpp/ggml-cpu/vec.cpp +66 -0
  12. package/cpp/ggml-cpu/vec.h +10 -8
  13. package/cpp/ggml-impl.h +51 -2
  14. package/cpp/ggml-metal/ggml-metal-common.cpp +2 -2
  15. package/cpp/ggml-metal/ggml-metal-device.cpp +199 -10
  16. package/cpp/ggml-metal/ggml-metal-device.h +18 -0
  17. package/cpp/ggml-metal/ggml-metal-device.m +27 -14
  18. package/cpp/ggml-metal/ggml-metal-impl.h +87 -7
  19. package/cpp/ggml-metal/ggml-metal-ops.cpp +513 -88
  20. package/cpp/ggml-metal/ggml-metal-ops.h +6 -0
  21. package/cpp/ggml-metal/ggml-metal.cpp +3 -3
  22. package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
  23. package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
  24. package/cpp/ggml.c +166 -2
  25. package/cpp/ggml.h +66 -0
  26. package/cpp/jsi/RNWhisperJSI.cpp +7 -2
  27. package/cpp/rn-whisper.h +1 -0
  28. package/cpp/whisper.cpp +4 -2
  29. package/ios/RNWhisperContext.mm +3 -1
  30. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  31. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  32. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +51 -2
  33. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +66 -0
  34. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  35. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  36. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  37. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  38. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  39. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +51 -2
  40. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +66 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  43. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  44. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  45. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  46. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +51 -2
  47. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +66 -0
  48. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  49. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  50. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  51. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  52. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  53. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +51 -2
  54. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +66 -0
  55. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  56. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  57. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  58. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  59. package/lib/commonjs/version.json +1 -1
  60. package/lib/module/NativeRNWhisper.js.map +1 -1
  61. package/lib/module/version.json +1 -1
  62. package/lib/typescript/NativeRNWhisper.d.ts +2 -0
  63. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  64. package/package.json +1 -1
  65. package/src/NativeRNWhisper.ts +2 -0
  66. package/src/version.json +1 -1
@@ -39,6 +39,8 @@ size_t wsp_ggml_metal_op_mul_mat_id_extra_ids(const struct wsp_ggml_tensor * op)
39
39
  // return true if we should use the FA vector kernel for this op
40
40
  bool wsp_ggml_metal_op_flash_attn_ext_use_vec(const struct wsp_ggml_tensor * op);
41
41
 
42
+ size_t wsp_ggml_metal_op_flash_attn_ext_extra_pad(const struct wsp_ggml_tensor * op);
43
+ size_t wsp_ggml_metal_op_flash_attn_ext_extra_blk(const struct wsp_ggml_tensor * op);
42
44
  size_t wsp_ggml_metal_op_flash_attn_ext_extra_tmp(const struct wsp_ggml_tensor * op);
43
45
 
44
46
  int wsp_ggml_metal_op_concat (wsp_ggml_metal_op_t ctx, int idx);
@@ -48,6 +50,7 @@ int wsp_ggml_metal_op_scale (wsp_ggml_metal_op_t ctx, int idx);
48
50
  int wsp_ggml_metal_op_clamp (wsp_ggml_metal_op_t ctx, int idx);
49
51
  int wsp_ggml_metal_op_unary (wsp_ggml_metal_op_t ctx, int idx);
50
52
  int wsp_ggml_metal_op_glu (wsp_ggml_metal_op_t ctx, int idx);
53
+ int wsp_ggml_metal_op_sum (wsp_ggml_metal_op_t ctx, int idx);
51
54
  int wsp_ggml_metal_op_sum_rows (wsp_ggml_metal_op_t ctx, int idx);
52
55
  int wsp_ggml_metal_op_get_rows (wsp_ggml_metal_op_t ctx, int idx);
53
56
  int wsp_ggml_metal_op_set_rows (wsp_ggml_metal_op_t ctx, int idx);
@@ -68,6 +71,7 @@ int wsp_ggml_metal_op_norm (wsp_ggml_metal_op_t ctx, int idx);
68
71
  int wsp_ggml_metal_op_rope (wsp_ggml_metal_op_t ctx, int idx);
69
72
  int wsp_ggml_metal_op_im2col (wsp_ggml_metal_op_t ctx, int idx);
70
73
  int wsp_ggml_metal_op_conv_transpose_1d (wsp_ggml_metal_op_t ctx, int idx);
74
+ int wsp_ggml_metal_op_conv_transpose_2d (wsp_ggml_metal_op_t ctx, int idx);
71
75
  int wsp_ggml_metal_op_upscale (wsp_ggml_metal_op_t ctx, int idx);
72
76
  int wsp_ggml_metal_op_pad (wsp_ggml_metal_op_t ctx, int idx);
73
77
  int wsp_ggml_metal_op_pad_reflect_1d (wsp_ggml_metal_op_t ctx, int idx);
@@ -76,6 +80,8 @@ int wsp_ggml_metal_op_timestep_embedding(wsp_ggml_metal_op_t ctx, int idx);
76
80
  int wsp_ggml_metal_op_argmax (wsp_ggml_metal_op_t ctx, int idx);
77
81
  int wsp_ggml_metal_op_argsort (wsp_ggml_metal_op_t ctx, int idx);
78
82
  int wsp_ggml_metal_op_leaky_relu (wsp_ggml_metal_op_t ctx, int idx);
83
+ int wsp_ggml_metal_op_opt_step_adamw (wsp_ggml_metal_op_t ctx, int idx);
84
+ int wsp_ggml_metal_op_opt_step_sgd (wsp_ggml_metal_op_t ctx, int idx);
79
85
 
80
86
  #ifdef __cplusplus
81
87
  }
@@ -193,9 +193,9 @@ static size_t wsp_ggml_backend_metal_buffer_type_get_alloc_size(wsp_ggml_backend
193
193
  } break;
194
194
  case WSP_GGML_OP_FLASH_ATTN_EXT:
195
195
  {
196
- if (wsp_ggml_metal_op_flash_attn_ext_use_vec(tensor)) {
197
- res += wsp_ggml_metal_op_flash_attn_ext_extra_tmp(tensor);
198
- }
196
+ res += wsp_ggml_metal_op_flash_attn_ext_extra_pad(tensor);
197
+ res += wsp_ggml_metal_op_flash_attn_ext_extra_blk(tensor);
198
+ res += wsp_ggml_metal_op_flash_attn_ext_extra_tmp(tensor);
199
199
  } break;
200
200
  default:
201
201
  break;
package/cpp/ggml.c CHANGED
@@ -1151,10 +1151,14 @@ static const char * WSP_GGML_UNARY_OP_NAME[WSP_GGML_UNARY_OP_COUNT] = {
1151
1151
  "HARDSIGMOID",
1152
1152
  "EXP",
1153
1153
  "GELU_ERF",
1154
+ "XIELU",
1155
+ "FLOOR",
1156
+ "CEIL",
1157
+ "ROUND",
1158
+ "TRUNC",
1154
1159
  };
1155
1160
 
1156
- static_assert(WSP_GGML_UNARY_OP_COUNT == 15, "WSP_GGML_UNARY_OP_COUNT != 15");
1157
-
1161
+ static_assert(WSP_GGML_UNARY_OP_COUNT == 20, "WSP_GGML_UNARY_OP_COUNT != 20");
1158
1162
 
1159
1163
  static const char * WSP_GGML_GLU_OP_NAME[WSP_GGML_GLU_OP_COUNT] = {
1160
1164
  "REGLU",
@@ -2660,6 +2664,29 @@ struct wsp_ggml_tensor * wsp_ggml_silu_inplace(
2660
2664
  return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_SILU);
2661
2665
  }
2662
2666
 
2667
+ // wsp_ggml_xielu
2668
+
2669
+ struct wsp_ggml_tensor * wsp_ggml_xielu(
2670
+ struct wsp_ggml_context * ctx,
2671
+ struct wsp_ggml_tensor * a,
2672
+ float alpha_n,
2673
+ float alpha_p,
2674
+ float beta,
2675
+ float eps) {
2676
+ struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
2677
+
2678
+ wsp_ggml_set_op_params_i32(result, 0, (int32_t) WSP_GGML_UNARY_OP_XIELU);
2679
+ wsp_ggml_set_op_params_f32(result, 1, beta + wsp_ggml_softplus(alpha_n));
2680
+ wsp_ggml_set_op_params_f32(result, 2, wsp_ggml_softplus(alpha_p));
2681
+ wsp_ggml_set_op_params_f32(result, 3, beta);
2682
+ wsp_ggml_set_op_params_f32(result, 4, eps);
2683
+
2684
+ result->op = WSP_GGML_OP_UNARY;
2685
+ result->src[0] = a;
2686
+
2687
+ return result;
2688
+ }
2689
+
2663
2690
  // wsp_ggml_silu_back
2664
2691
 
2665
2692
  struct wsp_ggml_tensor * wsp_ggml_silu_back(
@@ -2734,6 +2761,62 @@ static struct wsp_ggml_tensor * wsp_ggml_glu_impl(
2734
2761
  return result;
2735
2762
  }
2736
2763
 
2764
+ // wsp_ggml_floor
2765
+
2766
+ struct wsp_ggml_tensor * wsp_ggml_floor(
2767
+ struct wsp_ggml_context * ctx,
2768
+ struct wsp_ggml_tensor * a) {
2769
+ return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_FLOOR);
2770
+ }
2771
+
2772
+ struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
2773
+ struct wsp_ggml_context * ctx,
2774
+ struct wsp_ggml_tensor * a) {
2775
+ return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_FLOOR);
2776
+ }
2777
+
2778
+ // wsp_ggml_ceil
2779
+
2780
+ struct wsp_ggml_tensor * wsp_ggml_ceil(
2781
+ struct wsp_ggml_context * ctx,
2782
+ struct wsp_ggml_tensor * a) {
2783
+ return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_CEIL);
2784
+ }
2785
+
2786
+ struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
2787
+ struct wsp_ggml_context * ctx,
2788
+ struct wsp_ggml_tensor * a) {
2789
+ return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_CEIL);
2790
+ }
2791
+
2792
+ //wsp_ggml_round
2793
+
2794
+ struct wsp_ggml_tensor * wsp_ggml_round(
2795
+ struct wsp_ggml_context * ctx,
2796
+ struct wsp_ggml_tensor * a) {
2797
+ return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_ROUND);
2798
+ }
2799
+
2800
+ struct wsp_ggml_tensor * wsp_ggml_round_inplace(
2801
+ struct wsp_ggml_context * ctx,
2802
+ struct wsp_ggml_tensor * a) {
2803
+ return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_ROUND);
2804
+ }
2805
+
2806
+ //wsp_ggml_trunc
2807
+
2808
+ struct wsp_ggml_tensor * wsp_ggml_trunc(
2809
+ struct wsp_ggml_context * ctx,
2810
+ struct wsp_ggml_tensor * a) {
2811
+ return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_TRUNC);
2812
+ }
2813
+
2814
+ struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
2815
+ struct wsp_ggml_context * ctx,
2816
+ struct wsp_ggml_tensor * a) {
2817
+ return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_TRUNC);
2818
+ }
2819
+
2737
2820
  struct wsp_ggml_tensor * wsp_ggml_glu(
2738
2821
  struct wsp_ggml_context * ctx,
2739
2822
  struct wsp_ggml_tensor * a,
@@ -3837,6 +3920,15 @@ struct wsp_ggml_tensor * wsp_ggml_soft_max_ext(
3837
3920
  return wsp_ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
3838
3921
  }
3839
3922
 
3923
+ struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
3924
+ struct wsp_ggml_context * ctx,
3925
+ struct wsp_ggml_tensor * a,
3926
+ struct wsp_ggml_tensor * mask,
3927
+ float scale,
3928
+ float max_bias) {
3929
+ return wsp_ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
3930
+ }
3931
+
3840
3932
  void wsp_ggml_soft_max_add_sinks(
3841
3933
  struct wsp_ggml_tensor * a,
3842
3934
  struct wsp_ggml_tensor * sinks) {
@@ -6880,6 +6972,78 @@ void wsp_ggml_graph_print(const struct wsp_ggml_cgraph * cgraph) {
6880
6972
  WSP_GGML_LOG_INFO("========================================\n");
6881
6973
  }
6882
6974
 
6975
+ static int wsp_ggml_node_list_find_tensor(const struct wsp_ggml_cgraph * cgraph,
6976
+ const int * idxs,
6977
+ int count,
6978
+ const struct wsp_ggml_tensor * tensor) {
6979
+ WSP_GGML_ASSERT(cgraph && idxs);
6980
+ for (int i = 0; i < count; ++i) {
6981
+ const int node_idx = idxs[i];
6982
+
6983
+ if (node_idx >= cgraph->n_nodes) {
6984
+ return -1;
6985
+ }
6986
+ if (cgraph->nodes[node_idx] == tensor) {
6987
+ return i;
6988
+ }
6989
+ }
6990
+ return -1;
6991
+ }
6992
+
6993
+ bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
6994
+ const int * node_idxs,
6995
+ int count,
6996
+ const enum wsp_ggml_op * ops,
6997
+ const int * outputs,
6998
+ int num_outputs) {
6999
+ WSP_GGML_ASSERT(outputs && num_outputs > 0);
7000
+
7001
+ for (int i = 0; i < count; ++i) {
7002
+ if (node_idxs[i] >= cgraph->n_nodes) {
7003
+ return false;
7004
+ }
7005
+
7006
+ const struct wsp_ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7007
+
7008
+ if (node->op != ops[i]) {
7009
+ return false;
7010
+ }
7011
+
7012
+ if (wsp_ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7013
+ continue;
7014
+ }
7015
+
7016
+ if (node->flags & WSP_GGML_TENSOR_FLAG_OUTPUT) {
7017
+ return false;
7018
+ }
7019
+
7020
+ int subgraph_uses = 0;
7021
+ for (int j = i + 1; j < count; ++j) {
7022
+ const struct wsp_ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7023
+ for (int src_idx = 0; src_idx < WSP_GGML_MAX_SRC; src_idx++) {
7024
+ if (other_node->src[src_idx] == node) {
7025
+ subgraph_uses++;
7026
+ }
7027
+ }
7028
+ }
7029
+
7030
+ if (subgraph_uses != wsp_ggml_node_get_use_count(cgraph, node_idxs[i])) {
7031
+ return false;
7032
+ }
7033
+
7034
+ // if node is a view, check if the view_src and all its parent view_srcs are within the subgraph
7035
+ struct wsp_ggml_tensor * view_src = node->view_src;
7036
+ while (view_src) {
7037
+ if (wsp_ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7038
+ return false;
7039
+ }
7040
+ view_src = view_src->view_src;
7041
+ }
7042
+ }
7043
+
7044
+ return true;
7045
+ }
7046
+
6883
7047
  // check if node is part of the graph
6884
7048
  static bool wsp_ggml_graph_find(const struct wsp_ggml_cgraph * cgraph, const struct wsp_ggml_tensor * node) {
6885
7049
  if (cgraph == NULL) {
package/cpp/ggml.h CHANGED
@@ -237,6 +237,8 @@
237
237
  #define WSP_GGML_EXIT_SUCCESS 0
238
238
  #define WSP_GGML_EXIT_ABORTED 1
239
239
 
240
+ // TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
241
+ #define WSP_GGML_ROPE_TYPE_NORMAL 0
240
242
  #define WSP_GGML_ROPE_TYPE_NEOX 2
241
243
  #define WSP_GGML_ROPE_TYPE_MROPE 8
242
244
  #define WSP_GGML_ROPE_TYPE_VISION 24
@@ -574,6 +576,11 @@ extern "C" {
574
576
  WSP_GGML_UNARY_OP_HARDSIGMOID,
575
577
  WSP_GGML_UNARY_OP_EXP,
576
578
  WSP_GGML_UNARY_OP_GELU_ERF,
579
+ WSP_GGML_UNARY_OP_XIELU,
580
+ WSP_GGML_UNARY_OP_FLOOR,
581
+ WSP_GGML_UNARY_OP_CEIL,
582
+ WSP_GGML_UNARY_OP_ROUND,
583
+ WSP_GGML_UNARY_OP_TRUNC,
577
584
 
578
585
  WSP_GGML_UNARY_OP_COUNT,
579
586
  };
@@ -1148,6 +1155,58 @@ extern "C" {
1148
1155
  struct wsp_ggml_context * ctx,
1149
1156
  struct wsp_ggml_tensor * a);
1150
1157
 
1158
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
1159
+ struct wsp_ggml_context * ctx,
1160
+ struct wsp_ggml_tensor * a);
1161
+
1162
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
1163
+ struct wsp_ggml_context * ctx,
1164
+ struct wsp_ggml_tensor * a);
1165
+
1166
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
1167
+ struct wsp_ggml_context * ctx,
1168
+ struct wsp_ggml_tensor * a);
1169
+
1170
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
1171
+ struct wsp_ggml_context * ctx,
1172
+ struct wsp_ggml_tensor * a);
1173
+
1174
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
1175
+ struct wsp_ggml_context * ctx,
1176
+ struct wsp_ggml_tensor * a);
1177
+
1178
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
1179
+ struct wsp_ggml_context * ctx,
1180
+ struct wsp_ggml_tensor * a);
1181
+
1182
+ /**
1183
+ * Truncates the fractional part of each element in the tensor (towards zero).
1184
+ * For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
1185
+ * Similar to std::trunc in C/C++.
1186
+ */
1187
+
1188
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
1189
+ struct wsp_ggml_context * ctx,
1190
+ struct wsp_ggml_tensor * a);
1191
+
1192
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
1193
+ struct wsp_ggml_context * ctx,
1194
+ struct wsp_ggml_tensor * a);
1195
+
1196
+
1197
+
1198
+ // xIELU activation function
1199
+ // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
1200
+ // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
1201
+ // that constrain the positive and negative source alpha values respectively
1202
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
1203
+ struct wsp_ggml_context * ctx,
1204
+ struct wsp_ggml_tensor * a,
1205
+ float alpha_n,
1206
+ float alpha_p,
1207
+ float beta,
1208
+ float eps);
1209
+
1151
1210
  // gated linear unit ops
1152
1211
  // A: n columns, r rows,
1153
1212
  // result is n / 2 columns, r rows,
@@ -1615,6 +1674,13 @@ extern "C" {
1615
1674
  float scale,
1616
1675
  float max_bias);
1617
1676
 
1677
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
1678
+ struct wsp_ggml_context * ctx,
1679
+ struct wsp_ggml_tensor * a,
1680
+ struct wsp_ggml_tensor * mask,
1681
+ float scale,
1682
+ float max_bias);
1683
+
1618
1684
  WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
1619
1685
  struct wsp_ggml_tensor * a,
1620
1686
  struct wsp_ggml_tensor * sinks);
@@ -269,11 +269,13 @@ struct CallbackInfo {
269
269
  std::shared_ptr<Function> onProgressCallback;
270
270
  std::shared_ptr<Function> onNewSegmentsCallback;
271
271
  int jobId;
272
+ int nProcessors;
272
273
  };
273
274
 
274
275
  CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
275
276
  CallbackInfo info;
276
277
  info.jobId = rand(); // Default fallback jobId
278
+ info.nProcessors = 1; // Default to 1 processor
277
279
 
278
280
  try {
279
281
  auto propNames = optionsObj.getPropertyNames(runtime);
@@ -288,6 +290,8 @@ CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
288
290
  info.onNewSegmentsCallback = std::make_shared<Function>(propValue.getObject(runtime).getFunction(runtime));
289
291
  } else if (propName == "jobId" && propValue.isNumber()) {
290
292
  info.jobId = (int)propValue.getNumber();
293
+ } else if (propName == "nProcessors" && propValue.isNumber()) {
294
+ info.nProcessors = (int)propValue.getNumber();
291
295
  }
292
296
  }
293
297
  } catch (...) {
@@ -551,12 +555,13 @@ void installJSIBindings(
551
555
  code = -2;
552
556
  } else {
553
557
  try {
554
- code = whisper_full(context, job->params, audioResult.data.data(), audioResult.count);
558
+ job->n_processors = callbackInfo.nProcessors;
559
+ code = whisper_full_parallel(context, job->params, audioResult.data.data(), audioResult.count, job->n_processors);
555
560
  if (job->is_aborted()) {
556
561
  code = -999;
557
562
  }
558
563
  } catch (...) {
559
- logError("Exception during whisper_full transcription");
564
+ logError("Exception during whisper_full_parallel transcription");
560
565
  code = -3;
561
566
  }
562
567
  rnwhisper::job_remove(callbackInfo.jobId);
package/cpp/rn-whisper.h CHANGED
@@ -24,6 +24,7 @@ struct job {
24
24
  int job_id;
25
25
  bool aborted = false;
26
26
  whisper_full_params params;
27
+ int n_processors = 1;
27
28
 
28
29
  ~job();
29
30
  bool is_aborted();
package/cpp/whisper.cpp CHANGED
@@ -1296,7 +1296,7 @@ static wsp_ggml_backend_t whisper_backend_init_gpu(const whisper_context_params
1296
1296
  if (params.use_gpu) {
1297
1297
  for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
1298
1298
  wsp_ggml_backend_dev_t dev_cur = wsp_ggml_backend_dev_get(i);
1299
- if (wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
1299
+ if (wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
1300
1300
  if (cnt == params.gpu_device) {
1301
1301
  dev = dev_cur;
1302
1302
  }
@@ -1365,7 +1365,7 @@ static buft_list_t make_buft_list(whisper_context_params & params) {
1365
1365
  int cnt = 0;
1366
1366
  for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
1367
1367
  wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
1368
- if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
1368
+ if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
1369
1369
  if (cnt == params.gpu_device) {
1370
1370
  auto * buft = wsp_ggml_backend_dev_buffer_type(dev);
1371
1371
  if (buft) {
@@ -1403,6 +1403,7 @@ static bool weight_buft_supported(const whisper_hparams & hparams, wsp_ggml_tens
1403
1403
  bool op_supported = true;
1404
1404
 
1405
1405
  if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
1406
+ wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
1406
1407
  (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
1407
1408
  // GPU and default CPU backend support all operators
1408
1409
  op_supported = true;
@@ -4459,6 +4460,7 @@ static bool weight_buft_supported(const whisper_vad_hparams & hparams, wsp_ggml_
4459
4460
  bool op_supported = true;
4460
4461
 
4461
4462
  if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
4463
+ wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
4462
4464
  (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
4463
4465
  // GPU and default CPU backend support all operators
4464
4466
  op_supported = true;
@@ -168,6 +168,7 @@ static void* retained_log_block = nullptr;
168
168
  self->recordState.sliceNSamples.push_back(0);
169
169
 
170
170
  self->recordState.job = rnwhisper::job_new(jobId, [self createParams:options jobId:jobId]);
171
+ self->recordState.job->n_processors = options[@"nProcessors"] != nil ? [options[@"nProcessors"] intValue] : 1;
171
172
  self->recordState.job->set_realtime_params(
172
173
  {
173
174
  .use_vad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false,
@@ -471,6 +472,7 @@ struct rnwhisper_segments_callback_data {
471
472
  }
472
473
 
473
474
  rnwhisper::job* job = rnwhisper::job_new(jobId, params);
475
+ job->n_processors = options[@"nProcessors"] != nil ? [options[@"nProcessors"] intValue] : 1;
474
476
  self->recordState.job = job;
475
477
  int code = [self fullTranscribe:job audioData:audioData audioDataCount:audioDataCount];
476
478
  rnwhisper::job_remove(jobId);
@@ -572,7 +574,7 @@ struct rnwhisper_segments_callback_data {
572
574
  audioDataCount:(int)audioDataCount
573
575
  {
574
576
  whisper_reset_timings(self->ctx);
575
- int code = whisper_full(self->ctx, job->params, audioData, audioDataCount);
577
+ int code = whisper_full_parallel(self->ctx, job->params, audioData, audioDataCount, job->n_processors);
576
578
  if (job && job->is_aborted()) code = -999;
577
579
  // if (code == 0) {
578
580
  // whisper_print_timings(self->ctx);
@@ -209,9 +209,6 @@ extern "C" {
209
209
  void * context;
210
210
  };
211
211
 
212
- // Internal backend registry API
213
- WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
214
-
215
212
  // Add backend dynamic loading support to the backend
216
213
 
217
214
  // Initialize the backend
@@ -215,6 +215,8 @@ extern "C" {
215
215
  // Backend registry
216
216
  //
217
217
 
218
+ WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
219
+
218
220
  WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
219
221
 
220
222
  // Backend (reg) enumeration
@@ -102,6 +102,9 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
102
102
  }
103
103
  }
104
104
 
105
+ static inline float wsp_ggml_softplus(float input) {
106
+ return (input > 20.0f) ? input : logf(1 + expf(input));
107
+ }
105
108
  //
106
109
  // logging
107
110
  //
@@ -562,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
562
565
  #define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
563
566
  #define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
564
567
 
568
+ static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
569
+ const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
570
+
571
+ size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
572
+ if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
573
+ return 0;
574
+ }
575
+ return cgraph->use_counts[hash_pos];
576
+ }
577
+
565
578
  // return true if the node's results are only used by N other nodes
566
579
  // and can be fused into their calculations.
567
580
  static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
568
581
  const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
569
582
 
570
583
  // check the use count against how many we're replacing
571
- size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
572
- if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
584
+ if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
573
585
  return false;
574
586
  }
575
587
 
@@ -635,6 +647,36 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
635
647
  return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
636
648
  }
637
649
 
650
+ WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
651
+ const int * node_idxs,
652
+ int count,
653
+ const enum wsp_ggml_op * ops,
654
+ const int * outputs,
655
+ int num_outputs);
656
+
657
+ // Returns true if the subgraph formed by {node_idxs} can be fused
658
+ // checks whether all nodes which are not part of outputs can be elided
659
+ // by checking if their num_uses are confined to the subgraph
660
+ static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
661
+ int node_idx,
662
+ int count,
663
+ const enum wsp_ggml_op * ops,
664
+ const int * outputs,
665
+ int num_outputs) {
666
+ WSP_GGML_ASSERT(count < 32);
667
+ if (node_idx + count > cgraph->n_nodes) {
668
+ return false;
669
+ }
670
+
671
+ int idxs[32];
672
+
673
+ for (int i = 0; i < count; ++i) {
674
+ idxs[i] = node_idx + i;
675
+ }
676
+
677
+ return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
678
+ }
679
+
638
680
  #ifdef __cplusplus
639
681
  }
640
682
  #endif
@@ -648,6 +690,13 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
648
690
  return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
649
691
  }
650
692
 
693
+ inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
694
+ int start_idx,
695
+ std::initializer_list<enum wsp_ggml_op> ops,
696
+ std::initializer_list<int> outputs = {}) {
697
+ return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
698
+ }
699
+
651
700
  // expose GGUF internals for test code
652
701
  WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
653
702
  WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
@@ -237,6 +237,8 @@
237
237
  #define WSP_GGML_EXIT_SUCCESS 0
238
238
  #define WSP_GGML_EXIT_ABORTED 1
239
239
 
240
+ // TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
241
+ #define WSP_GGML_ROPE_TYPE_NORMAL 0
240
242
  #define WSP_GGML_ROPE_TYPE_NEOX 2
241
243
  #define WSP_GGML_ROPE_TYPE_MROPE 8
242
244
  #define WSP_GGML_ROPE_TYPE_VISION 24
@@ -574,6 +576,11 @@ extern "C" {
574
576
  WSP_GGML_UNARY_OP_HARDSIGMOID,
575
577
  WSP_GGML_UNARY_OP_EXP,
576
578
  WSP_GGML_UNARY_OP_GELU_ERF,
579
+ WSP_GGML_UNARY_OP_XIELU,
580
+ WSP_GGML_UNARY_OP_FLOOR,
581
+ WSP_GGML_UNARY_OP_CEIL,
582
+ WSP_GGML_UNARY_OP_ROUND,
583
+ WSP_GGML_UNARY_OP_TRUNC,
577
584
 
578
585
  WSP_GGML_UNARY_OP_COUNT,
579
586
  };
@@ -1148,6 +1155,58 @@ extern "C" {
1148
1155
  struct wsp_ggml_context * ctx,
1149
1156
  struct wsp_ggml_tensor * a);
1150
1157
 
1158
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
1159
+ struct wsp_ggml_context * ctx,
1160
+ struct wsp_ggml_tensor * a);
1161
+
1162
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
1163
+ struct wsp_ggml_context * ctx,
1164
+ struct wsp_ggml_tensor * a);
1165
+
1166
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
1167
+ struct wsp_ggml_context * ctx,
1168
+ struct wsp_ggml_tensor * a);
1169
+
1170
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
1171
+ struct wsp_ggml_context * ctx,
1172
+ struct wsp_ggml_tensor * a);
1173
+
1174
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
1175
+ struct wsp_ggml_context * ctx,
1176
+ struct wsp_ggml_tensor * a);
1177
+
1178
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
1179
+ struct wsp_ggml_context * ctx,
1180
+ struct wsp_ggml_tensor * a);
1181
+
1182
+ /**
1183
+ * Truncates the fractional part of each element in the tensor (towards zero).
1184
+ * For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
1185
+ * Similar to std::trunc in C/C++.
1186
+ */
1187
+
1188
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
1189
+ struct wsp_ggml_context * ctx,
1190
+ struct wsp_ggml_tensor * a);
1191
+
1192
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
1193
+ struct wsp_ggml_context * ctx,
1194
+ struct wsp_ggml_tensor * a);
1195
+
1196
+
1197
+
1198
+ // xIELU activation function
1199
+ // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
1200
+ // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
1201
+ // that constrain the positive and negative source alpha values respectively
1202
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
1203
+ struct wsp_ggml_context * ctx,
1204
+ struct wsp_ggml_tensor * a,
1205
+ float alpha_n,
1206
+ float alpha_p,
1207
+ float beta,
1208
+ float eps);
1209
+
1151
1210
  // gated linear unit ops
1152
1211
  // A: n columns, r rows,
1153
1212
  // result is n / 2 columns, r rows,
@@ -1615,6 +1674,13 @@ extern "C" {
1615
1674
  float scale,
1616
1675
  float max_bias);
1617
1676
 
1677
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
1678
+ struct wsp_ggml_context * ctx,
1679
+ struct wsp_ggml_tensor * a,
1680
+ struct wsp_ggml_tensor * mask,
1681
+ float scale,
1682
+ float max_bias);
1683
+
1618
1684
  WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
1619
1685
  struct wsp_ggml_tensor * a,
1620
1686
  struct wsp_ggml_tensor * sinks);
@@ -24,6 +24,7 @@ struct job {
24
24
  int job_id;
25
25
  bool aborted = false;
26
26
  whisper_full_params params;
27
+ int n_processors = 1;
27
28
 
28
29
  ~job();
29
30
  bool is_aborted();