whisper.rn 0.4.0-rc.10 → 0.4.0-rc.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/android/src/main/CMakeLists.txt +9 -3
  2. package/cpp/amx/amx.cpp +220 -0
  3. package/cpp/amx/amx.h +8 -0
  4. package/cpp/amx/common.h +91 -0
  5. package/cpp/amx/mmq.cpp +2511 -0
  6. package/cpp/amx/mmq.h +10 -0
  7. package/cpp/ggml-alloc.c +6 -14
  8. package/cpp/ggml-backend-impl.h +50 -11
  9. package/cpp/ggml-backend-reg.cpp +409 -31
  10. package/cpp/ggml-backend.cpp +9 -3
  11. package/cpp/ggml-backend.h +18 -0
  12. package/cpp/ggml-common.h +41 -43
  13. package/cpp/ggml-cpp.h +1 -0
  14. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +941 -254
  15. package/cpp/ggml-cpu-aarch64.h +2 -24
  16. package/cpp/ggml-cpu-impl.h +171 -11
  17. package/cpp/ggml-cpu-quants.c +1812 -389
  18. package/cpp/ggml-cpu-traits.cpp +36 -0
  19. package/cpp/ggml-cpu-traits.h +38 -0
  20. package/cpp/ggml-cpu.c +1432 -610
  21. package/cpp/ggml-cpu.cpp +131 -141
  22. package/cpp/ggml-cpu.h +10 -50
  23. package/cpp/ggml-impl.h +27 -11
  24. package/cpp/ggml-metal-impl.h +39 -0
  25. package/cpp/ggml-metal.h +1 -1
  26. package/cpp/ggml-metal.m +1031 -359
  27. package/cpp/ggml-opt.cpp +854 -0
  28. package/cpp/ggml-opt.h +216 -0
  29. package/cpp/ggml-quants.c +0 -9
  30. package/cpp/ggml-threading.h +4 -2
  31. package/cpp/ggml-whisper.metallib +0 -0
  32. package/cpp/ggml.c +501 -1537
  33. package/cpp/ggml.h +144 -171
  34. package/cpp/gguf.cpp +1329 -0
  35. package/cpp/gguf.h +202 -0
  36. package/cpp/whisper.cpp +254 -114
  37. package/cpp/whisper.h +6 -3
  38. package/lib/commonjs/version.json +1 -1
  39. package/lib/module/version.json +1 -1
  40. package/package.json +2 -1
  41. package/src/version.json +1 -1
  42. package/whisper-rn.podspec +2 -2
  43. package/cpp/README.md +0 -4
  44. package/cpp/ggml-aarch64.c +0 -129
  45. package/cpp/ggml-aarch64.h +0 -19
  46. package/cpp/ggml-backend.cpp.rej +0 -12
package/cpp/amx/mmq.h ADDED
@@ -0,0 +1,10 @@
1
+ #pragma once
2
+ #include "common.h"
3
+
4
+ size_t wsp_ggml_backend_amx_desired_wsize(const struct wsp_ggml_tensor * dst);
5
+
6
+ size_t wsp_ggml_backend_amx_get_alloc_size(const struct wsp_ggml_tensor * tensor);
7
+
8
+ void wsp_ggml_backend_amx_convert_weight(struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
9
+
10
+ void wsp_ggml_backend_amx_mul_mat(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
package/cpp/ggml-alloc.c CHANGED
@@ -37,6 +37,7 @@ static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const str
37
37
  return true;
38
38
  }
39
39
 
40
+ // ops that return true for this function must not use restrict pointers for their backend implementations
40
41
  static bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
41
42
  switch (op) {
42
43
  case WSP_GGML_OP_SCALE:
@@ -52,8 +53,12 @@ static bool wsp_ggml_op_can_inplace(enum wsp_ggml_op op) {
52
53
  case WSP_GGML_OP_LOG:
53
54
  case WSP_GGML_OP_UNARY:
54
55
  case WSP_GGML_OP_ROPE:
56
+ case WSP_GGML_OP_ROPE_BACK:
57
+ case WSP_GGML_OP_SILU_BACK:
55
58
  case WSP_GGML_OP_RMS_NORM:
59
+ case WSP_GGML_OP_RMS_NORM_BACK:
56
60
  case WSP_GGML_OP_SOFT_MAX:
61
+ case WSP_GGML_OP_SOFT_MAX_BACK:
57
62
  return true;
58
63
 
59
64
  default:
@@ -534,7 +539,6 @@ static void wsp_ggml_gallocr_allocate_node(wsp_ggml_gallocr_t galloc, struct wsp
534
539
  size_t offset = wsp_ggml_dyn_tallocr_alloc(alloc, size, node);
535
540
  hn->buffer_id = buffer_id;
536
541
  hn->offset = offset;
537
- return;
538
542
  }
539
543
  }
540
544
 
@@ -985,19 +989,7 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
985
989
  this_size = WSP_GGML_PAD(wsp_ggml_backend_buft_get_alloc_size(buft, t), alignment);
986
990
  }
987
991
 
988
- if (this_size > max_size) {
989
- WSP_GGML_LOG_ERROR("%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
990
- __func__, t->name,
991
- wsp_ggml_backend_buft_name(buft),
992
- this_size, max_size);
993
- for (size_t i = 0; i < n_buffers; i++) {
994
- wsp_ggml_backend_buffer_free(buffers[i]);
995
- }
996
- free(buffers);
997
- return NULL;
998
- }
999
-
1000
- if ((cur_buf_size + this_size) > max_size) {
992
+ if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
1001
993
  // allocate tensors in the current buffer
1002
994
  if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
1003
995
  return NULL;
package/cpp/ggml-backend-impl.h CHANGED
@@ -8,6 +8,8 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
+ #define WSP_GGML_BACKEND_API_VERSION 1
12
+
11
13
  //
12
14
  // Backend buffer type
13
15
  //
@@ -63,20 +65,20 @@ extern "C" {
63
65
  enum wsp_ggml_backend_buffer_usage usage;
64
66
  };
65
67
 
66
- wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
68
+ WSP_GGML_API wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
67
69
  wsp_ggml_backend_buffer_type_t buft,
68
70
  struct wsp_ggml_backend_buffer_i iface,
69
71
  void * context,
70
72
  size_t size);
71
73
 
72
74
  // do not use directly, use wsp_ggml_backend_tensor_copy instead
73
- bool wsp_ggml_backend_buffer_copy_tensor(const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
75
+ WSP_GGML_API bool wsp_ggml_backend_buffer_copy_tensor(const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
74
76
 
75
77
  // multi-buffer
76
78
  // buffer that contains a collection of buffers
77
- wsp_ggml_backend_buffer_t wsp_ggml_backend_multi_buffer_alloc_buffer(wsp_ggml_backend_buffer_t * buffers, size_t n_buffers);
78
- bool wsp_ggml_backend_buffer_is_multi_buffer(wsp_ggml_backend_buffer_t buffer);
79
- void wsp_ggml_backend_multi_buffer_set_usage(wsp_ggml_backend_buffer_t buffer, enum wsp_ggml_backend_buffer_usage usage);
79
+ WSP_GGML_API wsp_ggml_backend_buffer_t wsp_ggml_backend_multi_buffer_alloc_buffer(wsp_ggml_backend_buffer_t * buffers, size_t n_buffers);
80
+ WSP_GGML_API bool wsp_ggml_backend_buffer_is_multi_buffer(wsp_ggml_backend_buffer_t buffer);
81
+ WSP_GGML_API void wsp_ggml_backend_multi_buffer_set_usage(wsp_ggml_backend_buffer_t buffer, enum wsp_ggml_backend_buffer_usage usage);
80
82
 
81
83
  //
82
84
  // Backend (stream)
@@ -199,17 +201,54 @@ extern "C" {
199
201
  };
200
202
 
201
203
  struct wsp_ggml_backend_reg {
202
- // int api_version; // TODO: for dynamic loading
204
+ int api_version; // initialize to WSP_GGML_BACKEND_API_VERSION
203
205
  struct wsp_ggml_backend_reg_i iface;
204
206
  void * context;
205
207
  };
206
208
 
207
-
208
209
  // Internal backend registry API
209
- void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
210
- void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
211
- // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
212
- // typedef wsp_ggml_backend_register_t * (*wsp_ggml_backend_init)(void);
210
+ WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
211
+
212
+ // Add backend dynamic loading support to the backend
213
+
214
+ // Initialize the backend
215
+ typedef wsp_ggml_backend_reg_t (*wsp_ggml_backend_init_t)(void);
216
+ // Optional: obtain a score for the backend based on the system configuration
217
+ // Higher scores are preferred, 0 means the backend is not supported in the current system
218
+ typedef int (*wsp_ggml_backend_score_t)(void);
219
+
220
+ #ifdef WSP_GGML_BACKEND_DL
221
+ # ifdef __cplusplus
222
+ # define WSP_GGML_BACKEND_DL_IMPL(reg_fn) \
223
+ extern "C" { \
224
+ WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_init(void); \
225
+ } \
226
+ wsp_ggml_backend_reg_t wsp_ggml_backend_init(void) { \
227
+ return reg_fn(); \
228
+ }
229
+ # define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
230
+ extern "C" { \
231
+ WSP_GGML_BACKEND_API int wsp_ggml_backend_score(void); \
232
+ } \
233
+ int wsp_ggml_backend_score(void) { \
234
+ return score_fn(); \
235
+ }
236
+ # else
237
+ # define WSP_GGML_BACKEND_DL_IMPL(reg_fn) \
238
+ WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_init(void); \
239
+ wsp_ggml_backend_reg_t wsp_ggml_backend_init(void) { \
240
+ return reg_fn(); \
241
+ }
242
+ # define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
243
+ WSP_GGML_BACKEND_API int wsp_ggml_backend_score(void); \
244
+ int wsp_ggml_backend_score(void) { \
245
+ return score_fn(); \
246
+ }
247
+ # endif
248
+ #else
249
+ # define WSP_GGML_BACKEND_DL_IMPL(reg_fn)
250
+ # define WSP_GGML_BACKEND_DL_SCORE_IMPL(score_fn)
251
+ #endif
213
252
 
214
253
  #ifdef __cplusplus
215
254
  }