whisper.rn 0.4.0-rc.6 → 0.4.0-rc.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -115,7 +115,7 @@ public class RNWhisper implements LifecycleEventListener {
  promise.resolve(result);
  tasks.remove(this);
  }
- }.execute();
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
  tasks.put(task, "initContext");
  }

@@ -174,7 +174,7 @@ public class RNWhisper implements LifecycleEventListener {
  promise.resolve(data);
  tasks.remove(this);
  }
- }.execute();
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
  tasks.put(task, "transcribeFile-" + id);
  }

@@ -231,7 +231,7 @@ public class RNWhisper implements LifecycleEventListener {
  promise.resolve(null);
  tasks.remove(this);
  }
- }.execute();
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
  tasks.put(task, "abortTranscribe-" + id);
  }

@@ -272,7 +272,7 @@ public class RNWhisper implements LifecycleEventListener {
  promise.resolve(null);
  tasks.remove(this);
  }
- }.execute();
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
  tasks.put(task, "releaseContext-" + id);
  }

@@ -299,7 +299,7 @@ public class RNWhisper implements LifecycleEventListener {
  promise.resolve(null);
  tasks.remove(this);
  }
- }.execute();
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
  tasks.put(task, "releaseAllContexts");
  }

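Note: switching from execute() to executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR) changes scheduling, not the work done by each task. On modern Android, AsyncTask.execute() queues all tasks on a single shared serial executor, so a long-running transcribeFile task would block later initContext, abortTranscribe, and release calls; running them on THREAD_POOL_EXECUTOR lets these module tasks proceed in parallel.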
@@ -24,7 +24,7 @@ struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {

  // select which device to run the Core ML model on
  MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
- //config.computeUnits = MLComputeUnitsCPUAndGPU;
+ // config.computeUnits = MLComputeUnitsCPUAndGPU;
  //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
  config.computeUnits = MLComputeUnitsAll;

package/cpp/ggml-alloc.c CHANGED
@@ -72,7 +72,7 @@ static void remove_allocated_tensor(wsp_ggml_tallocr_t alloc, struct wsp_ggml_te

  // check if a tensor is allocated by this buffer
  static bool wsp_ggml_tallocr_is_own(wsp_ggml_tallocr_t alloc, const struct wsp_ggml_tensor * tensor) {
- return tensor->buffer == alloc->buffer;
+ return tensor->buffer == alloc->buffer && (!tensor->view_src || tensor->view_src->buffer == alloc->buffer);
  }

  static bool wsp_ggml_is_view(struct wsp_ggml_tensor * t) {
@@ -102,8 +102,6 @@ void wsp_ggml_tallocr_alloc(wsp_ggml_tallocr_t alloc, struct wsp_ggml_tensor * t
  }
  }

- AT_PRINTF("block %d\n", best_fit_block);
-
  if (best_fit_block == -1) {
  // the last block is our last resort
  struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
@@ -117,6 +115,7 @@ void wsp_ggml_tallocr_alloc(wsp_ggml_tallocr_t alloc, struct wsp_ggml_tensor * t
  return;
  }
  }
+
  struct free_block * block = &alloc->free_blocks[best_fit_block];
  void * addr = block->addr;
  block->addr = (char*)block->addr + size;
@@ -129,6 +128,8 @@ void wsp_ggml_tallocr_alloc(wsp_ggml_tallocr_t alloc, struct wsp_ggml_tensor * t
  }
  }

+ AT_PRINTF("block %d, addr %p\n", best_fit_block, addr);
+
  tensor->data = addr;
  tensor->buffer = alloc->buffer;
  if (!alloc->measure) {
@@ -229,6 +230,7 @@ void wsp_ggml_tallocr_reset(wsp_ggml_tallocr_t alloc) {
  alloc->free_blocks[0].size = SIZE_MAX/2; // restrict maximum size of a measure allocator to half size_t max to avoid overflows
  } else {
  alloc->free_blocks[0].size = wsp_ggml_backend_buffer_get_size(alloc->buffer) - align_offset;
+ wsp_ggml_backend_buffer_reset(alloc->buffer);
  }
  }

@@ -263,9 +265,9 @@ wsp_ggml_tallocr_t wsp_ggml_tallocr_new_measure(size_t alignment) {
  return alloc;
  }

- wsp_ggml_tallocr_t wsp_ggml_tallocr_new_measure_from_backend(struct wsp_ggml_backend * backend) {
+ wsp_ggml_tallocr_t wsp_ggml_tallocr_new_measure_from_buft(struct wsp_ggml_backend_buffer_type * buft) {
  // create a backend buffer to get the correct tensor allocation sizes
- wsp_ggml_backend_buffer_t buffer = wsp_ggml_backend_alloc_buffer(backend, 1);
+ wsp_ggml_backend_buffer_t buffer = wsp_ggml_backend_buft_alloc_buffer(buft, 1);

  // TODO: move alloc initialization to a common wsp_ggml_tallocr_new_impl function
  wsp_ggml_tallocr_t alloc = wsp_ggml_tallocr_new_from_buffer(buffer);
@@ -275,13 +277,22 @@ wsp_ggml_tallocr_t wsp_ggml_tallocr_new_measure_from_backend(struct wsp_ggml_bac
  return alloc;
  }

- wsp_ggml_tallocr_t wsp_ggml_tallocr_new_from_backend(struct wsp_ggml_backend * backend, size_t size) {
- wsp_ggml_backend_buffer_t buffer = wsp_ggml_backend_alloc_buffer(backend, size);
+ wsp_ggml_tallocr_t wsp_ggml_tallocr_new_measure_from_backend(struct wsp_ggml_backend * backend) {
+ return wsp_ggml_tallocr_new_measure_from_buft(wsp_ggml_backend_get_default_buffer_type(backend));
+ }
+
+ wsp_ggml_tallocr_t wsp_ggml_tallocr_new_from_buft(struct wsp_ggml_backend_buffer_type * buft, size_t size) {
+ // create a backend buffer to get the correct tensor allocation sizes
+ wsp_ggml_backend_buffer_t buffer = wsp_ggml_backend_buft_alloc_buffer(buft, size);
  wsp_ggml_tallocr_t alloc = wsp_ggml_tallocr_new_from_buffer(buffer);
  alloc->buffer_owned = true;
  return alloc;
  }

+ wsp_ggml_tallocr_t wsp_ggml_tallocr_new_from_backend(struct wsp_ggml_backend * backend, size_t size) {
+ return wsp_ggml_tallocr_new_from_buft(wsp_ggml_backend_get_default_buffer_type(backend), size);
+ }
+
  wsp_ggml_tallocr_t wsp_ggml_tallocr_new_from_buffer(struct wsp_ggml_backend_buffer * buffer) {
  wsp_ggml_tallocr_t alloc = (wsp_ggml_tallocr_t)malloc(sizeof(struct wsp_ggml_tallocr));

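Note: the allocator constructors are now layered on buffer types, and the backend-based entry points become thin wrappers around them. A minimal usage sketch, assuming an already-initialized wsp_ggml_backend_t named backend and a byte count named size (both hypothetical, not part of this diff):

    // resolve the backend's default buffer type, then build allocators from it
    wsp_ggml_backend_buffer_type_t buft = wsp_ggml_backend_get_default_buffer_type(backend);

    // measure allocator: records tensor sizes without a real device allocation
    wsp_ggml_tallocr_t measure = wsp_ggml_tallocr_new_measure_from_buft(buft);

    // real allocator backed by a freshly allocated, owned buffer of size bytes
    wsp_ggml_tallocr_t alloc = wsp_ggml_tallocr_new_from_buft(buft, size);

    // ... place tensors with wsp_ggml_tallocr_alloc(alloc, tensor) ...

    wsp_ggml_tallocr_free(measure);
    wsp_ggml_tallocr_free(alloc);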
@@ -449,11 +460,10 @@ static void init_view(wsp_ggml_gallocr_t galloc, struct wsp_ggml_tensor * view,
  if (update_backend) {
  view->backend = view->view_src->backend;
  }
- view->buffer = view->view_src->buffer;
+ // views are initialized in the alloc buffer rather than the view_src buffer
+ view->buffer = alloc->buffer;
  view->data = (char *)view->view_src->data + view->view_offs;

- // FIXME: the view should be initialized by the owning buffer, but currently this breaks the CUDA backend
- // due to the wsp_ggml_tensor_extra_gpu ring buffer overwriting the KV cache extras
  assert(wsp_ggml_tallocr_is_measure(alloc) || !view->buffer || view->buffer->buft == alloc->buffer->buft);

  if (!alloc->measure) {
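Note: views now belong to the buffer of the allocator that places them rather than to the buffer of their view_src; together with the stricter wsp_ggml_tallocr_is_own check earlier in this file, this appears to be what made the removed FIXME about the CUDA tensor-extra ring buffer unnecessary.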
@@ -736,6 +746,10 @@ void wsp_ggml_allocr_set_parse_seq(wsp_ggml_allocr_t alloc, const int * list, in
  }

  void wsp_ggml_allocr_free(wsp_ggml_allocr_t alloc) {
+ if (alloc == NULL) {
+ return;
+ }
+
  wsp_ggml_gallocr_free(alloc->galloc);
  wsp_ggml_tallocr_free(alloc->talloc);
  free(alloc);
@@ -775,11 +789,22 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
  }

  if (nbytes == 0) {
- fprintf(stderr, "%s: no tensors to allocate\n", __func__);
+ // all the tensors in the context are already allocated
+ #ifndef NDEBUG
+ fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__);
+ #endif
  return NULL;
  }

  wsp_ggml_backend_buffer_t buffer = wsp_ggml_backend_buft_alloc_buffer(buft, nbytes);
+ if (buffer == NULL) {
+ // failed to allocate buffer
+ #ifndef NDEBUG
+ fprintf(stderr, "%s: failed to allocate buffer\n", __func__);
+ #endif
+ return NULL;
+ }
+
  wsp_ggml_tallocr_t tallocr = wsp_ggml_tallocr_new_from_buffer(buffer);

  for (struct wsp_ggml_tensor * t = wsp_ggml_get_first_tensor(ctx); t != NULL; t = wsp_ggml_get_next_tensor(ctx, t)) {
@@ -789,6 +814,11 @@ wsp_ggml_backend_buffer_t wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct ws
  } else {
  wsp_ggml_backend_view_init(buffer, t);
  }
+ } else {
+ if (t->view_src != NULL) {
+ // view of a pre-allocated tensor
+ wsp_ggml_backend_view_init(buffer, t);
+ }
  }
  }

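Note: wsp_ggml_backend_alloc_ctx_tensors_from_buft now returns NULL in two cases: every tensor in the context is already allocated, or the backing buffer allocation fails, with the explanatory stderr message emitted only in non-NDEBUG builds. A hedged caller-side sketch (ctx and buft are assumed to exist already; not part of this diff):

    wsp_ggml_backend_buffer_t buf = wsp_ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
    if (buf == NULL) {
        // either all tensors were already placed or the allocation failed;
        // debug builds print which case it was, so NULL is not always an error
    }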
package/cpp/ggml-alloc.h CHANGED
@@ -52,8 +52,10 @@ typedef struct wsp_ggml_tallocr * wsp_ggml_tallocr_t;

  WSP_GGML_API wsp_ggml_tallocr_t wsp_ggml_tallocr_new(void * data, size_t size, size_t alignment);
  WSP_GGML_API wsp_ggml_tallocr_t wsp_ggml_tallocr_new_measure(size_t alignment);
- WSP_GGML_API wsp_ggml_tallocr_t wsp_ggml_tallocr_new_from_buffer(struct wsp_ggml_backend_buffer * buffer);
+ WSP_GGML_API wsp_ggml_tallocr_t wsp_ggml_tallocr_new_from_buft(struct wsp_ggml_backend_buffer_type * buft, size_t size);
  WSP_GGML_API wsp_ggml_tallocr_t wsp_ggml_tallocr_new_from_backend(struct wsp_ggml_backend * backend, size_t size); // allocates an owned buffer
+ WSP_GGML_API wsp_ggml_tallocr_t wsp_ggml_tallocr_new_from_buffer(struct wsp_ggml_backend_buffer * buffer);
+ WSP_GGML_API wsp_ggml_tallocr_t wsp_ggml_tallocr_new_measure_from_buft(struct wsp_ggml_backend_buffer_type * buft);
  WSP_GGML_API wsp_ggml_tallocr_t wsp_ggml_tallocr_new_measure_from_backend(struct wsp_ggml_backend * backend);

  WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_tallocr_get_buffer(wsp_ggml_tallocr_t talloc);
@@ -16,10 +16,14 @@ extern "C" {
  typedef void * wsp_ggml_backend_buffer_type_context_t;

  struct wsp_ggml_backend_buffer_type_i {
- wsp_ggml_backend_buffer_t (*alloc_buffer) (wsp_ggml_backend_buffer_type_t buft, size_t size);
- size_t (*get_alignment) (wsp_ggml_backend_buffer_type_t buft); // tensor alignment
- size_t (*get_alloc_size) (wsp_ggml_backend_buffer_type_t buft, struct wsp_ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
- bool (*supports_backend)(wsp_ggml_backend_buffer_type_t buft, wsp_ggml_backend_t backend); // check if the buffer type is usable by the backend
+ const char * (*WSP_GGML_CALL get_name) (wsp_ggml_backend_buffer_type_t buft);
+ wsp_ggml_backend_buffer_t (*WSP_GGML_CALL alloc_buffer) (wsp_ggml_backend_buffer_type_t buft, size_t size);
+ size_t (*WSP_GGML_CALL get_alignment) (wsp_ggml_backend_buffer_type_t buft); // tensor alignment
+ size_t (*WSP_GGML_CALL get_alloc_size) (wsp_ggml_backend_buffer_type_t buft, const struct wsp_ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
+ bool (*WSP_GGML_CALL supports_backend)(wsp_ggml_backend_buffer_type_t buft, wsp_ggml_backend_t backend); // check if the buffer type is usable by the backend
+ // check if tensor data is in host memory
+ // should be equivalent to supports_backend(buft, wsp_ggml_backend_cpu_init())
+ bool (*WSP_GGML_CALL is_host) (wsp_ggml_backend_buffer_type_t buft);
  };

  struct wsp_ggml_backend_buffer_type {
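Note: every callback in these internal interfaces is now declared through the WSP_GGML_CALL macro, which fixes a single calling convention for function pointers that may cross objects built with different toolchains (on most configurations it expands to nothing). The buffer-type interface also gains a get_name callback and an is_host query alongside the existing allocation hooks.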
@@ -31,15 +35,15 @@ extern "C" {
  typedef void * wsp_ggml_backend_buffer_context_t;

  struct wsp_ggml_backend_buffer_i {
- void (*free_buffer)(wsp_ggml_backend_buffer_t buffer);
- //void (*reset) (wsp_ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
- void * (*get_base) (wsp_ggml_backend_buffer_t buffer);
- void (*init_tensor)(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor);
- void (*set_tensor) (wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
- void (*get_tensor) (wsp_ggml_backend_buffer_t buffer, const struct wsp_ggml_tensor * tensor, void * data, size_t offset, size_t size);
- // (optional) copy tensor between different buffer-type, allow for single-copy tranfers
- void (*cpy_tensor_from)(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
- void (*cpy_tensor_to) (wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
+ const char * (*WSP_GGML_CALL get_name) (wsp_ggml_backend_buffer_t buffer);
+ void (*WSP_GGML_CALL free_buffer)(wsp_ggml_backend_buffer_t buffer);
+ void * (*WSP_GGML_CALL get_base) (wsp_ggml_backend_buffer_t buffer);
+ void (*WSP_GGML_CALL init_tensor)(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor);
+ void (*WSP_GGML_CALL set_tensor) (wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+ void (*WSP_GGML_CALL get_tensor) (wsp_ggml_backend_buffer_t buffer, const struct wsp_ggml_tensor * tensor, void * data, size_t offset, size_t size);
+ bool (*WSP_GGML_CALL cpy_tensor) (wsp_ggml_backend_buffer_t buffer, const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
+ void (*WSP_GGML_CALL clear) (wsp_ggml_backend_buffer_t buffer, uint8_t value);
+ void (*WSP_GGML_CALL reset) (wsp_ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
  };

  struct wsp_ggml_backend_buffer {
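Note: a minimal sketch of how a plain host-memory buffer could satisfy the reworked wsp_ggml_backend_buffer_i. The Example* names are hypothetical, the header name is assumed from upstream ggml, optional callbacks are left NULL, and WSP_GGML_CALL qualifiers are omitted for brevity (they would be needed on builds where the macro is non-empty):

    #include <stdint.h>              // uint8_t
    #include <stdlib.h>              // free, NULL
    #include <string.h>              // memcpy, memset
    #include "ggml-backend-impl.h"   // assumed internal header defining wsp_ggml_backend_buffer_i

    static const char * example_buffer_get_name(wsp_ggml_backend_buffer_t buffer) {
        (void) buffer;
        return "ExampleHost";
    }

    static void example_buffer_free(wsp_ggml_backend_buffer_t buffer) {
        free(buffer->context); // in this sketch, context holds the base allocation
    }

    static void * example_buffer_get_base(wsp_ggml_backend_buffer_t buffer) {
        return buffer->context;
    }

    static void example_buffer_set_tensor(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor,
                                          const void * data, size_t offset, size_t size) {
        (void) buffer;
        memcpy((char *) tensor->data + offset, data, size);
    }

    static void example_buffer_get_tensor(wsp_ggml_backend_buffer_t buffer, const struct wsp_ggml_tensor * tensor,
                                          void * data, size_t offset, size_t size) {
        (void) buffer;
        memcpy(data, (const char *) tensor->data + offset, size);
    }

    static void example_buffer_clear(wsp_ggml_backend_buffer_t buffer, uint8_t value) {
        memset(buffer->context, value, buffer->size);
    }

    static struct wsp_ggml_backend_buffer_i example_buffer_i = {
        /* .get_name    = */ example_buffer_get_name,
        /* .free_buffer = */ example_buffer_free,
        /* .get_base    = */ example_buffer_get_base,
        /* .init_tensor = */ NULL, // plain host memory needs no per-tensor init
        /* .set_tensor  = */ example_buffer_set_tensor,
        /* .get_tensor  = */ example_buffer_get_tensor,
        /* .cpy_tensor  = */ NULL, // fall back to the generic copy path
        /* .clear       = */ example_buffer_clear,
        /* .reset       = */ NULL, // no extras to reset
    };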
@@ -47,14 +51,17 @@ extern "C" {
  wsp_ggml_backend_buffer_type_t buft;
  wsp_ggml_backend_buffer_context_t context;
  size_t size;
+ enum wsp_ggml_backend_buffer_usage usage;
  };

- wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
+ WSP_GGML_CALL wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
  wsp_ggml_backend_buffer_type_t buft,
  struct wsp_ggml_backend_buffer_i iface,
  wsp_ggml_backend_buffer_context_t context,
  size_t size);

+ // do not use directly, use wsp_ggml_backend_tensor_copy instead
+ bool wsp_ggml_backend_buffer_copy_tensor(const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);

  //
  // Backend
@@ -63,33 +70,31 @@ extern "C" {
  typedef void * wsp_ggml_backend_context_t;

  struct wsp_ggml_backend_i {
- const char * (*get_name)(wsp_ggml_backend_t backend);
+ const char * (*WSP_GGML_CALL get_name)(wsp_ggml_backend_t backend);

- void (*free)(wsp_ggml_backend_t backend);
+ void (*WSP_GGML_CALL free)(wsp_ggml_backend_t backend);

  // buffer allocation
- wsp_ggml_backend_buffer_type_t (*get_default_buffer_type)(wsp_ggml_backend_t backend);
+ wsp_ggml_backend_buffer_type_t (*WSP_GGML_CALL get_default_buffer_type)(wsp_ggml_backend_t backend);

- // (optional) asynchroneous tensor data access
- void (*set_tensor_async)(wsp_ggml_backend_t backend, struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
- void (*get_tensor_async)(wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * tensor, void * data, size_t offset, size_t size);
+ // (optional) asynchronous tensor data access
+ void (*WSP_GGML_CALL set_tensor_async)(wsp_ggml_backend_t backend, struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+ void (*WSP_GGML_CALL get_tensor_async)(wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * tensor, void * data, size_t offset, size_t size);
+ bool (*WSP_GGML_CALL cpy_tensor_async)(wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);

- // (optional) asynchroneous tensor copy
- void (*cpy_tensor_from_async)(wsp_ggml_backend_t backend, struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
- void (*cpy_tensor_to_async) (wsp_ggml_backend_t backend, struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
-
- void (*synchronize) (wsp_ggml_backend_t backend);
+ // (optional) complete all pending operations
+ void (*WSP_GGML_CALL synchronize)(wsp_ggml_backend_t backend);

  // compute graph with a plan
- wsp_ggml_backend_graph_plan_t (*graph_plan_create) (wsp_ggml_backend_t backend, struct wsp_ggml_cgraph * cgraph);
- void (*graph_plan_free) (wsp_ggml_backend_t backend, wsp_ggml_backend_graph_plan_t plan);
- void (*graph_plan_compute)(wsp_ggml_backend_t backend, wsp_ggml_backend_graph_plan_t plan);
+ wsp_ggml_backend_graph_plan_t (*WSP_GGML_CALL graph_plan_create) (wsp_ggml_backend_t backend, const struct wsp_ggml_cgraph * cgraph);
+ void (*WSP_GGML_CALL graph_plan_free) (wsp_ggml_backend_t backend, wsp_ggml_backend_graph_plan_t plan);
+ void (*WSP_GGML_CALL graph_plan_compute)(wsp_ggml_backend_t backend, wsp_ggml_backend_graph_plan_t plan);

- // compute graph without a plan
- void (*graph_compute)(wsp_ggml_backend_t backend, struct wsp_ggml_cgraph * cgraph);
+ // compute graph without a plan (async)
+ bool (*WSP_GGML_CALL graph_compute)(wsp_ggml_backend_t backend, struct wsp_ggml_cgraph * cgraph);

  // check if the backend supports an operation
- bool (*supports_op)(wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * op);
+ bool (*WSP_GGML_CALL supports_op)(wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * op);
  };

  struct wsp_ggml_backend {
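Note: beyond the WSP_GGML_CALL annotations and the asynchroneous → asynchronous spelling fix, the backend interface merges the two directional async copy hooks into a single cpy_tensor_async, documents synchronize as completing all pending operations, and makes graph_compute return a bool so a backend can report failure to the caller.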
@@ -98,14 +103,13 @@ extern "C" {
  wsp_ggml_backend_context_t context;
  };

-
  //
  // Backend registry
  //

- typedef wsp_ggml_backend_t (*wsp_ggml_backend_init_fn)(const char * params, void * user_data);
+ typedef wsp_ggml_backend_t (*WSP_GGML_CALL wsp_ggml_backend_init_fn)(const char * params, void * user_data);

- void wsp_ggml_backend_register(const char * name, wsp_ggml_backend_init_fn init_fn, wsp_ggml_backend_buffer_type_t default_buffer_type, void * user_data);
+ WSP_GGML_CALL void wsp_ggml_backend_register(const char * name, wsp_ggml_backend_init_fn init_fn, wsp_ggml_backend_buffer_type_t default_buffer_type, void * user_data);

  #ifdef __cplusplus
  }