whisper.rn 0.4.0-rc.1 → 0.4.0-rc.10

Files changed (75)
  1. package/README.md +6 -6
  2. package/android/build.gradle +4 -0
  3. package/android/src/main/CMakeLists.txt +14 -0
  4. package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -92
  5. package/android/src/main/java/com/rnwhisper/RNWhisper.java +86 -40
  6. package/android/src/main/java/com/rnwhisper/WhisperContext.java +85 -131
  7. package/android/src/main/jni-utils.h +76 -0
  8. package/android/src/main/jni.cpp +226 -109
  9. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  10. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
  11. package/cpp/README.md +1 -1
  12. package/cpp/coreml/whisper-encoder-impl.h +1 -1
  13. package/cpp/coreml/whisper-encoder.h +4 -0
  14. package/cpp/coreml/whisper-encoder.mm +5 -3
  15. package/cpp/ggml-aarch64.c +129 -0
  16. package/cpp/ggml-aarch64.h +19 -0
  17. package/cpp/ggml-alloc.c +805 -400
  18. package/cpp/ggml-alloc.h +60 -10
  19. package/cpp/ggml-backend-impl.h +216 -0
  20. package/cpp/ggml-backend-reg.cpp +204 -0
  21. package/cpp/ggml-backend.cpp +1996 -0
  22. package/cpp/ggml-backend.cpp.rej +12 -0
  23. package/cpp/ggml-backend.h +336 -0
  24. package/cpp/ggml-common.h +1853 -0
  25. package/cpp/ggml-cpp.h +38 -0
  26. package/cpp/ggml-cpu-aarch64.c +3560 -0
  27. package/cpp/ggml-cpu-aarch64.h +30 -0
  28. package/cpp/ggml-cpu-impl.h +371 -0
  29. package/cpp/ggml-cpu-quants.c +10822 -0
  30. package/cpp/ggml-cpu-quants.h +63 -0
  31. package/cpp/ggml-cpu.c +13970 -0
  32. package/cpp/ggml-cpu.cpp +663 -0
  33. package/cpp/ggml-cpu.h +177 -0
  34. package/cpp/ggml-impl.h +551 -0
  35. package/cpp/ggml-metal-impl.h +249 -0
  36. package/cpp/ggml-metal.h +24 -43
  37. package/cpp/ggml-metal.m +4190 -1075
  38. package/cpp/ggml-quants.c +5247 -0
  39. package/cpp/ggml-quants.h +100 -0
  40. package/cpp/ggml-threading.cpp +12 -0
  41. package/cpp/ggml-threading.h +12 -0
  42. package/cpp/ggml-whisper.metallib +0 -0
  43. package/cpp/ggml.c +5474 -18763
  44. package/cpp/ggml.h +833 -628
  45. package/cpp/rn-audioutils.cpp +68 -0
  46. package/cpp/rn-audioutils.h +14 -0
  47. package/cpp/rn-whisper-log.h +11 -0
  48. package/cpp/rn-whisper.cpp +221 -52
  49. package/cpp/rn-whisper.h +50 -15
  50. package/cpp/whisper.cpp +2872 -1371
  51. package/cpp/whisper.h +170 -41
  52. package/ios/RNWhisper.mm +139 -46
  53. package/ios/RNWhisperAudioUtils.h +1 -2
  54. package/ios/RNWhisperAudioUtils.m +18 -67
  55. package/ios/RNWhisperContext.h +11 -8
  56. package/ios/RNWhisperContext.mm +195 -150
  57. package/jest/mock.js +15 -2
  58. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  59. package/lib/commonjs/index.js +76 -28
  60. package/lib/commonjs/index.js.map +1 -1
  61. package/lib/commonjs/version.json +1 -1
  62. package/lib/module/NativeRNWhisper.js.map +1 -1
  63. package/lib/module/index.js +76 -28
  64. package/lib/module/index.js.map +1 -1
  65. package/lib/module/version.json +1 -1
  66. package/lib/typescript/NativeRNWhisper.d.ts +13 -4
  67. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  68. package/lib/typescript/index.d.ts +37 -5
  69. package/lib/typescript/index.d.ts.map +1 -1
  70. package/package.json +9 -7
  71. package/src/NativeRNWhisper.ts +20 -4
  72. package/src/index.ts +98 -42
  73. package/src/version.json +1 -1
  74. package/whisper-rn.podspec +11 -18
  75. package/cpp/ggml-metal.metal +0 -2353
package/cpp/ggml-alloc.h CHANGED
@@ -6,20 +6,70 @@
 extern "C" {
 #endif

+typedef struct wsp_ggml_backend_buffer_type * wsp_ggml_backend_buffer_type_t;
+typedef struct wsp_ggml_backend_buffer * wsp_ggml_backend_buffer_t;
+typedef struct wsp_ggml_backend * wsp_ggml_backend_t;

-WSP_GGML_API struct wsp_ggml_allocr * wsp_ggml_allocr_new(void * data, size_t size, size_t alignment);
-WSP_GGML_API struct wsp_ggml_allocr * wsp_ggml_allocr_new_measure(size_t alignment);
+// Tensor allocator
+struct wsp_ggml_tallocr {
+    wsp_ggml_backend_buffer_t buffer;
+    void * base;
+    size_t alignment;
+    size_t offset;
+};

-// tell the allocator to parse nodes following the order described in the list
-// you should call this if your graph are optimized to execute out-of-order
-WSP_GGML_API void wsp_ggml_allocr_set_parse_seq(struct wsp_ggml_allocr * alloc, const int * list, int n);
+WSP_GGML_API struct wsp_ggml_tallocr wsp_ggml_tallocr_new(wsp_ggml_backend_buffer_t buffer);
+WSP_GGML_API void wsp_ggml_tallocr_alloc(struct wsp_ggml_tallocr * talloc, struct wsp_ggml_tensor * tensor);

-WSP_GGML_API void wsp_ggml_allocr_free(struct wsp_ggml_allocr * alloc);
-WSP_GGML_API bool wsp_ggml_allocr_is_measure(struct wsp_ggml_allocr * alloc);
-WSP_GGML_API void wsp_ggml_allocr_reset(struct wsp_ggml_allocr * alloc);
-WSP_GGML_API void wsp_ggml_allocr_alloc(struct wsp_ggml_allocr * alloc, struct wsp_ggml_tensor * tensor);
-WSP_GGML_API size_t wsp_ggml_allocr_alloc_graph(struct wsp_ggml_allocr * alloc, struct wsp_ggml_cgraph * graph);
+// Graph allocator
+/*
+  Example usage:
+    wsp_ggml_gallocr_t galloc = wsp_ggml_gallocr_new(wsp_ggml_backend_cpu_buffer_type());

+    // optional: create a worst-case graph and reserve the buffers to avoid reallocations
+    wsp_ggml_gallocr_reserve(galloc, build_graph(max_batch));
+
+    // allocate the graph
+    struct wsp_ggml_cgraph * graph = build_graph(batch);
+    wsp_ggml_gallocr_alloc_graph(galloc, graph);
+
+    printf("compute buffer size: %zu bytes\n", wsp_ggml_gallocr_get_buffer_size(galloc, 0));
+
+    // evaluate the graph
+    wsp_ggml_backend_graph_compute(backend, graph);
+*/
+
+// special tensor flags for use with the graph allocator:
+//   wsp_ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
+//   wsp_ggml_set_output(): output tensors are never freed and never overwritten
+
+typedef struct wsp_ggml_gallocr * wsp_ggml_gallocr_t;
+
+WSP_GGML_API wsp_ggml_gallocr_t wsp_ggml_gallocr_new(wsp_ggml_backend_buffer_type_t buft);
+WSP_GGML_API wsp_ggml_gallocr_t wsp_ggml_gallocr_new_n(wsp_ggml_backend_buffer_type_t * bufts, int n_bufs);
+WSP_GGML_API void wsp_ggml_gallocr_free(wsp_ggml_gallocr_t galloc);
+
+// pre-allocate buffers from a measure graph - does not allocate or modify the graph
+// call with a worst-case graph to avoid buffer reallocations
+// not strictly required for single buffer usage: wsp_ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
+// returns false if the buffer allocation failed
+WSP_GGML_API bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
+WSP_GGML_API bool wsp_ggml_gallocr_reserve_n(
+    wsp_ggml_gallocr_t galloc,
+    struct wsp_ggml_cgraph * graph,
+    const int * node_buffer_ids,
+    const int * leaf_buffer_ids);
+
+// automatic reallocation if the topology changes when using a single buffer
+// returns false if using multiple buffers and a re-allocation is needed (call wsp_ggml_gallocr_reserve_n first to set the node buffers)
+WSP_GGML_API bool wsp_ggml_gallocr_alloc_graph(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
+
+WSP_GGML_API size_t wsp_ggml_gallocr_get_buffer_size(wsp_ggml_gallocr_t galloc, int buffer_id);
+
+// Utils
+// Create a buffer and allocate all the tensors in a wsp_ggml_context
+WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
+WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend);

 #ifdef __cplusplus
 }
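A minimal sketch of the context-allocation utility declared above, assuming wsp_ggml_backend_cpu_buffer_type() from ggml-backend.h and the usual ggml context helpers; alloc_example is a hypothetical name:

    // Sketch: place every tensor of a ggml context into one backend buffer.
    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"

    static wsp_ggml_backend_buffer_t alloc_example(void) {
        struct wsp_ggml_init_params params = {
            /*.mem_size   =*/ 2 * wsp_ggml_tensor_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true, // metadata only; data lives in the backend buffer
        };
        struct wsp_ggml_context * ctx = wsp_ggml_init(params);

        // create tensor metadata; no data is allocated yet
        struct wsp_ggml_tensor * a = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1024);
        struct wsp_ggml_tensor * b = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 1024);
        (void) a; (void) b;

        // one call allocates a buffer and places all context tensors in it
        return wsp_ggml_backend_alloc_ctx_tensors_from_buft(ctx, wsp_ggml_backend_cpu_buffer_type());
    }

The caller owns both the buffer (freed with wsp_ggml_backend_buffer_free) and the context (freed with wsp_ggml_free).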
package/cpp/ggml-backend-impl.h ADDED
@@ -0,0 +1,216 @@
+#pragma once
+
+// ggml-backend internal header
+
+#include "ggml-backend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//
+// Backend buffer type
+//
+
+struct wsp_ggml_backend_buffer_type_i {
+    const char * (*get_name)(wsp_ggml_backend_buffer_type_t buft);
+    // allocate a buffer of this type
+    wsp_ggml_backend_buffer_t (*alloc_buffer)(wsp_ggml_backend_buffer_type_t buft, size_t size);
+    // tensor alignment
+    size_t (*get_alignment)(wsp_ggml_backend_buffer_type_t buft);
+    // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
+    size_t (*get_max_size)(wsp_ggml_backend_buffer_type_t buft);
+    // (optional) data size needed to allocate the tensor, including padding (defaults to wsp_ggml_nbytes)
+    size_t (*get_alloc_size)(wsp_ggml_backend_buffer_type_t buft, const struct wsp_ggml_tensor * tensor);
+    // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
+    bool (*is_host)(wsp_ggml_backend_buffer_type_t buft);
+};
+
+struct wsp_ggml_backend_buffer_type {
+    struct wsp_ggml_backend_buffer_type_i iface;
+    wsp_ggml_backend_dev_t device;
+    void * context;
+};
+
+//
+// Backend buffer
+//
+
+struct wsp_ggml_backend_buffer_i {
+    // (optional) free the buffer
+    void (*free_buffer)(wsp_ggml_backend_buffer_t buffer);
+    // base address of the buffer
+    void * (*get_base)(wsp_ggml_backend_buffer_t buffer);
+    // (optional) initialize a tensor in the buffer (e.g. add tensor extras)
+    void (*init_tensor)(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor);
+    // tensor data access
+    void (*memset_tensor)(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
+    void (*set_tensor)(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    void (*get_tensor)(wsp_ggml_backend_buffer_t buffer, const struct wsp_ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
+    bool (*cpy_tensor)(wsp_ggml_backend_buffer_t buffer, const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
+    // clear the entire buffer
+    void (*clear)(wsp_ggml_backend_buffer_t buffer, uint8_t value);
+    // (optional) reset any internal state due to tensor initialization, such as tensor extras
+    void (*reset)(wsp_ggml_backend_buffer_t buffer);
+};
+
+struct wsp_ggml_backend_buffer {
+    struct wsp_ggml_backend_buffer_i iface;
+    wsp_ggml_backend_buffer_type_t buft;
+    void * context;
+    size_t size;
+    enum wsp_ggml_backend_buffer_usage usage;
+};
+
+wsp_ggml_backend_buffer_t wsp_ggml_backend_buffer_init(
+    wsp_ggml_backend_buffer_type_t buft,
+    struct wsp_ggml_backend_buffer_i iface,
+    void * context,
+    size_t size);
+
+// do not use directly, use wsp_ggml_backend_tensor_copy instead
+bool wsp_ggml_backend_buffer_copy_tensor(const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
+
+// multi-buffer
+// buffer that contains a collection of buffers
+wsp_ggml_backend_buffer_t wsp_ggml_backend_multi_buffer_alloc_buffer(wsp_ggml_backend_buffer_t * buffers, size_t n_buffers);
+bool wsp_ggml_backend_buffer_is_multi_buffer(wsp_ggml_backend_buffer_t buffer);
+void wsp_ggml_backend_multi_buffer_set_usage(wsp_ggml_backend_buffer_t buffer, enum wsp_ggml_backend_buffer_usage usage);
+
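To make the buffer vtable concrete, here is a hedged sketch of a heap-backed buffer built on wsp_ggml_backend_buffer_init as declared above; the example_* names are hypothetical, the buffer type passed in is assumed to exist, and a usable buffer would also implement the tensor-access callbacks:

    // Hypothetical malloc-backed buffer; illustrative only.
    #include "ggml-backend-impl.h"
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    static void example_buffer_free(wsp_ggml_backend_buffer_t buffer) {
        free(buffer->context); // context holds the raw allocation
    }

    static void * example_buffer_get_base(wsp_ggml_backend_buffer_t buffer) {
        return buffer->context;
    }

    static void example_buffer_clear(wsp_ggml_backend_buffer_t buffer, uint8_t value) {
        memset(buffer->context, value, buffer->size);
    }

    // would be wired up as the alloc_buffer callback of a matching buffer type
    static wsp_ggml_backend_buffer_t example_alloc_buffer(wsp_ggml_backend_buffer_type_t buft, size_t size) {
        struct wsp_ggml_backend_buffer_i iface = {0}; // optional callbacks stay NULL
        iface.free_buffer = example_buffer_free;
        iface.get_base    = example_buffer_get_base;
        iface.clear       = example_buffer_clear;
        return wsp_ggml_backend_buffer_init(buft, iface, malloc(size), size);
    }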
+//
+// Backend (stream)
+//
+
+struct wsp_ggml_backend_i {
+    const char * (*get_name)(wsp_ggml_backend_t backend);
+
+    void (*free)(wsp_ggml_backend_t backend);
+
+    // (optional) asynchronous tensor data access
+    void (*set_tensor_async)(wsp_ggml_backend_t backend, struct wsp_ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    void (*get_tensor_async)(wsp_ggml_backend_t backend, const struct wsp_ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    bool (*cpy_tensor_async)(wsp_ggml_backend_t backend_src, wsp_ggml_backend_t backend_dst, const struct wsp_ggml_tensor * src, struct wsp_ggml_tensor * dst);
+
+    // (optional) complete all pending operations (required if the backend supports async operations)
+    void (*synchronize)(wsp_ggml_backend_t backend);
+
+    // (optional) graph plans (not used currently)
+    // compute graph with a plan
+    wsp_ggml_backend_graph_plan_t (*graph_plan_create)(wsp_ggml_backend_t backend, const struct wsp_ggml_cgraph * cgraph);
+    void (*graph_plan_free)(wsp_ggml_backend_t backend, wsp_ggml_backend_graph_plan_t plan);
+    // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
+    void (*graph_plan_update)(wsp_ggml_backend_t backend, wsp_ggml_backend_graph_plan_t plan, const struct wsp_ggml_cgraph * cgraph);
+    // compute the graph with the plan
+    enum wsp_ggml_status (*graph_plan_compute)(wsp_ggml_backend_t backend, wsp_ggml_backend_graph_plan_t plan);
+
+    // compute graph (always async if supported by the backend)
+    enum wsp_ggml_status (*graph_compute)(wsp_ggml_backend_t backend, struct wsp_ggml_cgraph * cgraph);
+
+    // (optional) event synchronization
+    // record an event on this stream
+    void (*event_record)(wsp_ggml_backend_t backend, wsp_ggml_backend_event_t event);
+    // wait for an event on a different stream
+    void (*event_wait)(wsp_ggml_backend_t backend, wsp_ggml_backend_event_t event);
+};
+
+struct wsp_ggml_backend {
+    wsp_ggml_guid_t guid;
+    struct wsp_ggml_backend_i iface;
+    wsp_ggml_backend_dev_t device;
+    void * context;
+};
+
+struct wsp_ggml_backend_event {
+    struct wsp_ggml_backend_device * device;
+    void * context;
+};
+
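For orientation, the public graph-compute entry point dispatches through this vtable roughly as follows; this is a simplified sketch, not the exact ggml-backend.cpp code, and example_graph_compute is a hypothetical name:

    #include "ggml-backend-impl.h"

    // graph_compute is async when the backend supports it, so a blocking
    // wrapper computes and then synchronizes if the callback is provided
    static enum wsp_ggml_status example_graph_compute(wsp_ggml_backend_t backend, struct wsp_ggml_cgraph * cgraph) {
        enum wsp_ggml_status status = backend->iface.graph_compute(backend, cgraph);
        if (backend->iface.synchronize) {
            backend->iface.synchronize(backend);
        }
        return status;
    }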
+//
+// Backend device
+//
+
+// Note: if additional properties are needed, we should add a struct with all of them
+//       the current functions to obtain the properties can remain, since they are more convenient for often used properties
+struct wsp_ggml_backend_device_i {
+    // device name: short identifier for this device, such as "CPU" or "CUDA0"
+    const char * (*get_name)(wsp_ggml_backend_dev_t dev);
+
+    // device description: short informative description of the device, could be the model name
+    const char * (*get_description)(wsp_ggml_backend_dev_t dev);
+
+    // device memory in bytes
+    void (*get_memory)(wsp_ggml_backend_dev_t dev, size_t * free, size_t * total);
+
+    // device type
+    enum wsp_ggml_backend_dev_type (*get_type)(wsp_ggml_backend_dev_t dev);
+
+    // device properties
+    void (*get_props)(wsp_ggml_backend_dev_t dev, struct wsp_ggml_backend_dev_props * props);
+
+    // backend (stream) initialization
+    wsp_ggml_backend_t (*init_backend)(wsp_ggml_backend_dev_t dev, const char * params);
+
+    // preferred buffer type
+    wsp_ggml_backend_buffer_type_t (*get_buffer_type)(wsp_ggml_backend_dev_t dev);
+
+    // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
+    wsp_ggml_backend_buffer_type_t (*get_host_buffer_type)(wsp_ggml_backend_dev_t dev);
+
+    // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
+    wsp_ggml_backend_buffer_t (*buffer_from_host_ptr)(wsp_ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);
+
+    // check if the backend can compute an operation
+    bool (*supports_op)(wsp_ggml_backend_dev_t dev, const struct wsp_ggml_tensor * op);
+
+    // check if the backend can use tensors allocated in a buffer type
+    bool (*supports_buft)(wsp_ggml_backend_dev_t dev, wsp_ggml_backend_buffer_type_t buft);
+
+    // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
+    // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
+    bool (*offload_op)(wsp_ggml_backend_dev_t dev, const struct wsp_ggml_tensor * op);
+
+    // (optional) event synchronization
+    wsp_ggml_backend_event_t (*event_new)(wsp_ggml_backend_dev_t dev);
+    void (*event_free)(wsp_ggml_backend_dev_t dev, wsp_ggml_backend_event_t event);
+    void (*event_synchronize)(wsp_ggml_backend_dev_t dev, wsp_ggml_backend_event_t event);
+};
+
+struct wsp_ggml_backend_device {
+    struct wsp_ggml_backend_device_i iface;
+    wsp_ggml_backend_reg_t reg;
+    void * context;
+};
+
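As an illustration, a device implementing this interface is normally inspected through the public wrappers in ggml-backend.h; a sketch assuming those wrappers follow the upstream ggml names with the wsp_ prefix (wsp_ggml_backend_dev_name/_description/_memory), with example_print_device as a hypothetical name:

    #include "ggml-backend.h"
    #include <stdio.h>

    static void example_print_device(wsp_ggml_backend_dev_t dev) {
        size_t free_mem = 0, total_mem = 0;
        wsp_ggml_backend_dev_memory(dev, &free_mem, &total_mem); // fills free/total bytes
        printf("%s (%s): %zu of %zu bytes free\n",
               wsp_ggml_backend_dev_name(dev),
               wsp_ggml_backend_dev_description(dev),
               free_mem, total_mem);
    }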
+//
+// Backend (reg)
+//
+
+struct wsp_ggml_backend_reg_i {
+    const char * (*get_name)(wsp_ggml_backend_reg_t reg);
+
+    // enumerate available devices
+    size_t (*get_device_count)(wsp_ggml_backend_reg_t reg);
+    wsp_ggml_backend_dev_t (*get_device)(wsp_ggml_backend_reg_t reg, size_t index);
+
+    // (optional) get a pointer to a function in the backend
+    // backends can add custom functions that are not part of the standard ggml-backend interface
+    void * (*get_proc_address)(wsp_ggml_backend_reg_t reg, const char * name);
+};
+
+struct wsp_ggml_backend_reg {
+    // int api_version; // TODO: for dynamic loading
+    struct wsp_ggml_backend_reg_i iface;
+    void * context;
+};
+
+// Internal backend registry API
+void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
+void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
+// TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
+// typedef wsp_ggml_backend_register_t * (*wsp_ggml_backend_init)(void);
+
+#ifdef __cplusplus
+}
+#endif
package/cpp/ggml-backend-reg.cpp ADDED
@@ -0,0 +1,204 @@
+#include "ggml-backend-impl.h"
+#include "ggml-backend.h"
+#include "ggml-cpu.h"
+#include "ggml-impl.h"
+#include <cstring>
+#include <vector>
+
+// Backend registry
+
+#ifdef WSP_GGML_USE_CUDA
+#include "ggml-cuda.h"
+#endif
+
+#ifdef WSP_GGML_USE_METAL
+#include <TargetConditionals.h>
+
+#if !TARGET_OS_SIMULATOR
+#include "ggml-metal.h"
+#endif
+
+#endif
+
+#ifdef WSP_GGML_USE_SYCL
+#include "ggml-sycl.h"
+#endif
+
+#ifdef WSP_GGML_USE_VULKAN
+#include "ggml-vulkan.h"
+#endif
+
+#ifdef WSP_GGML_USE_BLAS
+#include "ggml-blas.h"
+#endif
+
+#ifdef WSP_GGML_USE_RPC
+#include "ggml-rpc.h"
+#endif
+
+#ifdef WSP_GGML_USE_AMX
+#include "ggml-amx.h"
+#endif
+
+#ifdef WSP_GGML_USE_CANN
+#include "ggml-cann.h"
+#endif
+
+#ifdef WSP_GGML_USE_KOMPUTE
+#include "ggml-kompute.h"
+#endif
+
+struct wsp_ggml_backend_registry {
+    std::vector<wsp_ggml_backend_reg_t> backends;
+    std::vector<wsp_ggml_backend_dev_t> devices;
+
+    wsp_ggml_backend_registry() {
+#ifdef WSP_GGML_USE_CUDA
+        register_backend(wsp_ggml_backend_cuda_reg());
+#endif
+#ifdef WSP_GGML_USE_METAL
+#if !TARGET_OS_SIMULATOR
+        register_backend(wsp_ggml_backend_metal_reg());
+#endif
+#endif
+#ifdef WSP_GGML_USE_SYCL
+        register_backend(wsp_ggml_backend_sycl_reg());
+#endif
+#ifdef WSP_GGML_USE_VULKAN
+        register_backend(wsp_ggml_backend_vk_reg());
+#endif
+#ifdef WSP_GGML_USE_CANN
+        register_backend(wsp_ggml_backend_cann_reg());
+#endif
+#ifdef WSP_GGML_USE_BLAS
+        register_backend(wsp_ggml_backend_blas_reg());
+#endif
+#ifdef WSP_GGML_USE_RPC
+        register_backend(wsp_ggml_backend_rpc_reg());
+#endif
+#ifdef WSP_GGML_USE_AMX
+        register_backend(wsp_ggml_backend_amx_reg());
+#endif
+#ifdef WSP_GGML_USE_KOMPUTE
+        register_backend(wsp_ggml_backend_kompute_reg());
+#endif
+
+        register_backend(wsp_ggml_backend_cpu_reg());
+    }
+
+    void register_backend(wsp_ggml_backend_reg_t reg) {
+        if (!reg) {
+            return;
+        }
+
+#ifndef NDEBUG
+        WSP_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
+            __func__, wsp_ggml_backend_reg_name(reg), wsp_ggml_backend_reg_dev_count(reg));
+#endif
+        backends.push_back(reg);
+        for (size_t i = 0; i < wsp_ggml_backend_reg_dev_count(reg); i++) {
+            register_device(wsp_ggml_backend_reg_dev_get(reg, i));
+        }
+    }
+
+    void register_device(wsp_ggml_backend_dev_t device) {
+#ifndef NDEBUG
+        WSP_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, wsp_ggml_backend_dev_name(device), wsp_ggml_backend_dev_description(device));
+#endif
+        devices.push_back(device);
+    }
+};
+
+static wsp_ggml_backend_registry & get_reg() {
+    static wsp_ggml_backend_registry reg;
+    return reg;
+}
+
+// Internal API
+void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg) {
+    get_reg().register_backend(reg);
+}
+
+void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device) {
+    get_reg().register_device(device);
+}
+
+// Backend (reg) enumeration
+size_t wsp_ggml_backend_reg_count() {
+    return get_reg().backends.size();
+}
+
+wsp_ggml_backend_reg_t wsp_ggml_backend_reg_get(size_t index) {
+    WSP_GGML_ASSERT(index < wsp_ggml_backend_reg_count());
+    return get_reg().backends[index];
+}
+
+wsp_ggml_backend_reg_t wsp_ggml_backend_reg_by_name(const char * name) {
+    for (size_t i = 0; i < wsp_ggml_backend_reg_count(); i++) {
+        wsp_ggml_backend_reg_t reg = wsp_ggml_backend_reg_get(i);
+        if (std::strcmp(wsp_ggml_backend_reg_name(reg), name) == 0) {
+            return reg;
+        }
+    }
+    return NULL;
+}
+
+// Device enumeration
+size_t wsp_ggml_backend_dev_count() {
+    return get_reg().devices.size();
+}
+
+wsp_ggml_backend_dev_t wsp_ggml_backend_dev_get(size_t index) {
+    WSP_GGML_ASSERT(index < wsp_ggml_backend_dev_count());
+    return get_reg().devices[index];
+}
+
+wsp_ggml_backend_dev_t wsp_ggml_backend_dev_by_name(const char * name) {
+    for (size_t i = 0; i < wsp_ggml_backend_dev_count(); i++) {
+        wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
+        if (strcmp(wsp_ggml_backend_dev_name(dev), name) == 0) {
+            return dev;
+        }
+    }
+    return NULL;
+}
+
+wsp_ggml_backend_dev_t wsp_ggml_backend_dev_by_type(enum wsp_ggml_backend_dev_type type) {
+    for (size_t i = 0; i < wsp_ggml_backend_dev_count(); i++) {
+        wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
+        if (wsp_ggml_backend_dev_type(dev) == type) {
+            return dev;
+        }
+    }
+    return NULL;
+}
+
+// Convenience functions
+wsp_ggml_backend_t wsp_ggml_backend_init_by_name(const char * name, const char * params) {
+    wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_by_name(name);
+    if (!dev) {
+        return NULL;
+    }
+    return wsp_ggml_backend_dev_init(dev, params);
+}
+
+wsp_ggml_backend_t wsp_ggml_backend_init_by_type(enum wsp_ggml_backend_dev_type type, const char * params) {
+    wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_by_type(type);
+    if (!dev) {
+        return NULL;
+    }
+    return wsp_ggml_backend_dev_init(dev, params);
+}
+
+wsp_ggml_backend_t wsp_ggml_backend_init_best(void) {
+    wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_by_type(WSP_GGML_BACKEND_DEVICE_TYPE_GPU);
+    if (!dev) {
+        dev = wsp_ggml_backend_dev_by_type(WSP_GGML_BACKEND_DEVICE_TYPE_CPU);
+    }
+    if (!dev) {
+        return NULL;
+    }
+    return wsp_ggml_backend_dev_init(dev, NULL);
+}
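Putting the registry to use: a hedged sketch that enumerates the devices registered above and initializes the best available backend via the convenience function defined in this file; example_list_and_init is a hypothetical name, and wsp_ggml_backend_free is assumed from ggml-backend.h:

    #include "ggml-backend.h"
    #include <stdio.h>

    static int example_list_and_init(void) {
        // devices were collected by the static registry constructor above
        for (size_t i = 0; i < wsp_ggml_backend_dev_count(); i++) {
            wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
            printf("device %zu: %s\n", i, wsp_ggml_backend_dev_name(dev));
        }
        // prefers a GPU device and falls back to CPU; NULL params = defaults
        wsp_ggml_backend_t backend = wsp_ggml_backend_init_best();
        if (!backend) {
            return 1;
        }
        wsp_ggml_backend_free(backend);
        return 0;
    }

Note that Metal is skipped on the iOS simulator (per the TARGET_OS_SIMULATOR guard above), so there wsp_ggml_backend_init_best() falls back to the CPU device.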