whispercpp 1.2.0.2 → 1.3.1

Files changed (135)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +46 -86
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -7
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/ggml/include/ggml.h +2285 -0
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/include/whisper.h +672 -0
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1608 -159
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/src/whisper.cpp +7393 -0
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -8616
  133. data/ext/ggml.h +0 -748
  134. data/ext/whisper.cpp +0 -4829
  135. data/ext/whisper.h +0 -402
data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp
@@ -0,0 +1,622 @@
+ #include "ggml-backend.h"
+ #include "ggml-backend-impl.h"
+ #include "ggml-cpu.h"
+ #include "ggml-cpu-aarch64.h"
+ #include "ggml-cpu-traits.h"
+ #include "ggml-impl.h"
+ #include "amx/amx.h"
+
+ #include <cctype>
+ #include <string>
+ #include <vector>
+
+ #ifdef GGML_USE_CPU_HBM
+ #include "ggml-cpu-hbm.h"
+ #endif
+
+ #if defined(__APPLE__)
+ #include <sys/types.h>
+ #include <sys/sysctl.h>
+ #endif
+
+ #if defined(_WIN32)
+ #define WIN32_LEAN_AND_MEAN
+ #ifndef NOMINMAX
+     #define NOMINMAX
+ #endif
+ #include <windows.h>
+ #endif
+
+ // ggml-backend interface
+
+ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
+     static std::vector<ggml_backend_buffer_type_t> bufts = []() {
+         std::vector<ggml_backend_buffer_type_t> bufts;
+
+ #if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
+         if (ggml_backend_amx_buffer_type()) {
+             bufts.push_back(ggml_backend_amx_buffer_type());
+         }
+ #endif
+
+ #ifdef GGML_USE_CPU_AARCH64
+         if (ggml_backend_cpu_aarch64_buffer_type()) {
+             bufts.push_back(ggml_backend_cpu_aarch64_buffer_type());
+         }
+ #endif
+
+         bufts.push_back(NULL);
+
+         return bufts;
+     }();
+
+     return bufts;
+ }
+
+ static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
+     return ggml_backend_cpu_get_extra_buffers_type().data();
+
+     GGML_UNUSED(device);
+ }
+
+ static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
+     for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
+         if (extra && extra == buft) return true;
+     }
+     return false;
+ }
+
+ // CPU backend - backend (stream)
+
+ struct ggml_backend_cpu_context {
+     int n_threads;
+     ggml_threadpool_t threadpool;
+
+     uint8_t * work_data;
+     size_t work_size;
+
+     ggml_abort_callback abort_callback;
+     void * abort_callback_data;
+ };
+
+ static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
+     return "CPU";
+
+     GGML_UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_free(ggml_backend_t backend) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+     delete[] cpu_ctx->work_data;
+     delete cpu_ctx;
+     delete backend;
+ }
+
+ struct ggml_backend_plan_cpu {
+     struct ggml_cplan cplan;
+     struct ggml_cgraph cgraph;
+ };
+
+ static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, const struct ggml_cgraph * cgraph) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+
+     struct ggml_backend_plan_cpu * cpu_plan = new ggml_backend_plan_cpu;
+
+     cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
+     cpu_plan->cgraph = *cgraph; // FIXME: deep copy
+
+     if (cpu_plan->cplan.work_size > 0) {
+         cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
+         if (cpu_plan->cplan.work_data == NULL) {
+             delete cpu_plan;
+             return NULL;
+         }
+     }
+
+     cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
+     cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
+
+     return cpu_plan;
+ }
+
+ static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
+
+     delete[] cpu_plan->cplan.work_data;
+     delete cpu_plan;
+
+     GGML_UNUSED(backend);
+ }
+
+ static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
+
+     return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
+
+     GGML_UNUSED(backend);
+ }
+
+ static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+
+     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
+
+     if (cpu_ctx->work_size < cplan.work_size) {
+         delete[] cpu_ctx->work_data;
+         cpu_ctx->work_data = new uint8_t[cplan.work_size];
+         if (cpu_ctx->work_data == NULL) {
+             cpu_ctx->work_size = 0;
+             return GGML_STATUS_ALLOC_FAILED;
+         }
+         cpu_ctx->work_size = cplan.work_size;
+     }
+     cplan.work_data = (uint8_t *)cpu_ctx->work_data;
+
+     cplan.abort_callback = cpu_ctx->abort_callback;
+     cplan.abort_callback_data = cpu_ctx->abort_callback_data;
+
+     return ggml_graph_compute(cgraph, &cplan);
+ }
+
+ static const struct ggml_backend_i ggml_backend_cpu_i = {
+     /* .get_name = */ ggml_backend_cpu_get_name,
+     /* .free = */ ggml_backend_cpu_free,
+     /* .set_tensor_async = */ NULL,
+     /* .get_tensor_async = */ NULL,
+     /* .cpy_tensor_async = */ NULL,
+     /* .synchronize = */ NULL,
+     /* .graph_plan_create = */ ggml_backend_cpu_graph_plan_create,
+     /* .graph_plan_free = */ ggml_backend_cpu_graph_plan_free,
+     /* .graph_plan_update = */ NULL,
+     /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
+     /* .graph_compute = */ ggml_backend_cpu_graph_compute,
+     /* .event_record = */ NULL,
+     /* .event_wait = */ NULL,
+ };
+
+ static ggml_guid_t ggml_backend_cpu_guid(void) {
+     static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
+     return &guid;
+ }
+
+ ggml_backend_t ggml_backend_cpu_init(void) {
+     // initialize CPU backend now to avoid slowing the first graph computation
+     ggml_cpu_init();
+
+     struct ggml_backend_cpu_context * ctx = new ggml_backend_cpu_context;
+     if (ctx == NULL) {
+         return NULL;
+     }
+
+     ctx->n_threads = GGML_DEFAULT_N_THREADS;
+     ctx->threadpool = NULL;
+     ctx->work_data = NULL;
+     ctx->work_size = 0;
+     ctx->abort_callback = NULL;
+     ctx->abort_callback_data = NULL;
+
+     ggml_backend_t cpu_backend = new ggml_backend {
+         /* .guid = */ ggml_backend_cpu_guid(),
+         /* .interface = */ ggml_backend_cpu_i,
+         /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
+         /* .context = */ ctx,
+     };
+
+     if (cpu_backend == NULL) {
+         delete ctx;
+         return NULL;
+     }
+
+     return cpu_backend;
+ }
+
+ bool ggml_backend_is_cpu(ggml_backend_t backend) {
+     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
+ }
+
+ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
+     GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+     struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+     ctx->n_threads = n_threads;
+ }
+
+ void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
+     GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+     struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+
+     if (ctx->threadpool && ctx->threadpool != threadpool) {
+         // already had a different threadpool, pause/suspend it before switching
+         ggml_threadpool_pause(ctx->threadpool);
+     }
+     ctx->threadpool = threadpool;
+ }
+
+ void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
+     GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+     struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+     ctx->abort_callback = abort_callback;
+     ctx->abort_callback_data = abort_callback_data;
+ }
+
+ // CPU backend - device
+
+ struct ggml_backend_cpu_device_context {
+     std::string description = "CPU";
+
+     ggml_backend_cpu_device_context() {
+ #ifdef __APPLE__
+         size_t len = 0;
+         if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) {
+             description.resize(len);
+             sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT
+         }
+ #elif defined(__linux__)
+         FILE * f = fopen("/proc/cpuinfo", "r");
+         if (f) {
+             char buf[1024];
+             while (fgets(buf, sizeof(buf), f)) {
+                 if (strncmp(buf, "model name", 10) == 0) {
+                     char * p = strchr(buf, ':');
+                     if (p) {
+                         p++;
+                         while (std::isspace(*p)) {
+                             p++;
+                         }
+                         while (std::isspace(p[strlen(p) - 1])) {
+                             p[strlen(p) - 1] = '\0';
+                         }
+                         description = p;
+                         break;
+                     }
+                 }
+             }
+             fclose(f);
+         }
+ #elif defined(_WIN32)
+         HKEY hKey;
+         if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
+                          TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
+                          0,
+                          KEY_READ,
+                          &hKey) == ERROR_SUCCESS) {
+             DWORD cpu_brand_size = 0;
+             if (RegQueryValueExA(hKey,
+                                  TEXT("ProcessorNameString"),
+                                  NULL,
+                                  NULL,
+                                  NULL,
+                                  &cpu_brand_size) == ERROR_SUCCESS) {
+                 description.resize(cpu_brand_size);
+                 if (RegQueryValueExA(hKey,
+                                      TEXT("ProcessorNameString"),
+                                      NULL,
+                                      NULL,
+                                      (LPBYTE)&description[0], // NOLINT
+                                      &cpu_brand_size) == ERROR_SUCCESS) {
+                     if (description.find('\0') != std::string::npos) {
+                         description.resize(description.find('\0'));
+                     }
+                 }
+             }
+             RegCloseKey(hKey);
+         }
+ #endif
+     }
+ };
+
+ static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
+     return "CPU";
+
+     GGML_UNUSED(dev);
+ }
+
+ static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t dev) {
+     struct ggml_backend_cpu_device_context * ctx = (struct ggml_backend_cpu_device_context *)dev->context;
+
+     return ctx->description.c_str();
+ }
+
+ static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+     // TODO
+     *free = 0;
+     *total = 0;
+
+     GGML_UNUSED(dev);
+ }
+
+ static enum ggml_backend_dev_type ggml_backend_cpu_device_get_type(ggml_backend_dev_t dev) {
+     return GGML_BACKEND_DEVICE_TYPE_CPU;
+
+     GGML_UNUSED(dev);
+ }
+
+ static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+     props->name = ggml_backend_cpu_device_get_name(dev);
+     props->description = ggml_backend_cpu_device_get_description(dev);
+     props->type = ggml_backend_cpu_device_get_type(dev);
+     ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
+     props->caps = {
+         /* .async = */ false,
+         /* .host_buffer = */ false,
+         /* .buffer_from_host_ptr = */ true,
+         /* .events = */ false,
+     };
+ }
+
+ static ggml_backend_t ggml_backend_cpu_device_init_backend(ggml_backend_dev_t dev, const char * params) {
+     return ggml_backend_cpu_init();
+
+     GGML_UNUSED(dev);
+     GGML_UNUSED(params);
+ }
+
+ static ggml_backend_buffer_type_t ggml_backend_cpu_device_get_buffer_type(ggml_backend_dev_t dev) {
+     return ggml_backend_cpu_buffer_type();
+
+     GGML_UNUSED(dev);
+ }
+
+ static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
+     return ggml_backend_cpu_buffer_from_ptr(ptr, size);
+
+     GGML_UNUSED(dev);
+     GGML_UNUSED(max_tensor_size);
+ }
+
+ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
+     const struct ggml_tensor * src0 = op->src[0];
+     const struct ggml_tensor * src1 = op->src[1];
+
+     if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
+         return true;
+     }
+
+     // extra_buffer_op?
+     for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
+         if (extra) {
+             auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
+             if (buf_extra && buf_extra->supports_op(dev, op)) {
+                 return true;
+             }
+         }
+     }
+
+     // the other case need host buffer.
+     for (int i = 0; i < GGML_MAX_SRC; i++) {
+         if (op->src[i] && op->src[i]->buffer && !ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
+             return false;
+         }
+     }
+
+     switch (op->op) {
+         case GGML_OP_CPY:
+             return
+                 op->type != GGML_TYPE_IQ3_XXS &&
+                 op->type != GGML_TYPE_IQ3_S &&
+                 op->type != GGML_TYPE_IQ2_XXS &&
+                 op->type != GGML_TYPE_IQ2_XS &&
+                 op->type != GGML_TYPE_IQ2_S &&
+                 op->type != GGML_TYPE_IQ1_S &&
+                 op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
+         case GGML_OP_MUL_MAT:
+             return src1->type == GGML_TYPE_F32 || src1->type == ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
+         case GGML_OP_ROPE_BACK:
+             return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
+         case GGML_OP_IM2COL_BACK:
+             return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
+         case GGML_OP_OUT_PROD:
+             return (src0->type == GGML_TYPE_F32 || ggml_is_quantized(src0->type)) && src1->type == GGML_TYPE_F32;
+         default:
+             return true;
+     }
+ }
+
+ static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+     return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
+     GGML_UNUSED(dev);
+ }
+
+ static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {
+     /* .get_name = */ ggml_backend_cpu_device_get_name,
+     /* .get_description = */ ggml_backend_cpu_device_get_description,
+     /* .get_memory = */ ggml_backend_cpu_device_get_memory,
+     /* .get_type = */ ggml_backend_cpu_device_get_type,
+     /* .get_props = */ ggml_backend_cpu_device_get_props,
+     /* .init_backend = */ ggml_backend_cpu_device_init_backend,
+     /* .get_buffer_type = */ ggml_backend_cpu_device_get_buffer_type,
+     /* .get_host_buffer_type = */ NULL,
+     /* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,
+     /* .supports_op = */ ggml_backend_cpu_device_supports_op,
+     /* .supports_buft = */ ggml_backend_cpu_device_supports_buft,
+     /* .offload_op = */ NULL,
+     /* .event_new = */ NULL,
+     /* .event_free = */ NULL,
+     /* .event_synchronize = */ NULL,
+ };
+
+ // CPU backend - backend (reg)
+
+ static const char * ggml_backend_cpu_reg_get_name(ggml_backend_reg_t reg) {
+     return "CPU";
+
+     GGML_UNUSED(reg);
+ }
+
+ static size_t ggml_backend_cpu_reg_get_device_count(ggml_backend_reg_t reg) {
+     return 1;
+
+     GGML_UNUSED(reg);
+ }
+
+ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg, size_t index) {
+     GGML_ASSERT(index == 0);
+
+     static ggml_backend_cpu_device_context ctx;
+     static ggml_backend_device ggml_backend_cpu_device = {
+         /* .iface = */ ggml_backend_cpu_device_i,
+         /* .reg = */ reg,
+         /* .context = */ &ctx,
+     };
+
+     return &ggml_backend_cpu_device;
+ }
+
+ // This is intended to replace the the ggml_cpu_has_* functions when loading the CPU backend dynamically,
+ // and additionally to allow other backends to expose their own list of features that applications can query using the same API
+ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
+     static std::vector<ggml_backend_feature> features = []() {
+         ggml_cpu_init();
+
+         std::vector<ggml_backend_feature> features;
+         if (ggml_cpu_has_sse3()) {
+             features.push_back({ "SSE3", "1" });
+         }
+         if (ggml_cpu_has_ssse3()) {
+             features.push_back({ "SSSE3", "1" });
+         }
+         if (ggml_cpu_has_avx()) {
+             features.push_back({ "AVX", "1" });
+         }
+         if (ggml_cpu_has_avx_vnni()) {
+             features.push_back({ "AVX_VNNI", "1" });
+         }
+         if (ggml_cpu_has_avx2()) {
+             features.push_back({ "AVX2", "1" });
+         }
+         if (ggml_cpu_has_f16c()) {
+             features.push_back({ "F16C", "1" });
+         }
+         if (ggml_cpu_has_fma()) {
+             features.push_back({ "FMA", "1" });
+         }
+         if (ggml_cpu_has_avx512()) {
+             features.push_back({ "AVX512", "1" });
+         }
+         if (ggml_cpu_has_avx512_vbmi()) {
+             features.push_back({ "AVX512_VBMI", "1" });
+         }
+         if (ggml_cpu_has_avx512_vnni()) {
+             features.push_back({ "AVX512_VNNI", "1" });
+         }
+         if (ggml_cpu_has_avx512_bf16()) {
+             features.push_back({ "AVX512_BF16", "1" });
+         }
+         if (ggml_cpu_has_amx_int8()) {
+             features.push_back({ "AMX_INT8", "1" });
+         }
+         if (ggml_cpu_has_neon()) {
+             features.push_back({ "NEON", "1" });
+         }
+         if (ggml_cpu_has_arm_fma()) {
+             features.push_back({ "ARM_FMA", "1" });
+         }
+         if (ggml_cpu_has_fp16_va()) {
+             features.push_back({ "FP16_VA", "1" });
+         }
+         if (ggml_cpu_has_matmul_int8()) {
+             features.push_back({ "MATMUL_INT8", "1" });
+         }
+         if (ggml_cpu_has_sve()) {
+             features.push_back({ "SVE", "1" });
+         }
+         if (ggml_cpu_get_sve_cnt() > 0) {
+             static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
+             features.push_back({ "SVE_CNT", sve_cnt.c_str() });
+         }
+         if (ggml_cpu_has_riscv_v()) {
+             features.push_back({ "RISCV_V", "1" });
+         }
+         if (ggml_cpu_has_vsx()) {
+             features.push_back({ "VSX", "1" });
+         }
+         if (ggml_cpu_has_wasm_simd()) {
+             features.push_back({ "WASM_SIMD", "1" });
+         }
+         if (ggml_cpu_has_llamafile()) {
+             features.push_back({ "LLAMAFILE", "1" });
+         }
+     #ifdef GGML_USE_ACCELERATE
+         features.push_back({ "ACCELERATE", "1" });
+     #endif
+     #ifdef GGML_USE_CPU_HBM
+         features.push_back({ "CPU_HBM", "1" });
+     #endif
+     #ifdef GGML_USE_OPENMP
+         features.push_back({ "OPENMP", "1" });
+     #endif
+     #ifdef GGML_USE_CPU_AARCH64
+         features.push_back({ "AARCH64_REPACK", "1" });
+     #endif
+
+         features.push_back({ nullptr, nullptr });
+
+         return features;
+     }();
+
+     return features.data();
+
+     GGML_UNUSED(reg);
+ }
+
+ static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
+     if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
+         ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
+         return (void *)fct;
+     }
+     if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
+         ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
+         return (void *)fct;
+     }
+     if (strcmp(name, "ggml_backend_get_features") == 0) {
+         return (void *)ggml_backend_cpu_get_features;
+     }
+     if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
+         return (void *)ggml_backend_cpu_set_abort_callback;
+     }
+     if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
+         return (void *)ggml_numa_init;
+     }
+     if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
+         return (void *)ggml_is_numa;
+     }
+
+     // threadpool - TODO: move to ggml-base
+     if (strcmp(name, "ggml_threadpool_new") == 0) {
+         return (void *)ggml_threadpool_new;
+     }
+     if (strcmp(name, "ggml_threadpool_free") == 0) {
+         return (void *)ggml_threadpool_free;
+     }
+     if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
+         return (void *)ggml_backend_cpu_set_threadpool;
+     }
+
+     return NULL;
+
+     GGML_UNUSED(reg);
+ }
+
+ static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
+     /* .get_name = */ ggml_backend_cpu_reg_get_name,
+     /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
+     /* .get_device = */ ggml_backend_cpu_reg_get_device,
+     /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
+ };
+
+ ggml_backend_reg_t ggml_backend_cpu_reg(void) {
+     // init CPU feature detection
+     ggml_cpu_init();
+
+     static struct ggml_backend_reg ggml_backend_cpu_reg = {
+         /* .api_version = */ GGML_BACKEND_API_VERSION,
+         /* .iface = */ ggml_backend_cpu_reg_i,
+         /* .context = */ NULL,
+     };
+
+     return &ggml_backend_cpu_reg;
+ }
+
+ GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)
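
For orientation, here is a minimal sketch of how an application drives the CPU backend defined in the file above through the public ggml API. It assumes the headers bundled with this gem under data/ext/ggml/include/ (ggml.h, ggml-alloc.h, ggml-backend.h, ggml-cpu.h); the thread count, tensor sizes, and input values are illustrative only, and ggml_backend_get_features_t is assumed to be the function-pointer typedef that ggml-backend.h declares for the "ggml_backend_get_features" entry point.

// Sketch: compute c = a + b on the CPU backend, then list its reported features.
// Assumes the ggml headers shipped with this gem; error handling is omitted.
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"

#include <cstdio>

int main() {
    // create the CPU backend (ggml_backend_cpu_init above) and pick a thread count
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_cpu_set_n_threads(backend, 4);

    // build a tiny graph in a no_alloc context; tensor data will live in a backend buffer
    ggml_init_params ip = {
        /* .mem_size   = */ ggml_tensor_overhead() * 8 + ggml_graph_overhead(),
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ true,
    };
    ggml_context * ctx = ggml_init(ip);

    ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_tensor * c = ggml_add(ctx, a, b);

    ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    // allocate tensor storage in a CPU buffer and upload the inputs
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
    const float av[4] = { 1, 2, 3, 4 };
    const float bv[4] = { 10, 20, 30, 40 };
    ggml_backend_tensor_set(a, av, 0, sizeof(av));
    ggml_backend_tensor_set(b, bv, 0, sizeof(bv));

    // runs ggml_backend_cpu_graph_compute through the backend interface
    ggml_backend_graph_compute(backend, gf);

    float cv[4];
    ggml_backend_tensor_get(c, cv, 0, sizeof(cv));
    printf("c = %g %g %g %g\n", cv[0], cv[1], cv[2], cv[3]);

    // optional: resolve the feature list by name, as a dynamically loaded backend would
    ggml_backend_reg_t reg = ggml_backend_cpu_reg();
    auto get_features = (ggml_backend_get_features_t)
        ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
    if (get_features) {
        for (ggml_backend_feature * f = get_features(reg); f->name != nullptr; f++) {
            printf("%s = %s\n", f->name, f->value);
        }
    }

    ggml_backend_buffer_free(buf);
    ggml_free(ctx);
    ggml_backend_free(backend);
    return 0;
}

The get_proc_address lookup at the end is the mechanism described in the comment before ggml_backend_cpu_get_features: when the CPU backend is loaded dynamically, applications resolve optional entry points such as ggml_backend_set_n_threads and ggml_backend_get_features by name instead of calling the ggml_cpu_has_* functions directly.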