whispercpp 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,552 @@
1
+ #include "ggml-backend-impl.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-impl.h"
4
+ #include <algorithm>
5
+ #include <codecvt>
6
+ #include <cstring>
7
+ #include <filesystem>
8
+ #include <locale>
9
+ #include <memory>
10
+ #include <string>
11
+ #include <type_traits>
12
+ #include <vector>
13
+
14
+ #ifdef _WIN32
15
+ # define WIN32_LEAN_AND_MEAN
16
+ # ifndef NOMINMAX
17
+ # define NOMINMAX
18
+ # endif
19
+ # include <windows.h>
20
+ #elif defined(__APPLE__)
21
+ # include <mach-o/dyld.h>
22
+ # include <dlfcn.h>
23
+ #else
24
+ # include <dlfcn.h>
25
+ # include <unistd.h>
26
+ #endif
27
+
28
+ // Backend registry
29
+ #ifdef GGML_USE_CPU
30
+ #include "ggml-cpu.h"
31
+ #endif
32
+
33
+ #ifdef GGML_USE_CUDA
34
+ #include "ggml-cuda.h"
35
+ #endif
36
+
37
+ #ifdef GGML_USE_METAL
38
+ #include "ggml-metal.h"
39
+ #endif
40
+
41
+ #ifdef GGML_USE_SYCL
42
+ #include "ggml-sycl.h"
43
+ #endif
44
+
45
+ #ifdef GGML_USE_VULKAN
46
+ #include "ggml-vulkan.h"
47
+ #endif
48
+
49
+ #ifdef GGML_USE_OPENCL
50
+ #include "ggml-opencl.h"
51
+ #endif
52
+
53
+ #ifdef GGML_USE_BLAS
54
+ #include "ggml-blas.h"
55
+ #endif
56
+
57
+ #ifdef GGML_USE_RPC
58
+ #include "ggml-rpc.h"
59
+ #endif
60
+
61
+ #ifdef GGML_USE_CANN
62
+ #include "ggml-cann.h"
63
+ #endif
64
+
65
+ #ifdef GGML_USE_KOMPUTE
66
+ #include "ggml-kompute.h"
67
+ #endif
68
+
69
+ #ifdef _WIN32
70
+
71
+ using dl_handle = std::remove_pointer_t<HMODULE>;
72
+
73
+ struct dl_handle_deleter {
74
+ void operator()(HMODULE handle) {
75
+ FreeLibrary(handle);
76
+ }
77
+ };
78
+
79
+ static dl_handle * dl_load_library(const std::wstring & path) {
80
+ // suppress error dialogs for missing DLLs
81
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
82
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
83
+
84
+ HMODULE handle = LoadLibraryW(path.c_str());
85
+
86
+ SetErrorMode(old_mode);
87
+
88
+ return handle;
89
+ }
90
+
91
+ static dl_handle * dl_load_library(const std::string & path) {
92
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
93
+ return dl_load_library(converter.from_bytes(path));
94
+ }
95
+
96
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
97
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
98
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
99
+
100
+ void * p = (void *) GetProcAddress(handle, name);
101
+
102
+ SetErrorMode(old_mode);
103
+
104
+ return p;
105
+ }
106
+
107
+ #else
108
+
109
// On non-Windows platforms a library handle is the untyped pointer used with dlopen/dlclose.
using dl_handle = void;

// Deleter functor that releases a library handle by calling dlclose on it.
struct dl_handle_deleter {
    void operator()(void * lib) {
        dlclose(lib);
    }
};
116
+
117
// Opens the shared library at `path` with dlopen, using immediate binding
// (RTLD_NOW) and local symbol visibility (RTLD_LOCAL), and returns dlopen's result.
static void * dl_load_library(const std::string & path) {
    return dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
}
122
+
123
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
124
+ return dlsym(handle, name);
125
+ }
126
+
127
+ #endif
128
+
129
+ using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
130
+
131
+ struct ggml_backend_reg_entry {
132
+ ggml_backend_reg_t reg;
133
+ dl_handle_ptr handle;
134
+ };
135
+
136
+ struct ggml_backend_registry {
137
+ std::vector<ggml_backend_reg_entry> backends;
138
+ std::vector<ggml_backend_dev_t> devices;
139
+
140
+ ggml_backend_registry() {
141
+ #ifdef GGML_USE_CUDA
142
+ register_backend(ggml_backend_cuda_reg());
143
+ #endif
144
+ #ifdef GGML_USE_METAL
145
+ register_backend(ggml_backend_metal_reg());
146
+ #endif
147
+ #ifdef GGML_USE_SYCL
148
+ register_backend(ggml_backend_sycl_reg());
149
+ #endif
150
+ #ifdef GGML_USE_VULKAN
151
+ register_backend(ggml_backend_vk_reg());
152
+ #endif
153
+ #ifdef GGML_USE_OPENCL
154
+ register_backend(ggml_backend_opencl_reg());
155
+ #endif
156
+ #ifdef GGML_USE_CANN
157
+ register_backend(ggml_backend_cann_reg());
158
+ #endif
159
+ #ifdef GGML_USE_BLAS
160
+ register_backend(ggml_backend_blas_reg());
161
+ #endif
162
+ #ifdef GGML_USE_RPC
163
+ register_backend(ggml_backend_rpc_reg());
164
+ #endif
165
+ #ifdef GGML_USE_KOMPUTE
166
+ register_backend(ggml_backend_kompute_reg());
167
+ #endif
168
+ #ifdef GGML_USE_CPU
169
+ register_backend(ggml_backend_cpu_reg());
170
+ #endif
171
+ }
172
+
173
+ ~ggml_backend_registry() {
174
+ // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
175
+ // since backend threads may still be running and accessing resources from the dynamic library
176
+ for (auto & entry : backends) {
177
+ if (entry.handle) {
178
+ entry.handle.release(); // NOLINT
179
+ }
180
+ }
181
+ }
182
+
183
+ void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
184
+ if (!reg) {
185
+ return;
186
+ }
187
+
188
+ #ifndef NDEBUG
189
+ GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
190
+ __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
191
+ #endif
192
+ backends.push_back({ reg, std::move(handle) });
193
+ for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
194
+ register_device(ggml_backend_reg_dev_get(reg, i));
195
+ }
196
+ }
197
+
198
+ void register_device(ggml_backend_dev_t device) {
199
+ #ifndef NDEBUG
200
+ GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
201
+ #endif
202
+ devices.push_back(device);
203
+ }
204
+
205
+ ggml_backend_reg_t load_backend(const char * path, bool silent) {
206
+ dl_handle_ptr handle { dl_load_library(path) };
207
+ if (!handle) {
208
+ if (!silent) {
209
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
210
+ }
211
+ return nullptr;
212
+ }
213
+
214
+ auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
215
+ if (score_fn && score_fn() == 0) {
216
+ if (!silent) {
217
+ GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
218
+ }
219
+ return nullptr;
220
+ }
221
+
222
+ auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
223
+ if (!backend_init_fn) {
224
+ if (!silent) {
225
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
226
+ }
227
+ return nullptr;
228
+ }
229
+
230
+ ggml_backend_reg_t reg = backend_init_fn();
231
+ if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
232
+ if (!silent) {
233
+ if (!reg) {
234
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
235
+ } else {
236
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
237
+ __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
238
+ }
239
+ }
240
+ return nullptr;
241
+ }
242
+
243
+ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
244
+
245
+ register_backend(reg, std::move(handle));
246
+
247
+ return reg;
248
+ }
249
+
250
+ void unload_backend(ggml_backend_reg_t reg, bool silent) {
251
+ auto it = std::find_if(backends.begin(), backends.end(),
252
+ [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
253
+
254
+ if (it == backends.end()) {
255
+ if (!silent) {
256
+ GGML_LOG_ERROR("%s: backend not found\n", __func__);
257
+ }
258
+ return;
259
+ }
260
+
261
+ if (!silent) {
262
+ GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
263
+ }
264
+
265
+ // remove devices
266
+ devices.erase(
267
+ std::remove_if(devices.begin(), devices.end(),
268
+ [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
269
+ devices.end());
270
+
271
+ // remove backend
272
+ backends.erase(it);
273
+ }
274
+ };
275
+
276
+ static ggml_backend_registry & get_reg() {
277
+ static ggml_backend_registry reg;
278
+ return reg;
279
+ }
280
+
281
+ // Internal API
282
+ void ggml_backend_register(ggml_backend_reg_t reg) {
283
+ get_reg().register_backend(reg);
284
+ }
285
+
286
+ void ggml_backend_device_register(ggml_backend_dev_t device) {
287
+ get_reg().register_device(device);
288
+ }
289
+
290
+ // Backend (reg) enumeration
291
// Case-insensitive equality of two NUL-terminated strings.
// Returns true when both strings have the same length and every pair of characters
// compares equal after std::tolower. Neither pointer may be null.
static bool striequals(const char * a, const char * b) {
    for (; *a && *b; a++, b++) {
        // std::tolower requires a value representable as unsigned char (or EOF);
        // passing a negative plain char (any byte >= 0x80 where char is signed) is undefined behavior
        const int lower_a = std::tolower(static_cast<unsigned char>(*a));
        const int lower_b = std::tolower(static_cast<unsigned char>(*b));
        if (lower_a != lower_b) {
            return false;
        }
    }
    // at least one string has ended: equal only if both ended together
    return *a == *b;
}
299
+
300
+ size_t ggml_backend_reg_count() {
301
+ return get_reg().backends.size();
302
+ }
303
+
304
+ ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
305
+ GGML_ASSERT(index < ggml_backend_reg_count());
306
+ return get_reg().backends[index].reg;
307
+ }
308
+
309
+ ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
310
+ for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
311
+ ggml_backend_reg_t reg = ggml_backend_reg_get(i);
312
+ if (striequals(ggml_backend_reg_name(reg), name)) {
313
+ return reg;
314
+ }
315
+ }
316
+ return nullptr;
317
+ }
318
+
319
+ // Device enumeration
320
+ size_t ggml_backend_dev_count() {
321
+ return get_reg().devices.size();
322
+ }
323
+
324
+ ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
325
+ GGML_ASSERT(index < ggml_backend_dev_count());
326
+ return get_reg().devices[index];
327
+ }
328
+
329
+ ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
330
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
331
+ ggml_backend_dev_t dev = ggml_backend_dev_get(i);
332
+ if (striequals(ggml_backend_dev_name(dev), name)) {
333
+ return dev;
334
+ }
335
+ }
336
+ return nullptr;
337
+ }
338
+
339
+ ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
340
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
341
+ ggml_backend_dev_t dev = ggml_backend_dev_get(i);
342
+ if (ggml_backend_dev_type(dev) == type) {
343
+ return dev;
344
+ }
345
+ }
346
+ return nullptr;
347
+ }
348
+
349
+ // Convenience functions
350
+ ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
351
+ ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
352
+ if (!dev) {
353
+ return nullptr;
354
+ }
355
+ return ggml_backend_dev_init(dev, params);
356
+ }
357
+
358
+ ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
359
+ ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
360
+ if (!dev) {
361
+ return nullptr;
362
+ }
363
+ return ggml_backend_dev_init(dev, params);
364
+ }
365
+
366
+ ggml_backend_t ggml_backend_init_best(void) {
367
+ ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
368
+ if (!dev) {
369
+ dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
370
+ }
371
+ if (!dev) {
372
+ return nullptr;
373
+ }
374
+ return ggml_backend_dev_init(dev, nullptr);
375
+ }
376
+
377
+ // Dynamic loading
378
+ ggml_backend_reg_t ggml_backend_load(const char * path) {
379
+ return get_reg().load_backend(path, false);
380
+ }
381
+
382
+ void ggml_backend_unload(ggml_backend_reg_t reg) {
383
+ get_reg().unload_backend(reg, true);
384
+ }
385
+
386
// Returns the directory that contains the running executable, with a trailing path separator.
//  - macOS:   uses _NSGetExecutablePath
//  - Linux:   resolves /proc/self/exe; yields "./" when the link cannot be read
//  - Windows: uses GetModuleFileNameA; yields "" when the call fails
//  - other platforms: yields "" (no lookup is implemented)
static std::string get_executable_path() {
#if defined(__APPLE__)
    // get executable path
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        // on failure `size` is updated to the required buffer size, so grow and retry
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    std::string base_path(path.data(), size);
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "/";
#elif defined(__linux__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
        if (len == -1) {
            break;
        }
        // a result that fills the whole buffer may have been truncated: double the buffer and retry
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        path.resize(path.size() * 2);
    }

    return base_path + "/";
#elif defined(_WIN32)
    std::vector<char> path(MAX_PATH);
    DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
    if (len == 0) {
        return "";
    }
    std::string base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of('\\');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "\\";
#else
    // unknown platform: without this branch the function would end without returning a value
    return "";
#endif
}
442
+
443
// File-name prefix of backend libraries: "ggml-" on Windows, "libggml-" elsewhere.
static std::string backend_filename_prefix() {
#ifdef _WIN32
    const char * prefix = "ggml-";
#else
    const char * prefix = "libggml-";
#endif
    return prefix;
}
450
+
451
// File-name extension of backend libraries: ".dll" on Windows, ".so" elsewhere.
static std::string backend_filename_suffix() {
#ifdef _WIN32
    const char * suffix = ".dll";
#else
    const char * suffix = ".so";
#endif
    return suffix;
}
458
+
459
+ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
460
+ // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
461
+ // TODO: search system paths
462
+ std::string file_prefix = backend_filename_prefix() + name + "-";
463
+ std::vector<std::string> search_paths;
464
+ if (user_search_path == nullptr) {
465
+ search_paths.push_back("./");
466
+ search_paths.push_back(get_executable_path());
467
+ } else {
468
+ #if defined(_WIN32)
469
+ search_paths.push_back(std::string(user_search_path) + "\\");
470
+ #else
471
+ search_paths.push_back(std::string(user_search_path) + "/");
472
+ #endif
473
+ }
474
+
475
+ int best_score = 0;
476
+ std::string best_path;
477
+
478
+ namespace fs = std::filesystem;
479
+ for (const auto & search_path : search_paths) {
480
+ if (!fs::exists(search_path)) {
481
+ continue;
482
+ }
483
+ fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
484
+ for (const auto & entry : dir_it) {
485
+ if (entry.is_regular_file()) {
486
+ std::string filename = entry.path().filename().string();
487
+ std::string ext = entry.path().extension().string();
488
+ if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
489
+ dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
490
+ if (!handle && !silent) {
491
+ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
492
+ }
493
+ if (handle) {
494
+ auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
495
+ if (score_fn) {
496
+ int s = score_fn();
497
+ #ifndef NDEBUG
498
+ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
499
+ #endif
500
+ if (s > best_score) {
501
+ best_score = s;
502
+ best_path = entry.path().string();
503
+ }
504
+ } else {
505
+ if (!silent) {
506
+ GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
507
+ }
508
+ }
509
+ }
510
+ }
511
+ }
512
+ }
513
+ }
514
+
515
+ if (best_score == 0) {
516
+ // try to load the base backend
517
+ for (const auto & search_path : search_paths) {
518
+ std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
519
+ if (fs::exists(path)) {
520
+ return get_reg().load_backend(path.c_str(), silent);
521
+ }
522
+ }
523
+ return nullptr;
524
+ }
525
+
526
+ return get_reg().load_backend(best_path.c_str(), silent);
527
+ }
528
+
529
+ void ggml_backend_load_all() {
530
+ ggml_backend_load_all_from_path(nullptr);
531
+ }
532
+
533
+ void ggml_backend_load_all_from_path(const char * dir_path) {
534
+ #ifdef NDEBUG
535
+ bool silent = true;
536
+ #else
537
+ bool silent = false;
538
+ #endif
539
+
540
+ ggml_backend_load_best("blas", silent, dir_path);
541
+ ggml_backend_load_best("cann", silent, dir_path);
542
+ ggml_backend_load_best("cuda", silent, dir_path);
543
+ ggml_backend_load_best("hip", silent, dir_path);
544
+ ggml_backend_load_best("kompute", silent, dir_path);
545
+ ggml_backend_load_best("metal", silent, dir_path);
546
+ ggml_backend_load_best("rpc", silent, dir_path);
547
+ ggml_backend_load_best("sycl", silent, dir_path);
548
+ ggml_backend_load_best("vulkan", silent, dir_path);
549
+ ggml_backend_load_best("opencl", silent, dir_path);
550
+ ggml_backend_load_best("musa", silent, dir_path);
551
+ ggml_backend_load_best("cpu", silent, dir_path);
552
+ }