llama-cpp-capacitor 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/cpp/LICENSE +21 -0
  2. package/cpp/README.md +4 -0
  3. package/cpp/anyascii.c +22223 -0
  4. package/cpp/anyascii.h +42 -0
  5. package/cpp/chat-parser.cpp +393 -0
  6. package/cpp/chat-parser.h +120 -0
  7. package/cpp/chat.cpp +2315 -0
  8. package/cpp/chat.h +221 -0
  9. package/cpp/common.cpp +1619 -0
  10. package/cpp/common.h +744 -0
  11. package/cpp/ggml-alloc.c +1028 -0
  12. package/cpp/ggml-alloc.h +76 -0
  13. package/cpp/ggml-backend-impl.h +255 -0
  14. package/cpp/ggml-backend-reg.cpp +600 -0
  15. package/cpp/ggml-backend.cpp +2118 -0
  16. package/cpp/ggml-backend.h +354 -0
  17. package/cpp/ggml-common.h +1878 -0
  18. package/cpp/ggml-cpp.h +39 -0
  19. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  20. package/cpp/ggml-cpu/amx/amx.h +8 -0
  21. package/cpp/ggml-cpu/amx/common.h +91 -0
  22. package/cpp/ggml-cpu/amx/mmq.cpp +2512 -0
  23. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  24. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  25. package/cpp/ggml-cpu/arch/arm/quants.c +3650 -0
  26. package/cpp/ggml-cpu/arch/arm/repack.cpp +1891 -0
  27. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  28. package/cpp/ggml-cpu/arch/x86/quants.c +3820 -0
  29. package/cpp/ggml-cpu/arch/x86/repack.cpp +6307 -0
  30. package/cpp/ggml-cpu/arch-fallback.h +215 -0
  31. package/cpp/ggml-cpu/binary-ops.cpp +158 -0
  32. package/cpp/ggml-cpu/binary-ops.h +16 -0
  33. package/cpp/ggml-cpu/common.h +73 -0
  34. package/cpp/ggml-cpu/ggml-cpu-impl.h +525 -0
  35. package/cpp/ggml-cpu/ggml-cpu.c +3578 -0
  36. package/cpp/ggml-cpu/ggml-cpu.cpp +672 -0
  37. package/cpp/ggml-cpu/ops.cpp +10587 -0
  38. package/cpp/ggml-cpu/ops.h +114 -0
  39. package/cpp/ggml-cpu/quants.c +1193 -0
  40. package/cpp/ggml-cpu/quants.h +97 -0
  41. package/cpp/ggml-cpu/repack.cpp +1982 -0
  42. package/cpp/ggml-cpu/repack.h +120 -0
  43. package/cpp/ggml-cpu/simd-mappings.h +1184 -0
  44. package/cpp/ggml-cpu/traits.cpp +36 -0
  45. package/cpp/ggml-cpu/traits.h +38 -0
  46. package/cpp/ggml-cpu/unary-ops.cpp +186 -0
  47. package/cpp/ggml-cpu/unary-ops.h +28 -0
  48. package/cpp/ggml-cpu/vec.cpp +348 -0
  49. package/cpp/ggml-cpu/vec.h +1121 -0
  50. package/cpp/ggml-cpu.h +145 -0
  51. package/cpp/ggml-impl.h +622 -0
  52. package/cpp/ggml-metal-impl.h +688 -0
  53. package/cpp/ggml-metal.h +66 -0
  54. package/cpp/ggml-metal.m +6833 -0
  55. package/cpp/ggml-opt.cpp +1093 -0
  56. package/cpp/ggml-opt.h +256 -0
  57. package/cpp/ggml-quants.c +5324 -0
  58. package/cpp/ggml-quants.h +106 -0
  59. package/cpp/ggml-threading.cpp +12 -0
  60. package/cpp/ggml-threading.h +14 -0
  61. package/cpp/ggml.c +7108 -0
  62. package/cpp/ggml.h +2492 -0
  63. package/cpp/gguf.cpp +1358 -0
  64. package/cpp/gguf.h +202 -0
  65. package/cpp/json-partial.cpp +256 -0
  66. package/cpp/json-partial.h +38 -0
  67. package/cpp/json-schema-to-grammar.cpp +985 -0
  68. package/cpp/json-schema-to-grammar.h +21 -0
  69. package/cpp/llama-adapter.cpp +388 -0
  70. package/cpp/llama-adapter.h +76 -0
  71. package/cpp/llama-arch.cpp +2355 -0
  72. package/cpp/llama-arch.h +499 -0
  73. package/cpp/llama-batch.cpp +875 -0
  74. package/cpp/llama-batch.h +160 -0
  75. package/cpp/llama-chat.cpp +783 -0
  76. package/cpp/llama-chat.h +65 -0
  77. package/cpp/llama-context.cpp +2748 -0
  78. package/cpp/llama-context.h +306 -0
  79. package/cpp/llama-cparams.cpp +5 -0
  80. package/cpp/llama-cparams.h +41 -0
  81. package/cpp/llama-cpp.h +30 -0
  82. package/cpp/llama-grammar.cpp +1229 -0
  83. package/cpp/llama-grammar.h +173 -0
  84. package/cpp/llama-graph.cpp +1891 -0
  85. package/cpp/llama-graph.h +810 -0
  86. package/cpp/llama-hparams.cpp +180 -0
  87. package/cpp/llama-hparams.h +233 -0
  88. package/cpp/llama-impl.cpp +167 -0
  89. package/cpp/llama-impl.h +61 -0
  90. package/cpp/llama-io.cpp +15 -0
  91. package/cpp/llama-io.h +35 -0
  92. package/cpp/llama-kv-cache-iswa.cpp +318 -0
  93. package/cpp/llama-kv-cache-iswa.h +135 -0
  94. package/cpp/llama-kv-cache.cpp +2059 -0
  95. package/cpp/llama-kv-cache.h +374 -0
  96. package/cpp/llama-kv-cells.h +491 -0
  97. package/cpp/llama-memory-hybrid.cpp +258 -0
  98. package/cpp/llama-memory-hybrid.h +137 -0
  99. package/cpp/llama-memory-recurrent.cpp +1146 -0
  100. package/cpp/llama-memory-recurrent.h +179 -0
  101. package/cpp/llama-memory.cpp +59 -0
  102. package/cpp/llama-memory.h +119 -0
  103. package/cpp/llama-mmap.cpp +600 -0
  104. package/cpp/llama-mmap.h +68 -0
  105. package/cpp/llama-model-loader.cpp +1164 -0
  106. package/cpp/llama-model-loader.h +170 -0
  107. package/cpp/llama-model-saver.cpp +282 -0
  108. package/cpp/llama-model-saver.h +37 -0
  109. package/cpp/llama-model.cpp +19042 -0
  110. package/cpp/llama-model.h +491 -0
  111. package/cpp/llama-sampling.cpp +2575 -0
  112. package/cpp/llama-sampling.h +32 -0
  113. package/cpp/llama-vocab.cpp +3792 -0
  114. package/cpp/llama-vocab.h +176 -0
  115. package/cpp/llama.cpp +358 -0
  116. package/cpp/llama.h +1373 -0
  117. package/cpp/log.cpp +427 -0
  118. package/cpp/log.h +103 -0
  119. package/cpp/minja/chat-template.hpp +550 -0
  120. package/cpp/minja/minja.hpp +3009 -0
  121. package/cpp/nlohmann/json.hpp +25526 -0
  122. package/cpp/nlohmann/json_fwd.hpp +187 -0
  123. package/cpp/regex-partial.cpp +204 -0
  124. package/cpp/regex-partial.h +56 -0
  125. package/cpp/rn-completion.cpp +681 -0
  126. package/cpp/rn-completion.h +116 -0
  127. package/cpp/rn-llama.cpp +345 -0
  128. package/cpp/rn-llama.h +149 -0
  129. package/cpp/rn-mtmd.hpp +602 -0
  130. package/cpp/rn-tts.cpp +591 -0
  131. package/cpp/rn-tts.h +59 -0
  132. package/cpp/sampling.cpp +579 -0
  133. package/cpp/sampling.h +107 -0
  134. package/cpp/tools/mtmd/clip-impl.h +473 -0
  135. package/cpp/tools/mtmd/clip.cpp +4322 -0
  136. package/cpp/tools/mtmd/clip.h +106 -0
  137. package/cpp/tools/mtmd/miniaudio/miniaudio.h +93468 -0
  138. package/cpp/tools/mtmd/mtmd-audio.cpp +769 -0
  139. package/cpp/tools/mtmd/mtmd-audio.h +47 -0
  140. package/cpp/tools/mtmd/mtmd-helper.cpp +460 -0
  141. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  142. package/cpp/tools/mtmd/mtmd.cpp +1066 -0
  143. package/cpp/tools/mtmd/mtmd.h +298 -0
  144. package/cpp/tools/mtmd/stb/stb_image.h +7988 -0
  145. package/cpp/unicode-data.cpp +7034 -0
  146. package/cpp/unicode-data.h +20 -0
  147. package/cpp/unicode.cpp +1061 -0
  148. package/cpp/unicode.h +68 -0
  149. package/package.json +2 -1
@@ -0,0 +1,600 @@
1
+ #include "ggml-backend-impl.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-impl.h"
4
+ #include <algorithm>
5
+ #include <cstring>
6
+ #include <filesystem>
7
+ #include <memory>
8
+ #include <string>
9
+ #include <type_traits>
10
+ #include <vector>
11
+ #include <cctype>
12
+
13
+ #ifdef _WIN32
14
+ # define WIN32_LEAN_AND_MEAN
15
+ # ifndef NOMINMAX
16
+ # define NOMINMAX
17
+ # endif
18
+ # include <windows.h>
19
+ #elif defined(__APPLE__)
20
+ # include <mach-o/dyld.h>
21
+ # include <dlfcn.h>
22
+ #else
23
+ # include <dlfcn.h>
24
+ # include <unistd.h>
25
+ #endif
26
+
27
+ // Backend registry
28
+ #ifdef LM_GGML_USE_CPU
29
+ #include "ggml-cpu.h"
30
+ #endif
31
+
32
+ #ifdef LM_GGML_USE_CUDA
33
+ #include "ggml-cuda.h"
34
+ #endif
35
+
36
+ #ifdef LM_GGML_USE_METAL
37
+ #include "ggml-metal.h"
38
+ #endif
39
+
40
+ #ifdef LM_GGML_USE_SYCL
41
+ #include "ggml-sycl.h"
42
+ #endif
43
+
44
+ #ifdef LM_GGML_USE_VULKAN
45
+ #include "ggml-vulkan.h"
46
+ #endif
47
+
48
+ #ifdef LM_GGML_USE_WEBGPU
49
+ #include "ggml-webgpu.h"
50
+ #endif
51
+
52
+ #ifdef LM_GGML_USE_ZDNN
53
+ #include "ggml-zdnn.h"
54
+ #endif
55
+
56
+ #ifdef LM_GGML_USE_OPENCL
57
+ #include "ggml-opencl.h"
58
+ #endif
59
+
60
+ #ifdef LM_GGML_USE_BLAS
61
+ #include "ggml-blas.h"
62
+ #endif
63
+
64
+ #ifdef LM_GGML_USE_RPC
65
+ #include "ggml-rpc.h"
66
+ #endif
67
+
68
+ #ifdef LM_GGML_USE_CANN
69
+ #include "ggml-cann.h"
70
+ #endif
71
+
72
+ // disable C++17 deprecation warning for std::codecvt_utf8
73
+ #if defined(__clang__)
74
+ # pragma clang diagnostic push
75
+ # pragma clang diagnostic ignored "-Wdeprecated-declarations"
76
+ #elif defined(__GNUC__)
77
+ # pragma GCC diagnostic push
78
+ # pragma GCC diagnostic ignored "-Wdeprecated-declarations"
79
+ #endif
80
+
81
+ namespace fs = std::filesystem;
82
+
83
// Convert a filesystem path to a UTF-8 encoded std::string for logging.
// Returns an empty string if the conversion throws (e.g. on an invalid encoding).
static std::string path_str(const fs::path & path) {
    std::string u8path;
    try {
#if defined(__cpp_lib_char8_t)
        // C++20 and later: u8string() returns std::u8string
        std::u8string u8str = path.u8string();
        u8path = std::string(reinterpret_cast<const char*>(u8str.c_str()));
#else
        // C++17: u8string() returns std::string
        u8path = path.u8string();
#endif
    } catch (...) {
        // best-effort: swallow conversion errors and return the (empty) result
    }
    return u8path;
}
98
+
99
+ #if defined(__clang__)
100
+ # pragma clang diagnostic pop
101
+ #elif defined(__GNUC__)
102
+ # pragma GCC diagnostic pop
103
+ #endif
104
+
105
#ifdef _WIN32

// On Windows a library handle is an HMODULE; strip the pointer so the type can
// be managed by std::unique_ptr (see dl_handle_ptr below).
using dl_handle = std::remove_pointer_t<HMODULE>;

// Deleter for dl_handle_ptr: releases the module on destruction.
struct dl_handle_deleter {
    void operator()(HMODULE handle) {
        FreeLibrary(handle);
    }
};

// Load a shared library, returning nullptr on failure.
static dl_handle * dl_load_library(const fs::path & path) {
    // suppress error dialogs for missing DLLs
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    HMODULE handle = LoadLibraryW(path.wstring().c_str());

    // restore the previous error mode
    SetErrorMode(old_mode);

    return handle;
}

// Resolve a symbol from a loaded library, returning nullptr if not found.
static void * dl_get_sym(dl_handle * handle, const char * name) {
    // suppress error dialogs while resolving the symbol
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    void * p = (void *) GetProcAddress(handle, name);

    SetErrorMode(old_mode);

    return p;
}

#else

// POSIX: dlopen handles are opaque void pointers.
using dl_handle = void;

// Deleter for dl_handle_ptr: closes the library on destruction.
struct dl_handle_deleter {
    void operator()(void * handle) {
        dlclose(handle);
    }
};

// Load a shared library, returning nullptr on failure.
static void * dl_load_library(const fs::path & path) {
    // RTLD_LOCAL keeps the library's symbols out of the global namespace;
    // RTLD_NOW resolves all symbols eagerly so failures surface here
    dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);

    return handle;
}

// Resolve a symbol from a loaded library, returning nullptr if not found.
static void * dl_get_sym(dl_handle * handle, const char * name) {
    return dlsym(handle, name);
}

#endif
159
+
160
// Owning smart pointer for a dynamically loaded library handle.
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;

// A registered backend together with the dynamic library it was loaded from.
// `handle` is null for backends linked statically into the binary.
struct lm_ggml_backend_reg_entry {
    lm_ggml_backend_reg_t reg;
    dl_handle_ptr handle;
};
166
+
167
// Process-wide registry of backends and the devices they expose.
// Access it through get_reg(); backends are registered either at construction
// (compiled-in backends, gated by LM_GGML_USE_* macros) or at runtime via
// load_backend().
struct lm_ggml_backend_registry {
    // all registered backends, each with its owning library handle (if loaded at runtime)
    std::vector<lm_ggml_backend_reg_entry> backends;
    // flat list of every device exposed by the registered backends
    std::vector<lm_ggml_backend_dev_t> devices;

    // Register every backend enabled at compile time. The CPU backend is
    // registered last so GPU/accelerator devices are enumerated first.
    lm_ggml_backend_registry() {
#ifdef LM_GGML_USE_CUDA
        register_backend(lm_ggml_backend_cuda_reg());
#endif
#ifdef LM_GGML_USE_METAL
        register_backend(lm_ggml_backend_metal_reg());
#endif
#ifdef LM_GGML_USE_SYCL
        register_backend(lm_ggml_backend_sycl_reg());
#endif
#ifdef LM_GGML_USE_VULKAN
        register_backend(lm_ggml_backend_vk_reg());
#endif
#ifdef LM_GGML_USE_WEBGPU
        register_backend(lm_ggml_backend_webgpu_reg());
#endif
#ifdef LM_GGML_USE_ZDNN
        register_backend(lm_ggml_backend_zdnn_reg());
#endif
#ifdef LM_GGML_USE_OPENCL
        register_backend(lm_ggml_backend_opencl_reg());
#endif
#ifdef LM_GGML_USE_CANN
        register_backend(lm_ggml_backend_cann_reg());
#endif
#ifdef LM_GGML_USE_BLAS
        register_backend(lm_ggml_backend_blas_reg());
#endif
#ifdef LM_GGML_USE_RPC
        register_backend(lm_ggml_backend_rpc_reg());
#endif
#ifdef LM_GGML_USE_CPU
        register_backend(lm_ggml_backend_cpu_reg());
#endif
    }

    // Intentionally leaks the library handles instead of unloading them:
    ~lm_ggml_backend_registry() {
        // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
        // since backend threads may still be running and accessing resources from the dynamic library
        for (auto & entry : backends) {
            if (entry.handle) {
                entry.handle.release(); // NOLINT
            }
        }
    }

    // Add a backend (and all of its devices) to the registry.
    // `handle` transfers ownership of the library the backend came from, or is
    // null for statically linked backends. A null `reg` is ignored.
    void register_backend(lm_ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
        if (!reg) {
            return;
        }

#ifndef NDEBUG
        LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
            __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
#endif
        backends.push_back({ reg, std::move(handle) });
        for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
            register_device(lm_ggml_backend_reg_dev_get(reg, i));
        }
    }

    // Append a single device to the global device list.
    void register_device(lm_ggml_backend_dev_t device) {
#ifndef NDEBUG
        LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
#endif
        devices.push_back(device);
    }

    // Load a backend from a shared library and register it.
    // Returns the backend registration, or nullptr on any failure
    // (load error, unsupported system, missing init symbol, API mismatch).
    // When `silent` is true, failures are not logged.
    lm_ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
        dl_handle_ptr handle { dl_load_library(path) };
        if (!handle) {
            if (!silent) {
                LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
            }
            return nullptr;
        }

        // optional scoring entry point: a score of 0 means the backend cannot run on this system
        auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
        if (score_fn && score_fn() == 0) {
            if (!silent) {
                LM_GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
            }
            return nullptr;
        }

        auto backend_init_fn = (lm_ggml_backend_init_t) dl_get_sym(handle.get(), "lm_ggml_backend_init");
        if (!backend_init_fn) {
            if (!silent) {
                LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s\n", __func__, path_str(path).c_str());
            }
            return nullptr;
        }

        // reject backends built against a different backend API version
        lm_ggml_backend_reg_t reg = backend_init_fn();
        if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
            if (!silent) {
                if (!reg) {
                    LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n",
                        __func__, path_str(path).c_str());
                } else {
                    LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
                        __func__, path_str(path).c_str(), reg->api_version, LM_GGML_BACKEND_API_VERSION);
                }
            }
            return nullptr;
        }

        LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path_str(path).c_str());

        // transfer ownership of the library handle to the registry
        register_backend(reg, std::move(handle));

        return reg;
    }

    // Remove a backend and all of its devices from the registry.
    // NOTE: this does not unload the shared library (see ~lm_ggml_backend_registry).
    void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
        auto it = std::find_if(backends.begin(), backends.end(),
                               [reg](const lm_ggml_backend_reg_entry & entry) { return entry.reg == reg; });

        if (it == backends.end()) {
            if (!silent) {
                LM_GGML_LOG_ERROR("%s: backend not found\n", __func__);
            }
            return;
        }

        if (!silent) {
            LM_GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, lm_ggml_backend_reg_name(reg));
        }

        // remove devices
        devices.erase(
            std::remove_if(devices.begin(), devices.end(),
                           [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
            devices.end());

        // remove backend
        backends.erase(it);
    }
};
310
+
311
+ static lm_ggml_backend_registry & get_reg() {
312
+ static lm_ggml_backend_registry reg;
313
+ return reg;
314
+ }
315
+
316
+ // Internal API
317
+ void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
318
+ get_reg().register_backend(reg);
319
+ }
320
+
321
+ void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
322
+ get_reg().register_device(device);
323
+ }
324
+
325
+ // Backend (reg) enumeration
326
// Backend (reg) enumeration
// Case-insensitive ASCII comparison of two NUL-terminated strings.
// Returns true iff the strings have the same length and compare equal
// ignoring case.
// Fix: std::tolower has undefined behavior when passed a negative value other
// than EOF, which happens for bytes >= 0x80 on platforms where `char` is
// signed — cast to unsigned char first.
static bool striequals(const char * a, const char * b) {
    for (; *a && *b; a++, b++) {
        if (std::tolower((unsigned char) *a) != std::tolower((unsigned char) *b)) {
            return false;
        }
    }
    // equal only if both strings ended at the same position
    return *a == *b;
}
334
+
335
+ size_t lm_ggml_backend_reg_count() {
336
+ return get_reg().backends.size();
337
+ }
338
+
339
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
340
+ LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
341
+ return get_reg().backends[index].reg;
342
+ }
343
+
344
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
345
+ for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
346
+ lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
347
+ if (striequals(lm_ggml_backend_reg_name(reg), name)) {
348
+ return reg;
349
+ }
350
+ }
351
+ return nullptr;
352
+ }
353
+
354
+ // Device enumeration
355
+ size_t lm_ggml_backend_dev_count() {
356
+ return get_reg().devices.size();
357
+ }
358
+
359
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
360
+ LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
361
+ return get_reg().devices[index];
362
+ }
363
+
364
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
365
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
366
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
367
+ if (striequals(lm_ggml_backend_dev_name(dev), name)) {
368
+ return dev;
369
+ }
370
+ }
371
+ return nullptr;
372
+ }
373
+
374
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
375
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
376
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
377
+ if (lm_ggml_backend_dev_type(dev) == type) {
378
+ return dev;
379
+ }
380
+ }
381
+ return nullptr;
382
+ }
383
+
384
+ // Convenience functions
385
+ lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
386
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
387
+ if (!dev) {
388
+ return nullptr;
389
+ }
390
+ return lm_ggml_backend_dev_init(dev, params);
391
+ }
392
+
393
+ lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
394
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
395
+ if (!dev) {
396
+ return nullptr;
397
+ }
398
+ return lm_ggml_backend_dev_init(dev, params);
399
+ }
400
+
401
+ lm_ggml_backend_t lm_ggml_backend_init_best(void) {
402
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
403
+ if (!dev) {
404
+ dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
405
+ }
406
+ if (!dev) {
407
+ return nullptr;
408
+ }
409
+ return lm_ggml_backend_dev_init(dev, nullptr);
410
+ }
411
+
412
+ // Dynamic loading
413
+ lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
414
+ return get_reg().load_backend(path, false);
415
+ }
416
+
417
+ void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
418
+ get_reg().unload_backend(reg, true);
419
+ }
420
+
421
// Return the directory containing the current executable, with a trailing
// path separator; used as a default search location for backend libraries.
// Returns an empty path on platforms where it cannot be determined.
static fs::path get_executable_path() {
#if defined(__APPLE__)
    // get executable path
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        // returns -1 and sets `size` to the required buffer size when the
        // buffer is too small; succeeds on the retry
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    std::string base_path(path.data(), size);
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "/";
#elif defined(__linux__) || defined(__FreeBSD__)
    // fall back to the current directory if the symlink cannot be read
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
# if defined(__linux__)
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
# elif defined(__FreeBSD__)
        ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
# endif
        if (len == -1) {
            break;
        }
        if (len < (ssize_t) path.size()) {
            // result fit in the buffer, so it was not truncated
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        // readlink filled the whole buffer: possibly truncated, grow and retry
        path.resize(path.size() * 2);
    }

    return base_path + "/";
#elif defined(_WIN32)
    // NOTE(review): GetModuleFileNameW truncates paths longer than MAX_PATH
    // with this fixed-size buffer — confirm whether long-path support matters
    std::vector<wchar_t> path(MAX_PATH);
    DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
    if (len == 0) {
        return {};
    }
    std::wstring base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of('\\');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + L"\\";
#else
    return {};
#endif
}
483
+
484
// Platform-specific shared-library name prefix:
// "ggml-" on Windows, "libggml-" everywhere else.
static fs::path backend_filename_prefix() {
#ifdef _WIN32
    return fs::u8path("ggml-");
#else
    return fs::u8path("libggml-");
#endif
}
491
+
492
// Platform-specific shared-library extension: ".dll" on Windows, ".so" elsewhere.
// NOTE(review): macOS backend libraries are expected to use ".so" here as well —
// confirm against the project's build configuration.
static fs::path backend_filename_extension() {
#ifdef _WIN32
    return fs::u8path(".dll");
#else
    return fs::u8path(".so");
#endif
}
499
+
500
// Find and load the best available variant of the named backend.
// Candidate libraries named [lib]ggml-<name>-*.[so|dll] are probed for an
// exported lm_ggml_backend_score symbol; the highest positive score wins.
// If no scored variant is usable, falls back to the un-suffixed base library
// [lib]ggml-<name>.[so|dll]. Returns nullptr when nothing could be loaded.
static lm_ggml_backend_reg_t lm_ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
    const fs::path name_path = fs::u8path(name);
    const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
    const fs::path file_extension = backend_filename_extension();

    std::vector<fs::path> search_paths;
    if (user_search_path == nullptr) {
#ifdef LM_GGML_BACKEND_DIR
        // compile-time configured backend directory is searched first
        search_paths.push_back(fs::u8path(LM_GGML_BACKEND_DIR));
#endif
        // default search paths: executable directory, current directory
        search_paths.push_back(get_executable_path());
        search_paths.push_back(fs::current_path());
    } else {
        search_paths.push_back(fs::u8path(user_search_path));
    }

    int best_score = 0;
    fs::path best_path;

    for (const auto & search_path : search_paths) {
        if (!fs::exists(search_path)) {
            LM_GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
            continue;
        }
        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
        for (const auto & entry : dir_it) {
            if (entry.is_regular_file()) {
                auto filename = entry.path().filename();
                auto ext = entry.path().extension();
                if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
                    // load the candidate only to query its score; it is not
                    // registered here (the handle is dropped at end of scope)
                    dl_handle_ptr handle { dl_load_library(entry) };
                    if (!handle && !silent) {
                        LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
                    }
                    if (handle) {
                        auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
                        if (score_fn) {
                            int s = score_fn();
#ifndef NDEBUG
                            LM_GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
#endif
                            // a score of 0 means unsupported; keep the highest positive score
                            if (s > best_score) {
                                best_score = s;
                                best_path = entry.path();
                            }
                        } else {
                            if (!silent) {
                                LM_GGML_LOG_INFO("%s: failed to find lm_ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
                            }
                        }
                    }
                }
            }
        }
    }

    if (best_score == 0) {
        // try to load the base backend
        for (const auto & search_path : search_paths) {
            fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
            fs::path path = search_path / filename;
            if (fs::exists(path)) {
                return get_reg().load_backend(path, silent);
            }
        }
        return nullptr;
    }

    // reload the winning candidate through the registry so it is registered
    return get_reg().load_backend(best_path, silent);
}
572
+
573
// Load all known backends from the default search paths.
void lm_ggml_backend_load_all() {
    lm_ggml_backend_load_all_from_path(nullptr);
}
576
+
577
// Attempt to load every known backend by name from `dir_path`
// (or from the default search paths when dir_path is null).
// In release builds (NDEBUG) missing backends are skipped without logging.
void lm_ggml_backend_load_all_from_path(const char * dir_path) {
#ifdef NDEBUG
    bool silent = true;
#else
    bool silent = false;
#endif

    lm_ggml_backend_load_best("blas", silent, dir_path);
    lm_ggml_backend_load_best("cann", silent, dir_path);
    lm_ggml_backend_load_best("cuda", silent, dir_path);
    lm_ggml_backend_load_best("hip", silent, dir_path);
    lm_ggml_backend_load_best("metal", silent, dir_path);
    lm_ggml_backend_load_best("rpc", silent, dir_path);
    lm_ggml_backend_load_best("sycl", silent, dir_path);
    lm_ggml_backend_load_best("vulkan", silent, dir_path);
    lm_ggml_backend_load_best("opencl", silent, dir_path);
    lm_ggml_backend_load_best("musa", silent, dir_path);
    lm_ggml_backend_load_best("cpu", silent, dir_path);
    // check the environment variable LM_GGML_BACKEND_PATH to load an out-of-tree backend
    const char * backend_path = std::getenv("LM_GGML_BACKEND_PATH");
    if (backend_path) {
        lm_ggml_backend_load(backend_path);
    }
}