cui-llama.rn 1.3.0 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/android/src/main/CMakeLists.txt +9 -6
  2. package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
  3. package/android/src/main/jni.cpp +15 -15
  4. package/cpp/common.cpp +1962 -1682
  5. package/cpp/common.h +645 -600
  6. package/cpp/ggml-alloc.c +1038 -1040
  7. package/cpp/ggml-alloc.h +76 -76
  8. package/cpp/ggml-backend-impl.h +256 -216
  9. package/cpp/ggml-backend-reg.cpp +552 -195
  10. package/cpp/ggml-backend.cpp +1999 -1997
  11. package/cpp/ggml-backend.h +352 -328
  12. package/cpp/ggml-common.h +1853 -1853
  13. package/cpp/ggml-cpp.h +38 -38
  14. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +4262 -3560
  15. package/cpp/ggml-cpu-aarch64.h +8 -30
  16. package/cpp/ggml-cpu-impl.h +386 -371
  17. package/cpp/ggml-cpu-quants.c +10835 -10822
  18. package/cpp/ggml-cpu-quants.h +63 -63
  19. package/cpp/ggml-cpu-traits.cpp +36 -0
  20. package/cpp/ggml-cpu-traits.h +38 -0
  21. package/cpp/ggml-cpu.c +14122 -13975
  22. package/cpp/ggml-cpu.cpp +618 -663
  23. package/cpp/ggml-cpu.h +135 -177
  24. package/cpp/ggml-impl.h +556 -550
  25. package/cpp/ggml-metal.h +66 -66
  26. package/cpp/ggml-metal.m +4884 -4294
  27. package/cpp/ggml-quants.c +5238 -5247
  28. package/cpp/ggml-quants.h +100 -100
  29. package/cpp/ggml-threading.cpp +12 -12
  30. package/cpp/ggml-threading.h +14 -12
  31. package/cpp/ggml.c +7707 -8180
  32. package/cpp/ggml.h +2286 -2411
  33. package/cpp/json-schema-to-grammar.cpp +1045 -0
  34. package/cpp/json-schema-to-grammar.h +8 -0
  35. package/cpp/json.hpp +24766 -0
  36. package/cpp/llama-grammar.cpp +1138 -1138
  37. package/cpp/llama-grammar.h +144 -144
  38. package/cpp/llama-impl.h +181 -181
  39. package/cpp/llama-sampling.cpp +2293 -2348
  40. package/cpp/llama-sampling.h +48 -48
  41. package/cpp/llama-vocab.cpp +1985 -1984
  42. package/cpp/llama-vocab.h +170 -170
  43. package/cpp/llama.cpp +22836 -22132
  44. package/cpp/llama.h +1263 -1253
  45. package/cpp/log.cpp +401 -401
  46. package/cpp/log.h +121 -121
  47. package/cpp/rn-llama.hpp +6 -6
  48. package/cpp/sampling.cpp +500 -466
  49. package/cpp/sampling.h +22 -1
  50. package/cpp/sgemm.cpp +1884 -1884
  51. package/cpp/speculative.cpp +274 -0
  52. package/cpp/speculative.h +28 -0
  53. package/cpp/unicode.cpp +62 -51
  54. package/cpp/unicode.h +9 -10
  55. package/ios/RNLlamaContext.mm +13 -0
  56. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  57. package/lib/commonjs/grammar.js +4 -2
  58. package/lib/commonjs/grammar.js.map +1 -1
  59. package/lib/commonjs/index.js +38 -1
  60. package/lib/commonjs/index.js.map +1 -1
  61. package/lib/module/NativeRNLlama.js.map +1 -1
  62. package/lib/module/grammar.js +2 -1
  63. package/lib/module/grammar.js.map +1 -1
  64. package/lib/module/index.js +36 -0
  65. package/lib/module/index.js.map +1 -1
  66. package/lib/typescript/NativeRNLlama.d.ts +95 -6
  67. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  68. package/lib/typescript/grammar.d.ts +5 -6
  69. package/lib/typescript/grammar.d.ts.map +1 -1
  70. package/lib/typescript/index.d.ts +40 -4
  71. package/lib/typescript/index.d.ts.map +1 -1
  72. package/package.json +2 -1
  73. package/src/NativeRNLlama.ts +99 -12
  74. package/src/grammar.ts +10 -8
  75. package/src/index.ts +68 -3
  76. package/cpp/ggml-aarch64.c +0 -129
  77. package/cpp/ggml-aarch64.h +0 -19
@@ -1,195 +1,552 @@
1
- #include "ggml-backend-impl.h"
2
- #include "ggml-backend.h"
3
- #include "ggml-cpu.h"
4
- #include "ggml-impl.h"
5
- #include <cstring>
6
- #include <vector>
7
-
8
- // Backend registry
9
-
10
- #ifdef LM_GGML_USE_CUDA
11
- #include "ggml-cuda.h"
12
- #endif
13
-
14
- #ifdef LM_GGML_USE_METAL
15
- #include "ggml-metal.h"
16
- #endif
17
-
18
- #ifdef LM_GGML_USE_SYCL
19
- #include "ggml-sycl.h"
20
- #endif
21
-
22
- #ifdef LM_GGML_USE_VULKAN
23
- #include "ggml-vulkan.h"
24
- #endif
25
-
26
- #ifdef LM_GGML_USE_BLAS
27
- #include "ggml-blas.h"
28
- #endif
29
-
30
- #ifdef LM_GGML_USE_RPC
31
- #include "ggml-rpc.h"
32
- #endif
33
-
34
- #ifdef LM_GGML_USE_AMX
35
- # include "ggml-amx.h"
36
- #endif
37
-
38
- #ifdef LM_GGML_USE_CANN
39
- #include "ggml-cann.h"
40
- #endif
41
-
42
- #ifdef LM_GGML_USE_KOMPUTE
43
- #include "ggml-kompute.h"
44
- #endif
45
-
46
- struct lm_ggml_backend_registry {
47
- std::vector<lm_ggml_backend_reg_t> backends;
48
- std::vector<lm_ggml_backend_dev_t> devices;
49
-
50
- lm_ggml_backend_registry() {
51
- #ifdef LM_GGML_USE_CUDA
52
- register_backend(lm_ggml_backend_cuda_reg());
53
- #endif
54
- #ifdef LM_GGML_USE_METAL
55
- register_backend(lm_ggml_backend_metal_reg());
56
- #endif
57
- #ifdef LM_GGML_USE_SYCL
58
- register_backend(lm_ggml_backend_sycl_reg());
59
- #endif
60
- #ifdef LM_GGML_USE_VULKAN
61
- register_backend(lm_ggml_backend_vk_reg());
62
- #endif
63
- #ifdef LM_GGML_USE_CANN
64
- register_backend(lm_ggml_backend_cann_reg());
65
- #endif
66
- #ifdef LM_GGML_USE_BLAS
67
- register_backend(lm_ggml_backend_blas_reg());
68
- #endif
69
- #ifdef LM_GGML_USE_RPC
70
- register_backend(lm_ggml_backend_rpc_reg());
71
- #endif
72
- #ifdef LM_GGML_USE_AMX
73
- register_backend(lm_ggml_backend_amx_reg());
74
- #endif
75
- #ifdef LM_GGML_USE_KOMPUTE
76
- register_backend(lm_ggml_backend_kompute_reg());
77
- #endif
78
-
79
- register_backend(lm_ggml_backend_cpu_reg());
80
- }
81
-
82
- void register_backend(lm_ggml_backend_reg_t reg) {
83
- if (!reg) {
84
- return;
85
- }
86
-
87
- #ifndef NDEBUG
88
- LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
89
- __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
90
- #endif
91
- backends.push_back(reg);
92
- for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
93
- register_device(lm_ggml_backend_reg_dev_get(reg, i));
94
- }
95
- }
96
-
97
- void register_device(lm_ggml_backend_dev_t device) {
98
- #ifndef NDEBUG
99
- LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
100
- #endif
101
- devices.push_back(device);
102
- }
103
- };
104
-
105
- static lm_ggml_backend_registry & get_reg() {
106
- static lm_ggml_backend_registry reg;
107
- return reg;
108
- }
109
-
110
- // Internal API
111
- void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
112
- get_reg().register_backend(reg);
113
- }
114
-
115
- void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
116
- get_reg().register_device(device);
117
- }
118
-
119
- // Backend (reg) enumeration
120
- size_t lm_ggml_backend_reg_count() {
121
- return get_reg().backends.size();
122
- }
123
-
124
- lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
125
- LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
126
- return get_reg().backends[index];
127
- }
128
-
129
- lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
130
- for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
131
- lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
132
- if (std::strcmp(lm_ggml_backend_reg_name(reg), name) == 0) {
133
- return reg;
134
- }
135
- }
136
- return NULL;
137
- }
138
-
139
- // Device enumeration
140
- size_t lm_ggml_backend_dev_count() {
141
- return get_reg().devices.size();
142
- }
143
-
144
- lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
145
- LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
146
- return get_reg().devices[index];
147
- }
148
-
149
- lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
150
- for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
151
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
152
- if (strcmp(lm_ggml_backend_dev_name(dev), name) == 0) {
153
- return dev;
154
- }
155
- }
156
- return NULL;
157
- }
158
-
159
- lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
160
- for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
161
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
162
- if (lm_ggml_backend_dev_type(dev) == type) {
163
- return dev;
164
- }
165
- }
166
- return NULL;
167
- }
168
-
169
- // Convenience functions
170
- lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
171
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
172
- if (!dev) {
173
- return NULL;
174
- }
175
- return lm_ggml_backend_dev_init(dev, params);
176
- }
177
-
178
- lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
179
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
180
- if (!dev) {
181
- return NULL;
182
- }
183
- return lm_ggml_backend_dev_init(dev, params);
184
- }
185
-
186
- lm_ggml_backend_t lm_ggml_backend_init_best(void) {
187
- lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
188
- if (!dev) {
189
- dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
190
- }
191
- if (!dev) {
192
- return NULL;
193
- }
194
- return lm_ggml_backend_dev_init(dev, NULL);
195
- }
1
+ #include "ggml-backend-impl.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-impl.h"
4
+ #include <algorithm>
5
+ #include <codecvt>
6
+ #include <cstring>
7
+ #include <filesystem>
8
+ #include <locale>
9
+ #include <memory>
10
+ #include <string>
11
+ #include <type_traits>
12
+ #include <vector>
13
+
14
+ #ifdef _WIN32
15
+ # define WIN32_LEAN_AND_MEAN
16
+ # ifndef NOMINMAX
17
+ # define NOMINMAX
18
+ # endif
19
+ # include <windows.h>
20
+ #elif defined(__APPLE__)
21
+ # include <mach-o/dyld.h>
22
+ # include <dlfcn.h>
23
+ #else
24
+ # include <dlfcn.h>
25
+ # include <unistd.h>
26
+ #endif
27
+
28
+ // Backend registry
29
+ #ifdef LM_GGML_USE_CPU
30
+ #include "ggml-cpu.h"
31
+ #endif
32
+
33
+ #ifdef LM_GGML_USE_CUDA
34
+ #include "ggml-cuda.h"
35
+ #endif
36
+
37
+ #ifdef LM_GGML_USE_METAL
38
+ #include "ggml-metal.h"
39
+ #endif
40
+
41
+ #ifdef LM_GGML_USE_SYCL
42
+ #include "ggml-sycl.h"
43
+ #endif
44
+
45
+ #ifdef LM_GGML_USE_VULKAN
46
+ #include "ggml-vulkan.h"
47
+ #endif
48
+
49
+ #ifdef LM_GGML_USE_OPENCL
50
+ #include "ggml-opencl.h"
51
+ #endif
52
+
53
+ #ifdef LM_GGML_USE_BLAS
54
+ #include "ggml-blas.h"
55
+ #endif
56
+
57
+ #ifdef LM_GGML_USE_RPC
58
+ #include "ggml-rpc.h"
59
+ #endif
60
+
61
+ #ifdef LM_GGML_USE_CANN
62
+ #include "ggml-cann.h"
63
+ #endif
64
+
65
+ #ifdef LM_GGML_USE_KOMPUTE
66
+ #include "ggml-kompute.h"
67
+ #endif
68
+
69
+ #ifdef _WIN32
70
+
71
+ using dl_handle = std::remove_pointer_t<HMODULE>;
72
+
73
+ struct dl_handle_deleter {
74
+ void operator()(HMODULE handle) {
75
+ FreeLibrary(handle);
76
+ }
77
+ };
78
+
79
+ static dl_handle * dl_load_library(const std::wstring & path) {
80
+ // suppress error dialogs for missing DLLs
81
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
82
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
83
+
84
+ HMODULE handle = LoadLibraryW(path.c_str());
85
+
86
+ SetErrorMode(old_mode);
87
+
88
+ return handle;
89
+ }
90
+
91
+ static dl_handle * dl_load_library(const std::string & path) {
92
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
93
+ return dl_load_library(converter.from_bytes(path));
94
+ }
95
+
96
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
97
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
98
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
99
+
100
+ void * p = (void *) GetProcAddress(handle, name);
101
+
102
+ SetErrorMode(old_mode);
103
+
104
+ return p;
105
+ }
106
+
107
+ #else
108
+
109
+ using dl_handle = void;
110
+
111
+ struct dl_handle_deleter {
112
+ void operator()(void * handle) {
113
+ dlclose(handle);
114
+ }
115
+ };
116
+
117
+ static void * dl_load_library(const std::string & path) {
118
+ dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
119
+
120
+ return handle;
121
+ }
122
+
123
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
124
+ return dlsym(handle, name);
125
+ }
126
+
127
+ #endif
128
+
129
+ using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
130
+
131
+ struct lm_ggml_backend_reg_entry {
132
+ lm_ggml_backend_reg_t reg;
133
+ dl_handle_ptr handle;
134
+ };
135
+
136
+ struct lm_ggml_backend_registry {
137
+ std::vector<lm_ggml_backend_reg_entry> backends;
138
+ std::vector<lm_ggml_backend_dev_t> devices;
139
+
140
+ lm_ggml_backend_registry() {
141
+ #ifdef LM_GGML_USE_CUDA
142
+ register_backend(lm_ggml_backend_cuda_reg());
143
+ #endif
144
+ #ifdef LM_GGML_USE_METAL
145
+ register_backend(lm_ggml_backend_metal_reg());
146
+ #endif
147
+ #ifdef LM_GGML_USE_SYCL
148
+ register_backend(lm_ggml_backend_sycl_reg());
149
+ #endif
150
+ #ifdef LM_GGML_USE_VULKAN
151
+ register_backend(lm_ggml_backend_vk_reg());
152
+ #endif
153
+ #ifdef LM_GGML_USE_OPENCL
154
+ register_backend(lm_ggml_backend_opencl_reg());
155
+ #endif
156
+ #ifdef LM_GGML_USE_CANN
157
+ register_backend(lm_ggml_backend_cann_reg());
158
+ #endif
159
+ #ifdef LM_GGML_USE_BLAS
160
+ register_backend(lm_ggml_backend_blas_reg());
161
+ #endif
162
+ #ifdef LM_GGML_USE_RPC
163
+ register_backend(lm_ggml_backend_rpc_reg());
164
+ #endif
165
+ #ifdef LM_GGML_USE_KOMPUTE
166
+ register_backend(lm_ggml_backend_kompute_reg());
167
+ #endif
168
+ #ifdef LM_GGML_USE_CPU
169
+ register_backend(lm_ggml_backend_cpu_reg());
170
+ #endif
171
+ }
172
+
173
+ ~lm_ggml_backend_registry() {
174
+ // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
175
+ // since backend threads may still be running and accessing resources from the dynamic library
176
+ for (auto & entry : backends) {
177
+ if (entry.handle) {
178
+ entry.handle.release(); // NOLINT
179
+ }
180
+ }
181
+ }
182
+
183
+ void register_backend(lm_ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
184
+ if (!reg) {
185
+ return;
186
+ }
187
+
188
+ #ifndef NDEBUG
189
+ LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
190
+ __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
191
+ #endif
192
+ backends.push_back({ reg, std::move(handle) });
193
+ for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
194
+ register_device(lm_ggml_backend_reg_dev_get(reg, i));
195
+ }
196
+ }
197
+
198
+ void register_device(lm_ggml_backend_dev_t device) {
199
+ #ifndef NDEBUG
200
+ LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
201
+ #endif
202
+ devices.push_back(device);
203
+ }
204
+
205
+ lm_ggml_backend_reg_t load_backend(const char * path, bool silent) {
206
+ dl_handle_ptr handle { dl_load_library(path) };
207
+ if (!handle) {
208
+ if (!silent) {
209
+ LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
210
+ }
211
+ return nullptr;
212
+ }
213
+
214
+ auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
215
+ if (score_fn && score_fn() == 0) {
216
+ if (!silent) {
217
+ LM_GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
218
+ }
219
+ return nullptr;
220
+ }
221
+
222
+ auto backend_init_fn = (lm_ggml_backend_init_t) dl_get_sym(handle.get(), "lm_ggml_backend_init");
223
+ if (!backend_init_fn) {
224
+ if (!silent) {
225
+ LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s\n", __func__, path);
226
+ }
227
+ return nullptr;
228
+ }
229
+
230
+ lm_ggml_backend_reg_t reg = backend_init_fn();
231
+ if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
232
+ if (!silent) {
233
+ if (!reg) {
234
+ LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, path);
235
+ } else {
236
+ LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
237
+ __func__, path, reg->api_version, LM_GGML_BACKEND_API_VERSION);
238
+ }
239
+ }
240
+ return nullptr;
241
+ }
242
+
243
+ LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path);
244
+
245
+ register_backend(reg, std::move(handle));
246
+
247
+ return reg;
248
+ }
249
+
250
+ void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
251
+ auto it = std::find_if(backends.begin(), backends.end(),
252
+ [reg](const lm_ggml_backend_reg_entry & entry) { return entry.reg == reg; });
253
+
254
+ if (it == backends.end()) {
255
+ if (!silent) {
256
+ LM_GGML_LOG_ERROR("%s: backend not found\n", __func__);
257
+ }
258
+ return;
259
+ }
260
+
261
+ if (!silent) {
262
+ LM_GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, lm_ggml_backend_reg_name(reg));
263
+ }
264
+
265
+ // remove devices
266
+ devices.erase(
267
+ std::remove_if(devices.begin(), devices.end(),
268
+ [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
269
+ devices.end());
270
+
271
+ // remove backend
272
+ backends.erase(it);
273
+ }
274
+ };
275
+
276
+ static lm_ggml_backend_registry & get_reg() {
277
+ static lm_ggml_backend_registry reg;
278
+ return reg;
279
+ }
280
+
281
+ // Internal API
282
+ void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
283
+ get_reg().register_backend(reg);
284
+ }
285
+
286
+ void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
287
+ get_reg().register_device(device);
288
+ }
289
+
290
+ // Backend (reg) enumeration
291
+ static bool striequals(const char * a, const char * b) {
292
+ for (; *a && *b; a++, b++) {
293
+ if (std::tolower(*a) != std::tolower(*b)) {
294
+ return false;
295
+ }
296
+ }
297
+ return *a == *b;
298
+ }
299
+
300
+ size_t lm_ggml_backend_reg_count() {
301
+ return get_reg().backends.size();
302
+ }
303
+
304
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
305
+ LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
306
+ return get_reg().backends[index].reg;
307
+ }
308
+
309
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
310
+ for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
311
+ lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
312
+ if (striequals(lm_ggml_backend_reg_name(reg), name)) {
313
+ return reg;
314
+ }
315
+ }
316
+ return nullptr;
317
+ }
318
+
319
+ // Device enumeration
320
+ size_t lm_ggml_backend_dev_count() {
321
+ return get_reg().devices.size();
322
+ }
323
+
324
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
325
+ LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
326
+ return get_reg().devices[index];
327
+ }
328
+
329
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
330
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
331
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
332
+ if (striequals(lm_ggml_backend_dev_name(dev), name)) {
333
+ return dev;
334
+ }
335
+ }
336
+ return nullptr;
337
+ }
338
+
339
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
340
+ for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
341
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
342
+ if (lm_ggml_backend_dev_type(dev) == type) {
343
+ return dev;
344
+ }
345
+ }
346
+ return nullptr;
347
+ }
348
+
349
+ // Convenience functions
350
+ lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
351
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
352
+ if (!dev) {
353
+ return nullptr;
354
+ }
355
+ return lm_ggml_backend_dev_init(dev, params);
356
+ }
357
+
358
+ lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
359
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
360
+ if (!dev) {
361
+ return nullptr;
362
+ }
363
+ return lm_ggml_backend_dev_init(dev, params);
364
+ }
365
+
366
+ lm_ggml_backend_t lm_ggml_backend_init_best(void) {
367
+ lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
368
+ if (!dev) {
369
+ dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
370
+ }
371
+ if (!dev) {
372
+ return nullptr;
373
+ }
374
+ return lm_ggml_backend_dev_init(dev, nullptr);
375
+ }
376
+
377
+ // Dynamic loading
378
+ lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
379
+ return get_reg().load_backend(path, false);
380
+ }
381
+
382
+ void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
383
+ get_reg().unload_backend(reg, true);
384
+ }
385
+
386
+ static std::string get_executable_path() {
387
+ #if defined(__APPLE__)
388
+ // get executable path
389
+ std::vector<char> path;
390
+ uint32_t size;
391
+ while (true) {
392
+ size = path.size();
393
+ if (_NSGetExecutablePath(path.data(), &size) == 0) {
394
+ break;
395
+ }
396
+ path.resize(size);
397
+ }
398
+ std::string base_path(path.data(), size);
399
+ // remove executable name
400
+ auto last_slash = base_path.find_last_of('/');
401
+ if (last_slash != std::string::npos) {
402
+ base_path = base_path.substr(0, last_slash);
403
+ }
404
+ return base_path + "/";
405
+ #elif defined(__linux__)
406
+ std::string base_path = ".";
407
+ std::vector<char> path(1024);
408
+ while (true) {
409
+ // get executable path
410
+ ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
411
+ if (len == -1) {
412
+ break;
413
+ }
414
+ if (len < (ssize_t) path.size()) {
415
+ base_path = std::string(path.data(), len);
416
+ // remove executable name
417
+ auto last_slash = base_path.find_last_of('/');
418
+ if (last_slash != std::string::npos) {
419
+ base_path = base_path.substr(0, last_slash);
420
+ }
421
+ break;
422
+ }
423
+ path.resize(path.size() * 2);
424
+ }
425
+
426
+ return base_path + "/";
427
+ #elif defined(_WIN32)
428
+ std::vector<char> path(MAX_PATH);
429
+ DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
430
+ if (len == 0) {
431
+ return "";
432
+ }
433
+ std::string base_path(path.data(), len);
434
+ // remove executable name
435
+ auto last_slash = base_path.find_last_of('\\');
436
+ if (last_slash != std::string::npos) {
437
+ base_path = base_path.substr(0, last_slash);
438
+ }
439
+ return base_path + "\\";
440
+ #endif
441
+ }
442
+
443
+ static std::string backend_filename_prefix() {
444
+ #ifdef _WIN32
445
+ return "ggml-";
446
+ #else
447
+ return "libggml-";
448
+ #endif
449
+ }
450
+
451
+ static std::string backend_filename_suffix() {
452
+ #ifdef _WIN32
453
+ return ".dll";
454
+ #else
455
+ return ".so";
456
+ #endif
457
+ }
458
+
459
+ static lm_ggml_backend_reg_t lm_ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
460
+ // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
461
+ // TODO: search system paths
462
+ std::string file_prefix = backend_filename_prefix() + name + "-";
463
+ std::vector<std::string> search_paths;
464
+ if (user_search_path == nullptr) {
465
+ search_paths.push_back("./");
466
+ search_paths.push_back(get_executable_path());
467
+ } else {
468
+ #if defined(_WIN32)
469
+ search_paths.push_back(std::string(user_search_path) + "\\");
470
+ #else
471
+ search_paths.push_back(std::string(user_search_path) + "/");
472
+ #endif
473
+ }
474
+
475
+ int best_score = 0;
476
+ std::string best_path;
477
+
478
+ namespace fs = std::filesystem;
479
+ for (const auto & search_path : search_paths) {
480
+ if (!fs::exists(search_path)) {
481
+ continue;
482
+ }
483
+ fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
484
+ for (const auto & entry : dir_it) {
485
+ if (entry.is_regular_file()) {
486
+ std::string filename = entry.path().filename().string();
487
+ std::string ext = entry.path().extension().string();
488
+ if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
489
+ dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
490
+ if (!handle && !silent) {
491
+ LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
492
+ }
493
+ if (handle) {
494
+ auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
495
+ if (score_fn) {
496
+ int s = score_fn();
497
+ #ifndef NDEBUG
498
+ LM_GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
499
+ #endif
500
+ if (s > best_score) {
501
+ best_score = s;
502
+ best_path = entry.path().string();
503
+ }
504
+ } else {
505
+ if (!silent) {
506
+ LM_GGML_LOG_INFO("%s: failed to find lm_ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
507
+ }
508
+ }
509
+ }
510
+ }
511
+ }
512
+ }
513
+ }
514
+
515
+ if (best_score == 0) {
516
+ // try to load the base backend
517
+ for (const auto & search_path : search_paths) {
518
+ std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
519
+ if (fs::exists(path)) {
520
+ return get_reg().load_backend(path.c_str(), silent);
521
+ }
522
+ }
523
+ return nullptr;
524
+ }
525
+
526
+ return get_reg().load_backend(best_path.c_str(), silent);
527
+ }
528
+
529
+ void lm_ggml_backend_load_all() {
530
+ lm_ggml_backend_load_all_from_path(nullptr);
531
+ }
532
+
533
+ void lm_ggml_backend_load_all_from_path(const char * dir_path) {
534
+ #ifdef NDEBUG
535
+ bool silent = true;
536
+ #else
537
+ bool silent = false;
538
+ #endif
539
+
540
+ lm_ggml_backend_load_best("blas", silent, dir_path);
541
+ lm_ggml_backend_load_best("cann", silent, dir_path);
542
+ lm_ggml_backend_load_best("cuda", silent, dir_path);
543
+ lm_ggml_backend_load_best("hip", silent, dir_path);
544
+ lm_ggml_backend_load_best("kompute", silent, dir_path);
545
+ lm_ggml_backend_load_best("metal", silent, dir_path);
546
+ lm_ggml_backend_load_best("rpc", silent, dir_path);
547
+ lm_ggml_backend_load_best("sycl", silent, dir_path);
548
+ lm_ggml_backend_load_best("vulkan", silent, dir_path);
549
+ lm_ggml_backend_load_best("opencl", silent, dir_path);
550
+ lm_ggml_backend_load_best("musa", silent, dir_path);
551
+ lm_ggml_backend_load_best("cpu", silent, dir_path);
552
+ }