cui-llama.rn 1.3.0 → 1.3.3

This diff shows the changes between package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (76)
  1. package/android/src/main/CMakeLists.txt +6 -1
  2. package/android/src/main/jni.cpp +6 -6
  3. package/cpp/amx/amx.cpp +196 -0
  4. package/cpp/amx/amx.h +20 -0
  5. package/cpp/amx/common.h +101 -0
  6. package/cpp/amx/mmq.cpp +2524 -0
  7. package/cpp/amx/mmq.h +16 -0
  8. package/cpp/common.cpp +1981 -1682
  9. package/cpp/common.h +636 -600
  10. package/cpp/ggml-aarch64.c +129 -129
  11. package/cpp/ggml-aarch64.h +19 -19
  12. package/cpp/ggml-alloc.c +1038 -1040
  13. package/cpp/ggml-alloc.h +76 -76
  14. package/cpp/ggml-backend-impl.h +238 -216
  15. package/cpp/ggml-backend-reg.cpp +423 -195
  16. package/cpp/ggml-backend.cpp +1999 -1997
  17. package/cpp/ggml-backend.h +351 -328
  18. package/cpp/ggml-common.h +1859 -1853
  19. package/cpp/ggml-cpp.h +38 -38
  20. package/cpp/ggml-cpu-aarch64.c +3823 -3560
  21. package/cpp/ggml-cpu-aarch64.h +32 -30
  22. package/cpp/ggml-cpu-impl.h +386 -371
  23. package/cpp/ggml-cpu-quants.c +10835 -10822
  24. package/cpp/ggml-cpu-quants.h +63 -63
  25. package/cpp/ggml-cpu.c +99 -103
  26. package/cpp/ggml-cpu.cpp +69 -17
  27. package/cpp/ggml-cpu.h +152 -177
  28. package/cpp/ggml-impl.h +556 -550
  29. package/cpp/ggml-metal.h +66 -66
  30. package/cpp/ggml-metal.m +4426 -4294
  31. package/cpp/ggml-quants.c +5247 -5247
  32. package/cpp/ggml-quants.h +100 -100
  33. package/cpp/ggml-threading.cpp +12 -12
  34. package/cpp/ggml-threading.h +12 -12
  35. package/cpp/ggml.c +7618 -8180
  36. package/cpp/ggml.h +2255 -2411
  37. package/cpp/json-schema-to-grammar.cpp +1045 -0
  38. package/cpp/json-schema-to-grammar.h +8 -0
  39. package/cpp/json.hpp +24766 -0
  40. package/cpp/llama-grammar.cpp +1138 -1138
  41. package/cpp/llama-grammar.h +144 -144
  42. package/cpp/llama-impl.h +181 -181
  43. package/cpp/llama-sampling.cpp +2348 -2348
  44. package/cpp/llama-sampling.h +48 -48
  45. package/cpp/llama-vocab.cpp +1984 -1984
  46. package/cpp/llama-vocab.h +170 -170
  47. package/cpp/llama.cpp +22332 -22132
  48. package/cpp/llama.h +1259 -1253
  49. package/cpp/log.cpp +401 -401
  50. package/cpp/log.h +121 -121
  51. package/cpp/rn-llama.hpp +6 -6
  52. package/cpp/sampling.cpp +505 -466
  53. package/cpp/sampling.h +22 -1
  54. package/cpp/sgemm.cpp +1884 -1884
  55. package/cpp/speculative.cpp +270 -0
  56. package/cpp/speculative.h +28 -0
  57. package/cpp/unicode.cpp +11 -0
  58. package/ios/RNLlamaContext.mm +13 -0
  59. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  60. package/lib/commonjs/grammar.js +4 -2
  61. package/lib/commonjs/grammar.js.map +1 -1
  62. package/lib/commonjs/index.js.map +1 -1
  63. package/lib/module/NativeRNLlama.js.map +1 -1
  64. package/lib/module/grammar.js +2 -1
  65. package/lib/module/grammar.js.map +1 -1
  66. package/lib/module/index.js.map +1 -1
  67. package/lib/typescript/NativeRNLlama.d.ts +94 -4
  68. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  69. package/lib/typescript/grammar.d.ts +5 -6
  70. package/lib/typescript/grammar.d.ts.map +1 -1
  71. package/lib/typescript/index.d.ts +4 -2
  72. package/lib/typescript/index.d.ts.map +1 -1
  73. package/package.json +2 -1
  74. package/src/NativeRNLlama.ts +97 -10
  75. package/src/grammar.ts +10 -8
  76. package/src/index.ts +22 -1
package/cpp/ggml-backend-reg.cpp
@@ -1,195 +1,423 @@
- #include "ggml-backend-impl.h"
- #include "ggml-backend.h"
- #include "ggml-cpu.h"
- #include "ggml-impl.h"
- #include <cstring>
- #include <vector>
-
- // Backend registry
-
- #ifdef LM_GGML_USE_CUDA
- #include "ggml-cuda.h"
- #endif
-
- #ifdef LM_GGML_USE_METAL
- #include "ggml-metal.h"
- #endif
-
- #ifdef LM_GGML_USE_SYCL
- #include "ggml-sycl.h"
- #endif
-
- #ifdef LM_GGML_USE_VULKAN
- #include "ggml-vulkan.h"
- #endif
-
- #ifdef LM_GGML_USE_BLAS
- #include "ggml-blas.h"
- #endif
-
- #ifdef LM_GGML_USE_RPC
- #include "ggml-rpc.h"
- #endif
-
- #ifdef LM_GGML_USE_AMX
- #  include "ggml-amx.h"
- #endif
-
- #ifdef LM_GGML_USE_CANN
- #include "ggml-cann.h"
- #endif
-
- #ifdef LM_GGML_USE_KOMPUTE
- #include "ggml-kompute.h"
- #endif
-
- struct lm_ggml_backend_registry {
-     std::vector<lm_ggml_backend_reg_t> backends;
-     std::vector<lm_ggml_backend_dev_t> devices;
-
-     lm_ggml_backend_registry() {
- #ifdef LM_GGML_USE_CUDA
-         register_backend(lm_ggml_backend_cuda_reg());
- #endif
- #ifdef LM_GGML_USE_METAL
-         register_backend(lm_ggml_backend_metal_reg());
- #endif
- #ifdef LM_GGML_USE_SYCL
-         register_backend(lm_ggml_backend_sycl_reg());
- #endif
- #ifdef LM_GGML_USE_VULKAN
-         register_backend(lm_ggml_backend_vk_reg());
- #endif
- #ifdef LM_GGML_USE_CANN
-         register_backend(lm_ggml_backend_cann_reg());
- #endif
- #ifdef LM_GGML_USE_BLAS
-         register_backend(lm_ggml_backend_blas_reg());
- #endif
- #ifdef LM_GGML_USE_RPC
-         register_backend(lm_ggml_backend_rpc_reg());
- #endif
- #ifdef LM_GGML_USE_AMX
-         register_backend(lm_ggml_backend_amx_reg());
- #endif
- #ifdef LM_GGML_USE_KOMPUTE
-         register_backend(lm_ggml_backend_kompute_reg());
- #endif
-
-         register_backend(lm_ggml_backend_cpu_reg());
-     }
-
-     void register_backend(lm_ggml_backend_reg_t reg) {
-         if (!reg) {
-             return;
-         }
-
- #ifndef NDEBUG
-         LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
-             __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
- #endif
-         backends.push_back(reg);
-         for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
-             register_device(lm_ggml_backend_reg_dev_get(reg, i));
-         }
-     }
-
-     void register_device(lm_ggml_backend_dev_t device) {
- #ifndef NDEBUG
-         LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
- #endif
-         devices.push_back(device);
-     }
- };
-
- static lm_ggml_backend_registry & get_reg() {
-     static lm_ggml_backend_registry reg;
-     return reg;
- }
-
- // Internal API
- void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
-     get_reg().register_backend(reg);
- }
-
- void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
-     get_reg().register_device(device);
- }
-
- // Backend (reg) enumeration
- size_t lm_ggml_backend_reg_count() {
-     return get_reg().backends.size();
- }
-
- lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
-     LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
-     return get_reg().backends[index];
- }
-
- lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
-     for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
-         lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
-         if (std::strcmp(lm_ggml_backend_reg_name(reg), name) == 0) {
-             return reg;
-         }
-     }
-     return NULL;
- }
-
- // Device enumeration
- size_t lm_ggml_backend_dev_count() {
-     return get_reg().devices.size();
- }
-
- lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
-     LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
-     return get_reg().devices[index];
- }
-
- lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
-     for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
-         lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
-         if (strcmp(lm_ggml_backend_dev_name(dev), name) == 0) {
-             return dev;
-         }
-     }
-     return NULL;
- }
-
- lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
-     for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
-         lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
-         if (lm_ggml_backend_dev_type(dev) == type) {
-             return dev;
-         }
-     }
-     return NULL;
- }
-
- // Convenience functions
- lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
-     lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
-     if (!dev) {
-         return NULL;
-     }
-     return lm_ggml_backend_dev_init(dev, params);
- }
-
- lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
-     lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
-     if (!dev) {
-         return NULL;
-     }
-     return lm_ggml_backend_dev_init(dev, params);
- }
-
- lm_ggml_backend_t lm_ggml_backend_init_best(void) {
-     lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
-     if (!dev) {
-         dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
-     }
-     if (!dev) {
-         return NULL;
-     }
-     return lm_ggml_backend_dev_init(dev, NULL);
- }
+ #include "ggml-backend-impl.h"
+ #include "ggml-backend.h"
+ #include "ggml-impl.h"
+ #include <algorithm>
+ #include <cstring>
+ #include <string>
+ #include <vector>
+
+ #ifdef _WIN32
+ #    define WIN32_LEAN_AND_MEAN
+ #    ifndef NOMINMAX
+ #        define NOMINMAX
+ #    endif
+ #    include <windows.h>
+ #elif defined(__APPLE__)
+ #    include <mach-o/dyld.h>
+ #    include <dlfcn.h>
+ #else
+ #    include <dlfcn.h>
+ #    include <unistd.h>
+ #endif
+
+ // Backend registry
+ #ifdef LM_GGML_USE_CPU
+ #include "ggml-cpu.h"
+ #endif
+
+ #ifdef LM_GGML_USE_CUDA
+ #include "ggml-cuda.h"
+ #endif
+
+ #ifdef LM_GGML_USE_METAL
+ #include "ggml-metal.h"
+ #endif
+
+ #ifdef LM_GGML_USE_SYCL
+ #include "ggml-sycl.h"
+ #endif
+
+ #ifdef LM_GGML_USE_VULKAN
+ #include "ggml-vulkan.h"
+ #endif
+
+ #ifdef LM_GGML_USE_BLAS
+ #include "ggml-blas.h"
+ #endif
+
+ #ifdef LM_GGML_USE_RPC
+ #include "ggml-rpc.h"
+ #endif
+
+ #ifdef LM_GGML_USE_CANN
+ #include "ggml-cann.h"
+ #endif
+
+ #ifdef LM_GGML_USE_KOMPUTE
+ #include "ggml-kompute.h"
+ #endif
+
+ struct lm_ggml_backend_reg_entry {
+     lm_ggml_backend_reg_t reg;
+     void * handle;
+ };
+
+ struct lm_ggml_backend_registry {
+     std::vector<lm_ggml_backend_reg_entry> backends;
+     std::vector<lm_ggml_backend_dev_t> devices;
+
+     lm_ggml_backend_registry() {
+ #ifdef LM_GGML_USE_CUDA
+         register_backend(lm_ggml_backend_cuda_reg());
+ #endif
+ #ifdef LM_GGML_USE_METAL
+         register_backend(lm_ggml_backend_metal_reg());
+ #endif
+ #ifdef LM_GGML_USE_SYCL
+         register_backend(lm_ggml_backend_sycl_reg());
+ #endif
+ #ifdef LM_GGML_USE_VULKAN
+         register_backend(lm_ggml_backend_vk_reg());
+ #endif
+ #ifdef LM_GGML_USE_CANN
+         register_backend(lm_ggml_backend_cann_reg());
+ #endif
+ #ifdef LM_GGML_USE_BLAS
+         register_backend(lm_ggml_backend_blas_reg());
+ #endif
+ #ifdef LM_GGML_USE_RPC
+         register_backend(lm_ggml_backend_rpc_reg());
+ #endif
+ #ifdef LM_GGML_USE_KOMPUTE
+         register_backend(lm_ggml_backend_kompute_reg());
+ #endif
+ #ifdef LM_GGML_USE_CPU
+         register_backend(lm_ggml_backend_cpu_reg());
+ #endif
+     }
+
+     ~lm_ggml_backend_registry() {
+         while (!backends.empty()) {
+             // use silent since the log system may have been destroyed at this point
+             unload_backend(backends.back().reg, true);
+         }
+     }
+
+     void register_backend(lm_ggml_backend_reg_t reg, void * handle = nullptr) {
+         if (!reg) {
+             return;
+         }
+
+ #ifndef NDEBUG
+         LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
+             __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
+ #endif
+         backends.push_back({ reg, handle });
+         for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
+             register_device(lm_ggml_backend_reg_dev_get(reg, i));
+         }
+     }
+
+     void register_device(lm_ggml_backend_dev_t device) {
+ #ifndef NDEBUG
+         LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
+ #endif
+         devices.push_back(device);
+     }
+
+     lm_ggml_backend_reg_t load_backend(const char * path, bool silent) {
+ #ifdef _WIN32
+         // suppress error dialogs for missing DLLs
+         DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
+         SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
+
+         HMODULE handle = LoadLibraryA(path);
+
+         if (!handle) {
+             if (!silent) {
+                 LM_GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
+             }
+             SetErrorMode(old_mode);
+             return nullptr;
+         }
+
+         lm_ggml_backend_init_t backend_init = (lm_ggml_backend_init_t) GetProcAddress(handle, "lm_ggml_backend_init");
+
+         SetErrorMode(old_mode);
+
+         if (!backend_init) {
+             if (!silent) {
+                 LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
+             }
+             FreeLibrary(handle);
+             return nullptr;
+         }
+ #else
+         void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
+
+         if (!handle) {
+             if (!silent) {
+                 LM_GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
+             }
+             return nullptr;
+         }
+
+         auto * backend_init = (lm_ggml_backend_init_t) dlsym(handle, "lm_ggml_backend_init");
+
+         if (!backend_init) {
+             if (!silent) {
+                 LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %s\n", __func__, path, dlerror());
+             }
+             dlclose(handle);
+             return nullptr;
+         }
+ #endif
+         lm_ggml_backend_reg_t reg = backend_init();
+
+         if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
+             if (!silent) {
+                 if (!reg) {
+                     LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, path);
+                 } else {
+                     LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
+                         __func__, path, reg->api_version, LM_GGML_BACKEND_API_VERSION);
+                 }
+             }
+ #ifdef _WIN32
+             FreeLibrary(handle);
+ #else
+             dlclose(handle);
+ #endif
+             return nullptr;
+         }
+
+         LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path);
+         register_backend(reg, handle);
+         return reg;
+     }
+
+     void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
+         auto it = std::find_if(backends.begin(), backends.end(),
+             [reg](lm_ggml_backend_reg_entry entry) { return entry.reg == reg; });
+
+         if (it == backends.end()) {
+             if (!silent) {
+                 LM_GGML_LOG_ERROR("%s: backend not found\n", __func__);
+             }
+             return;
+         }
+
+         if (!silent) {
+             LM_GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, lm_ggml_backend_reg_name(reg));
+         }
+
+         // remove devices
+         devices.erase(
+             std::remove_if(devices.begin(), devices.end(),
+                 [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
+             devices.end());
+
+         // unload library
+         if (it->handle) {
+ #ifdef _WIN32
+             FreeLibrary((HMODULE) it->handle);
+ #else
+             dlclose(it->handle);
+ #endif
+         }
+
+         // remove backend
+         backends.erase(it);
+     }
+ };
+
+ static lm_ggml_backend_registry & get_reg() {
+     static lm_ggml_backend_registry reg;
+     return reg;
+ }
+
+ // Internal API
+ void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
+     get_reg().register_backend(reg);
+ }
+
+ void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
+     get_reg().register_device(device);
+ }
+
+ // Backend (reg) enumeration
+ static bool striequals(const char * a, const char * b) {
+     for (; *a && *b; a++, b++) {
+         if (std::tolower(*a) != std::tolower(*b)) {
+             return false;
+         }
+     }
+     return *a == *b;
+ }
+
+ size_t lm_ggml_backend_reg_count() {
+     return get_reg().backends.size();
+ }
+
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
+     LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
+     return get_reg().backends[index].reg;
+ }
+
+ lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
+     for (size_t i = 0; i < lm_ggml_backend_reg_count(); i++) {
+         lm_ggml_backend_reg_t reg = lm_ggml_backend_reg_get(i);
+         if (striequals(lm_ggml_backend_reg_name(reg), name)) {
+             return reg;
+         }
+     }
+     return nullptr;
+ }
+
+ // Device enumeration
+ size_t lm_ggml_backend_dev_count() {
+     return get_reg().devices.size();
+ }
+
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
+     LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
+     return get_reg().devices[index];
+ }
+
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
+     for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
+         lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
+         if (striequals(lm_ggml_backend_dev_name(dev), name)) {
+             return dev;
+         }
+     }
+     return nullptr;
+ }
+
+ lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
+     for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
+         lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
+         if (lm_ggml_backend_dev_type(dev) == type) {
+             return dev;
+         }
+     }
+     return nullptr;
+ }
+
+ // Convenience functions
+ lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
+     lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_name(name);
+     if (!dev) {
+         return nullptr;
+     }
+     return lm_ggml_backend_dev_init(dev, params);
+ }
+
+ lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
+     lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(type);
+     if (!dev) {
+         return nullptr;
+     }
+     return lm_ggml_backend_dev_init(dev, params);
+ }
+
+ lm_ggml_backend_t lm_ggml_backend_init_best(void) {
+     lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
+     if (!dev) {
+         dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
+     }
+     if (!dev) {
+         return nullptr;
+     }
+     return lm_ggml_backend_dev_init(dev, nullptr);
+ }
+
+ // Dynamic loading
+ lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
+     return get_reg().load_backend(path, false);
+ }
+
+ void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
+     get_reg().unload_backend(reg, true);
+ }
+
+ void lm_ggml_backend_load_all() {
+     std::vector<std::string> search_prefix;
+
+     // add the executable directory to the search path
+     // FIXME: this is convenient for development, but it should probably be disabled in production
+
+ #if defined(__APPLE__)
+     // get executable path
+     std::vector<char> path;
+     uint32_t size;
+     while (true) {
+         size = path.size();
+         if (_NSGetExecutablePath(path.data(), &size) == 0) {
+             break;
+         }
+         path.resize(size);
+     }
+     std::string base_path(path.data(), size);
+     // remove executable name
+     auto last_slash = base_path.find_last_of('/');
+     if (last_slash != std::string::npos) {
+         base_path = base_path.substr(0, last_slash);
+     }
+     search_prefix.push_back(base_path + "/");
+ #elif defined(__linux__)
+     std::string base_path = ".";
+     std::vector<char> path(1024);
+     while (true) {
+         // get executable path
+         ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
+         if (len == -1) {
+             break;
+         }
+         if (len < (ssize_t) path.size()) {
+             base_path = std::string(path.data(), len);
+             // remove executable name
+             auto last_slash = base_path.find_last_of('/');
+             if (last_slash != std::string::npos) {
+                 base_path = base_path.substr(0, last_slash);
+             }
+             break;
+         }
+         path.resize(path.size() * 2);
+     }
+
+     search_prefix.push_back(base_path + "/");
+ #endif
+
+     auto & reg = get_reg();
+
+     auto try_load = [&](const std::string & name) {
+         std::string os_name;
+ #ifdef _WIN32
+         os_name = "ggml-" + name + ".dll";
+ #else
+         os_name = "libggml-" + name + ".so";
+ #endif
+         if (reg.load_backend(os_name.c_str(), true)) {
+             return;
+         }
+         for (const auto & prefix : search_prefix) {
+             if (reg.load_backend((prefix + os_name).c_str(), true)) {
+                 return;
+             }
+         }
+     };
+
+     try_load("amx");
+     try_load("blas");
+     try_load("cann");
+     try_load("cuda");
+     try_load("hip");
+     try_load("kompute");
+     try_load("metal");
+     try_load("rpc");
+     try_load("sycl");
+     try_load("vulkan");
+     try_load("musa");
+     try_load("cpu");
+ }
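
For orientation, here is a minimal sketch of how an embedder might exercise the registry API introduced above (device enumeration plus dynamic backend loading). It only uses functions that appear in this diff, except lm_ggml_backend_free, which is assumed to follow the same lm_-prefixed naming as the rest of the ggml-backend API; the include path and program scaffolding are illustrative, not part of the package.

    // registry_demo.cpp - illustrative sketch only
    #include "ggml-backend.h"

    #include <cstdio>

    int main() {
        // probe for dynamically loadable backends (ggml-<name>.dll / libggml-<name>.so)
        // next to the executable and on the default library search path
        lm_ggml_backend_load_all();

        // list every device the registry knows about
        for (size_t i = 0; i < lm_ggml_backend_dev_count(); i++) {
            lm_ggml_backend_dev_t dev = lm_ggml_backend_dev_get(i);
            std::printf("device %zu: %s (%s)\n", i,
                lm_ggml_backend_dev_name(dev), lm_ggml_backend_dev_description(dev));
        }

        // prefer a GPU device, fall back to CPU
        lm_ggml_backend_t backend = lm_ggml_backend_init_best();
        if (!backend) {
            std::printf("no usable backend found\n");
            return 1;
        }

        // ... build and run a compute graph with the backend ...

        lm_ggml_backend_free(backend); // assumed lm_-prefixed counterpart of ggml_backend_free
        return 0;
    }

Backends compiled into the binary (the LM_GGML_USE_* blocks) are registered by the lm_ggml_backend_registry constructor, so the enumeration loop works even when no dynamically loadable backend library is present.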