cui-llama.rn 1.3.0 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +6 -1
- package/android/src/main/jni.cpp +6 -6
- package/cpp/amx/amx.cpp +196 -0
- package/cpp/amx/amx.h +20 -0
- package/cpp/amx/common.h +101 -0
- package/cpp/amx/mmq.cpp +2524 -0
- package/cpp/amx/mmq.h +16 -0
- package/cpp/common.cpp +1981 -1682
- package/cpp/common.h +636 -600
- package/cpp/ggml-aarch64.c +129 -129
- package/cpp/ggml-aarch64.h +19 -19
- package/cpp/ggml-alloc.c +1038 -1040
- package/cpp/ggml-alloc.h +76 -76
- package/cpp/ggml-backend-impl.h +238 -216
- package/cpp/ggml-backend-reg.cpp +423 -195
- package/cpp/ggml-backend.cpp +1999 -1997
- package/cpp/ggml-backend.h +351 -328
- package/cpp/ggml-common.h +1859 -1853
- package/cpp/ggml-cpp.h +38 -38
- package/cpp/ggml-cpu-aarch64.c +3823 -3560
- package/cpp/ggml-cpu-aarch64.h +32 -30
- package/cpp/ggml-cpu-impl.h +386 -371
- package/cpp/ggml-cpu-quants.c +10835 -10822
- package/cpp/ggml-cpu-quants.h +63 -63
- package/cpp/ggml-cpu.c +99 -103
- package/cpp/ggml-cpu.cpp +69 -17
- package/cpp/ggml-cpu.h +152 -177
- package/cpp/ggml-impl.h +556 -550
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +4426 -4294
- package/cpp/ggml-quants.c +5247 -5247
- package/cpp/ggml-quants.h +100 -100
- package/cpp/ggml-threading.cpp +12 -12
- package/cpp/ggml-threading.h +12 -12
- package/cpp/ggml.c +7618 -8180
- package/cpp/ggml.h +2255 -2411
- package/cpp/json-schema-to-grammar.cpp +1045 -0
- package/cpp/json-schema-to-grammar.h +8 -0
- package/cpp/json.hpp +24766 -0
- package/cpp/llama-grammar.cpp +1138 -1138
- package/cpp/llama-grammar.h +144 -144
- package/cpp/llama-impl.h +181 -181
- package/cpp/llama-sampling.cpp +2348 -2348
- package/cpp/llama-sampling.h +48 -48
- package/cpp/llama-vocab.cpp +1984 -1984
- package/cpp/llama-vocab.h +170 -170
- package/cpp/llama.cpp +22332 -22132
- package/cpp/llama.h +1259 -1253
- package/cpp/log.cpp +401 -401
- package/cpp/log.h +121 -121
- package/cpp/rn-llama.hpp +6 -6
- package/cpp/sampling.cpp +505 -466
- package/cpp/sampling.h +22 -1
- package/cpp/sgemm.cpp +1884 -1884
- package/cpp/speculative.cpp +270 -0
- package/cpp/speculative.h +28 -0
- package/cpp/unicode.cpp +11 -0
- package/ios/RNLlamaContext.mm +13 -0
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/grammar.js +4 -2
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/grammar.js +2 -1
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +94 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/grammar.d.ts +5 -6
- package/lib/typescript/grammar.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +4 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +2 -1
- package/src/NativeRNLlama.ts +97 -10
- package/src/grammar.ts +10 -8
- package/src/index.ts +22 -1
package/cpp/ggml-backend-reg.cpp
CHANGED
@@ -1,195 +1,423 @@
|
|
1
|
-
#include "ggml-backend-impl.h"
|
2
|
-
#include "ggml-backend.h"
|
3
|
-
#include "ggml-
|
4
|
-
#include
|
5
|
-
#include <cstring>
|
6
|
-
#include <
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
|
18
|
-
#
|
19
|
-
#include
|
20
|
-
#
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
#
|
25
|
-
|
26
|
-
#
|
27
|
-
|
28
|
-
#
|
29
|
-
|
30
|
-
#
|
31
|
-
|
32
|
-
#
|
33
|
-
|
34
|
-
#
|
35
|
-
|
36
|
-
#
|
37
|
-
|
38
|
-
#
|
39
|
-
|
40
|
-
#
|
41
|
-
|
42
|
-
#
|
43
|
-
|
44
|
-
#
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
#
|
54
|
-
#
|
55
|
-
|
56
|
-
#
|
57
|
-
#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
#
|
73
|
-
|
74
|
-
|
75
|
-
#
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
#
|
88
|
-
|
89
|
-
|
90
|
-
#endif
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
}
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
}
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
return
|
154
|
-
}
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
}
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
1
|
+
#include "ggml-backend-impl.h"
|
2
|
+
#include "ggml-backend.h"
|
3
|
+
#include "ggml-impl.h"
|
4
|
+
#include <algorithm>
|
5
|
+
#include <cstring>
|
6
|
+
#include <string>
|
7
|
+
#include <vector>
|
8
|
+
|
9
|
+
#ifdef _WIN32
|
10
|
+
# define WIN32_LEAN_AND_MEAN
|
11
|
+
# ifndef NOMINMAX
|
12
|
+
# define NOMINMAX
|
13
|
+
# endif
|
14
|
+
# include <windows.h>
|
15
|
+
#elif defined(__APPLE__)
|
16
|
+
# include <mach-o/dyld.h>
|
17
|
+
# include <dlfcn.h>
|
18
|
+
#else
|
19
|
+
# include <dlfcn.h>
|
20
|
+
# include <unistd.h>
|
21
|
+
#endif
|
22
|
+
|
23
|
+
// Backend registry
|
24
|
+
#ifdef LM_GGML_USE_CPU
|
25
|
+
#include "ggml-cpu.h"
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#ifdef LM_GGML_USE_CUDA
|
29
|
+
#include "ggml-cuda.h"
|
30
|
+
#endif
|
31
|
+
|
32
|
+
#ifdef LM_GGML_USE_METAL
|
33
|
+
#include "ggml-metal.h"
|
34
|
+
#endif
|
35
|
+
|
36
|
+
#ifdef LM_GGML_USE_SYCL
|
37
|
+
#include "ggml-sycl.h"
|
38
|
+
#endif
|
39
|
+
|
40
|
+
#ifdef LM_GGML_USE_VULKAN
|
41
|
+
#include "ggml-vulkan.h"
|
42
|
+
#endif
|
43
|
+
|
44
|
+
#ifdef LM_GGML_USE_BLAS
|
45
|
+
#include "ggml-blas.h"
|
46
|
+
#endif
|
47
|
+
|
48
|
+
#ifdef LM_GGML_USE_RPC
|
49
|
+
#include "ggml-rpc.h"
|
50
|
+
#endif
|
51
|
+
|
52
|
+
#ifdef LM_GGML_USE_CANN
|
53
|
+
#include "ggml-cann.h"
|
54
|
+
#endif
|
55
|
+
|
56
|
+
#ifdef LM_GGML_USE_KOMPUTE
|
57
|
+
#include "ggml-kompute.h"
|
58
|
+
#endif
|
59
|
+
|
60
|
+
struct lm_ggml_backend_reg_entry {
|
61
|
+
lm_ggml_backend_reg_t reg;
|
62
|
+
void * handle;
|
63
|
+
};
|
64
|
+
|
65
|
+
struct lm_ggml_backend_registry {
|
66
|
+
std::vector<lm_ggml_backend_reg_entry> backends;
|
67
|
+
std::vector<lm_ggml_backend_dev_t> devices;
|
68
|
+
|
69
|
+
// Registers every backend that was enabled at compile time.
// The order below is the order in which backends (and their devices) end up
// in `backends` / `devices`; the CPU backend is registered last.
lm_ggml_backend_registry() {
#ifdef LM_GGML_USE_CUDA
    this->register_backend(lm_ggml_backend_cuda_reg());
#endif

#ifdef LM_GGML_USE_METAL
    this->register_backend(lm_ggml_backend_metal_reg());
#endif

#ifdef LM_GGML_USE_SYCL
    this->register_backend(lm_ggml_backend_sycl_reg());
#endif

#ifdef LM_GGML_USE_VULKAN
    this->register_backend(lm_ggml_backend_vk_reg());
#endif

#ifdef LM_GGML_USE_CANN
    this->register_backend(lm_ggml_backend_cann_reg());
#endif

#ifdef LM_GGML_USE_BLAS
    this->register_backend(lm_ggml_backend_blas_reg());
#endif

#ifdef LM_GGML_USE_RPC
    this->register_backend(lm_ggml_backend_rpc_reg());
#endif

#ifdef LM_GGML_USE_KOMPUTE
    this->register_backend(lm_ggml_backend_kompute_reg());
#endif

#ifdef LM_GGML_USE_CPU
    this->register_backend(lm_ggml_backend_cpu_reg());
#endif
}
|
98
|
+
|
99
|
+
// Unloads every registered backend, newest first, until none remain.
~lm_ggml_backend_registry() {
    for (; !backends.empty(); ) {
        // the log system may already be gone at this point, so unload silently
        const lm_ggml_backend_reg_t last_reg = backends.back().reg;
        unload_backend(last_reg, true);
    }
}
|
105
|
+
|
106
|
+
// Adds a backend to the registry and registers each of its devices.
//   reg    - backend registration; a null value is ignored
//   handle - dynamic-library handle stored alongside the backend (nullptr by default)
void register_backend(lm_ggml_backend_reg_t reg, void * handle = nullptr) {
    if (reg == nullptr) {
        return;
    }

#ifndef NDEBUG
    LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
        __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
#endif

    lm_ggml_backend_reg_entry entry = { reg, handle };
    backends.push_back(entry);

    // the device count is queried again on every pass, as in the original loop
    size_t dev_idx = 0;
    while (dev_idx < lm_ggml_backend_reg_dev_count(reg)) {
        register_device(lm_ggml_backend_reg_dev_get(reg, dev_idx));
        dev_idx++;
    }
}
|
120
|
+
|
121
|
+
// Appends a device to the registry's device list (and logs it in debug builds).
void register_device(lm_ggml_backend_dev_t device) {
#ifndef NDEBUG
    LM_GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, lm_ggml_backend_dev_name(device), lm_ggml_backend_dev_description(device));
#endif

    devices.emplace_back(device);
}
|
127
|
+
|
128
|
+
// Loads a backend from the dynamic library at `path` and registers it.
//
//   path   - library file name or path handed to LoadLibraryA / dlopen
//   silent - when true, failures are not logged
//
// Returns the backend registration on success. Returns nullptr (with the
// library closed again) when the library cannot be opened, does not export
// `lm_ggml_backend_init`, the init function returns NULL, or the backend
// reports an API version other than LM_GGML_BACKEND_API_VERSION.
lm_ggml_backend_reg_t load_backend(const char * path, bool silent) {
#ifdef _WIN32
    // suppress error dialogs for missing DLLs
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    HMODULE handle = LoadLibraryA(path);

    if (!handle) {
        // read the error code immediately, before another API call can replace it
        const DWORD load_err = GetLastError();
        if (!silent) {
            LM_GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, load_err);
        }
        SetErrorMode(old_mode);
        return nullptr;
    }

    lm_ggml_backend_init_t backend_init = (lm_ggml_backend_init_t) GetProcAddress(handle, "lm_ggml_backend_init");
    // capture the lookup error before SetErrorMode() below gets a chance to overwrite it
    const DWORD sym_err = backend_init ? 0 : GetLastError();

    SetErrorMode(old_mode);

    if (!backend_init) {
        if (!silent) {
            LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %lu\n", __func__, path, sym_err);
        }
        FreeLibrary(handle);
        return nullptr;
    }
#else
    void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);

    if (!handle) {
        if (!silent) {
            // dlerror() may return NULL; never pass NULL to %s
            const char * err = dlerror();
            LM_GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, err ? err : "unknown error");
        }
        return nullptr;
    }

    auto * backend_init = (lm_ggml_backend_init_t) dlsym(handle, "lm_ggml_backend_init");

    if (!backend_init) {
        if (!silent) {
            // a symbol whose value is NULL leaves dlerror() returning NULL as well
            const char * err = dlerror();
            LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %s\n", __func__, path, err ? err : "unknown error");
        }
        dlclose(handle);
        return nullptr;
    }
#endif
    lm_ggml_backend_reg_t reg = backend_init();

    if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
        if (!silent) {
            if (!reg) {
                LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, path);
            } else {
                LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
                    __func__, path, reg->api_version, LM_GGML_BACKEND_API_VERSION);
            }
        }
        // the library is of no use without a compatible backend: release it
#ifdef _WIN32
        FreeLibrary(handle);
#else
        dlclose(handle);
#endif
        return nullptr;
    }

    LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path);
    // keep the handle with the entry so unload_backend() can close the library later
    register_backend(reg, handle);
    return reg;
}
|
198
|
+
|
199
|
+
// Removes a backend from the registry: drops its devices, closes its library
// (when a handle was stored for it) and erases its entry.
//   reg    - backend to remove; if it is not registered nothing is changed
//   silent - when true, nothing is logged
void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
    const auto is_target = [reg](lm_ggml_backend_reg_entry candidate) { return candidate.reg == reg; };
    auto entry_it = std::find_if(backends.begin(), backends.end(), is_target);

    if (entry_it == backends.end()) {
        if (!silent) {
            LM_GGML_LOG_ERROR("%s: backend not found\n", __func__);
        }
        return;
    }

    if (!silent) {
        LM_GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, lm_ggml_backend_reg_name(reg));
    }

    // drop every device that belongs to this backend
    const auto owned_by_reg = [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; };
    auto first_dropped = std::remove_if(devices.begin(), devices.end(), owned_by_reg);
    devices.erase(first_dropped, devices.end());

    // close the dynamic library, if this entry has one
    if (entry_it->handle) {
#ifdef _WIN32
        FreeLibrary((HMODULE) entry_it->handle);
#else
        dlclose(entry_it->handle);
#endif
    }

    // finally forget the backend itself
    backends.erase(entry_it);
}
|
232
|
+
};
|
233
|
+
|
234
|
+
// Returns the single registry instance shared by this file.
// It is a function-local static, so it is constructed on the first call.
static lm_ggml_backend_registry & get_reg() {
    static lm_ggml_backend_registry registry;
    return registry;
}
|
238
|
+
|
239
|
+
// Internal API
|
240
|
+
// Registers a backend (and its devices) with the global registry.
// No library handle is associated with the entry.
void lm_ggml_backend_register(lm_ggml_backend_reg_t reg) {
    lm_ggml_backend_registry & registry = get_reg();
    registry.register_backend(reg);
}
|
243
|
+
|
244
|
+
// Adds a single device to the global registry's device list.
void lm_ggml_backend_device_register(lm_ggml_backend_dev_t device) {
    lm_ggml_backend_registry & registry = get_reg();
    registry.register_device(device);
}
|
247
|
+
|
248
|
+
// Backend (reg) enumeration
|
249
|
+
// Case-insensitive equality test for two NUL-terminated strings.
//   a, b - strings to compare; both must be non-null
// Returns true when both strings have the same length and every pair of
// characters is equal after std::tolower.
static bool striequals(const char * a, const char * b) {
    for (; *a && *b; a++, b++) {
        // std::tolower is only defined for EOF and values representable as
        // unsigned char; plain char may be negative, so convert first
        const int lower_a = std::tolower(static_cast<unsigned char>(*a));
        const int lower_b = std::tolower(static_cast<unsigned char>(*b));
        if (lower_a != lower_b) {
            return false;
        }
    }
    // at least one string has ended: they match only if both ended together
    return *a == *b;
}
|
257
|
+
|
258
|
+
size_t lm_ggml_backend_reg_count() {
|
259
|
+
return get_reg().backends.size();
|
260
|
+
}
|
261
|
+
|
262
|
+
// Returns the backend stored at position `index` in the global registry.
// `index` must be smaller than lm_ggml_backend_reg_count(); this is asserted.
lm_ggml_backend_reg_t lm_ggml_backend_reg_get(size_t index) {
    LM_GGML_ASSERT(index < lm_ggml_backend_reg_count());
    const lm_ggml_backend_reg_entry & entry = get_reg().backends[index];
    return entry.reg;
}
|
266
|
+
|
267
|
+
// Finds the first registered backend whose name equals `name`, ignoring case.
// Returns nullptr when no backend matches.
lm_ggml_backend_reg_t lm_ggml_backend_reg_by_name(const char * name) {
    size_t idx = 0;
    while (idx < lm_ggml_backend_reg_count()) {
        lm_ggml_backend_reg_t candidate = lm_ggml_backend_reg_get(idx);
        const bool same_name = striequals(lm_ggml_backend_reg_name(candidate), name);
        if (same_name) {
            return candidate;
        }
        idx++;
    }
    return nullptr;
}
|
276
|
+
|
277
|
+
// Device enumeration
|
278
|
+
size_t lm_ggml_backend_dev_count() {
|
279
|
+
return get_reg().devices.size();
|
280
|
+
}
|
281
|
+
|
282
|
+
// Returns the device stored at position `index` in the global registry.
// `index` must be smaller than lm_ggml_backend_dev_count(); this is asserted.
lm_ggml_backend_dev_t lm_ggml_backend_dev_get(size_t index) {
    LM_GGML_ASSERT(index < lm_ggml_backend_dev_count());
    lm_ggml_backend_registry & registry = get_reg();
    return registry.devices[index];
}
|
286
|
+
|
287
|
+
// Finds the first registered device whose name equals `name`, ignoring case.
// Returns nullptr when no device matches.
lm_ggml_backend_dev_t lm_ggml_backend_dev_by_name(const char * name) {
    size_t idx = 0;
    while (idx < lm_ggml_backend_dev_count()) {
        lm_ggml_backend_dev_t candidate = lm_ggml_backend_dev_get(idx);
        const bool same_name = striequals(lm_ggml_backend_dev_name(candidate), name);
        if (same_name) {
            return candidate;
        }
        idx++;
    }
    return nullptr;
}
|
296
|
+
|
297
|
+
// Returns the first registered device of the requested type, in registration
// order, or nullptr when there is none.
lm_ggml_backend_dev_t lm_ggml_backend_dev_by_type(enum lm_ggml_backend_dev_type type) {
    size_t idx = 0;
    while (idx < lm_ggml_backend_dev_count()) {
        lm_ggml_backend_dev_t candidate = lm_ggml_backend_dev_get(idx);
        if (lm_ggml_backend_dev_type(candidate) == type) {
            return candidate;
        }
        idx++;
    }
    return nullptr;
}
|
306
|
+
|
307
|
+
// Convenience functions
|
308
|
+
// Looks a device up by name and initialises a backend on it.
//   name   - device name (matched case-insensitively by lm_ggml_backend_dev_by_name)
//   params - forwarded unchanged to lm_ggml_backend_dev_init
// Returns nullptr when no device has that name.
lm_ggml_backend_t lm_ggml_backend_init_by_name(const char * name, const char * params) {
    lm_ggml_backend_dev_t found = lm_ggml_backend_dev_by_name(name);
    if (found) {
        return lm_ggml_backend_dev_init(found, params);
    }
    return nullptr;
}
|
315
|
+
|
316
|
+
// Initialises a backend on the first registered device of the given type.
//   type   - device type to look for
//   params - forwarded unchanged to lm_ggml_backend_dev_init
// Returns nullptr when no device of that type is registered.
lm_ggml_backend_t lm_ggml_backend_init_by_type(enum lm_ggml_backend_dev_type type, const char * params) {
    lm_ggml_backend_dev_t found = lm_ggml_backend_dev_by_type(type);
    if (found) {
        return lm_ggml_backend_dev_init(found, params);
    }
    return nullptr;
}
|
323
|
+
|
324
|
+
// Initialises a backend on the first GPU device, falling back to the first
// CPU device when no GPU device is registered. Returns nullptr if neither exists.
// The device is initialised with null params.
lm_ggml_backend_t lm_ggml_backend_init_best(void) {
    lm_ggml_backend_dev_t chosen = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_GPU);
    if (chosen == nullptr) {
        chosen = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
        if (chosen == nullptr) {
            return nullptr;
        }
    }
    return lm_ggml_backend_dev_init(chosen, nullptr);
}
|
334
|
+
|
335
|
+
// Dynamic loading
|
336
|
+
// Loads the backend library at `path` into the global registry.
// Failures are logged (silent = false). Returns the backend, or nullptr on failure.
lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
    lm_ggml_backend_registry & registry = get_reg();
    return registry.load_backend(path, false);
}
|
339
|
+
|
340
|
+
// Removes `reg` from the global registry.
// Runs in silent mode, so an unknown backend is ignored without logging.
void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
    lm_ggml_backend_registry & registry = get_reg();
    registry.unload_backend(reg, true);
}
|
343
|
+
|
344
|
+
void lm_ggml_backend_load_all() {
|
345
|
+
std::vector<std::string> search_prefix;
|
346
|
+
|
347
|
+
// add the executable directory to the search path
|
348
|
+
// FIXME: this is convenient for development, but it should probably be disabled in production
|
349
|
+
|
350
|
+
#if defined(__APPLE__)
|
351
|
+
// get executable path
|
352
|
+
std::vector<char> path;
|
353
|
+
uint32_t size;
|
354
|
+
while (true) {
|
355
|
+
size = path.size();
|
356
|
+
if (_NSGetExecutablePath(path.data(), &size) == 0) {
|
357
|
+
break;
|
358
|
+
}
|
359
|
+
path.resize(size);
|
360
|
+
}
|
361
|
+
std::string base_path(path.data(), size);
|
362
|
+
// remove executable name
|
363
|
+
auto last_slash = base_path.find_last_of('/');
|
364
|
+
if (last_slash != std::string::npos) {
|
365
|
+
base_path = base_path.substr(0, last_slash);
|
366
|
+
}
|
367
|
+
search_prefix.push_back(base_path + "/");
|
368
|
+
#elif defined(__linux__)
|
369
|
+
std::string base_path = ".";
|
370
|
+
std::vector<char> path(1024);
|
371
|
+
while (true) {
|
372
|
+
// get executable path
|
373
|
+
ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
|
374
|
+
if (len == -1) {
|
375
|
+
break;
|
376
|
+
}
|
377
|
+
if (len < (ssize_t) path.size()) {
|
378
|
+
base_path = std::string(path.data(), len);
|
379
|
+
// remove executable name
|
380
|
+
auto last_slash = base_path.find_last_of('/');
|
381
|
+
if (last_slash != std::string::npos) {
|
382
|
+
base_path = base_path.substr(0, last_slash);
|
383
|
+
}
|
384
|
+
break;
|
385
|
+
}
|
386
|
+
path.resize(path.size() * 2);
|
387
|
+
}
|
388
|
+
|
389
|
+
search_prefix.push_back(base_path + "/");
|
390
|
+
#endif
|
391
|
+
|
392
|
+
auto & reg = get_reg();
|
393
|
+
|
394
|
+
auto try_load = [&](const std::string & name) {
|
395
|
+
std::string os_name;
|
396
|
+
#ifdef _WIN32
|
397
|
+
os_name = "ggml-" + name + ".dll";
|
398
|
+
#else
|
399
|
+
os_name = "libggml-" + name + ".so";
|
400
|
+
#endif
|
401
|
+
if (reg.load_backend(os_name.c_str(), true)) {
|
402
|
+
return;
|
403
|
+
}
|
404
|
+
for (const auto & prefix : search_prefix) {
|
405
|
+
if (reg.load_backend((prefix + os_name).c_str(), true)) {
|
406
|
+
return;
|
407
|
+
}
|
408
|
+
}
|
409
|
+
};
|
410
|
+
|
411
|
+
try_load("amx");
|
412
|
+
try_load("blas");
|
413
|
+
try_load("cann");
|
414
|
+
try_load("cuda");
|
415
|
+
try_load("hip");
|
416
|
+
try_load("kompute");
|
417
|
+
try_load("metal");
|
418
|
+
try_load("rpc");
|
419
|
+
try_load("sycl");
|
420
|
+
try_load("vulkan");
|
421
|
+
try_load("musa");
|
422
|
+
try_load("cpu");
|
423
|
+
}
|