cui-llama.rn 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +5 -7
- package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
- package/android/src/main/jni.cpp +9 -9
- package/cpp/common.cpp +28 -44
- package/cpp/common.h +35 -14
- package/cpp/ggml-alloc.c +0 -1
- package/cpp/ggml-backend-impl.h +38 -20
- package/cpp/ggml-backend-reg.cpp +246 -92
- package/cpp/ggml-backend.h +1 -0
- package/cpp/ggml-common.h +42 -48
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +642 -223
- package/cpp/ggml-cpu-aarch64.h +2 -26
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +14122 -13971
- package/cpp/ggml-cpu.cpp +627 -715
- package/cpp/ggml-cpu.h +0 -17
- package/cpp/ggml-impl.h +22 -6
- package/cpp/ggml-metal.m +482 -24
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml.c +284 -178
- package/cpp/ggml.h +73 -25
- package/cpp/llama-grammar.cpp +15 -15
- package/cpp/llama-grammar.h +2 -5
- package/cpp/llama-sampling.cpp +35 -90
- package/cpp/llama-vocab.cpp +7 -2
- package/cpp/llama-vocab.h +1 -1
- package/cpp/llama.cpp +1782 -586
- package/cpp/llama.h +20 -19
- package/cpp/sampling.cpp +11 -16
- package/cpp/sgemm.cpp +265 -258
- package/cpp/sgemm.h +2 -2
- package/cpp/speculative.cpp +4 -0
- package/cpp/unicode.cpp +51 -51
- package/cpp/unicode.h +9 -10
- package/lib/commonjs/index.js +38 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +36 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +2 -3
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +36 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +3 -3
- package/src/index.ts +46 -2
- package/cpp/amx/amx.cpp +0 -196
- package/cpp/amx/amx.h +0 -20
- package/cpp/amx/common.h +0 -101
- package/cpp/amx/mmq.cpp +0 -2524
- package/cpp/amx/mmq.h +0 -16
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
package/cpp/ggml-backend-reg.cpp
CHANGED
@@ -2,8 +2,13 @@
|
|
2
2
|
#include "ggml-backend.h"
|
3
3
|
#include "ggml-impl.h"
|
4
4
|
#include <algorithm>
|
5
|
+
#include <codecvt>
|
5
6
|
#include <cstring>
|
7
|
+
#include <filesystem>
|
8
|
+
#include <locale>
|
9
|
+
#include <memory>
|
6
10
|
#include <string>
|
11
|
+
#include <type_traits>
|
7
12
|
#include <vector>
|
8
13
|
|
9
14
|
#ifdef _WIN32
|
@@ -41,6 +46,10 @@
|
|
41
46
|
#include "ggml-vulkan.h"
|
42
47
|
#endif
|
43
48
|
|
49
|
+
#ifdef LM_GGML_USE_OPENCL
|
50
|
+
#include "ggml-opencl.h"
|
51
|
+
#endif
|
52
|
+
|
44
53
|
#ifdef LM_GGML_USE_BLAS
|
45
54
|
#include "ggml-blas.h"
|
46
55
|
#endif
|
@@ -57,9 +66,86 @@
|
|
57
66
|
#include "ggml-kompute.h"
|
58
67
|
#endif
|
59
68
|
|
69
|
+
// disable C++17 deprecation warning for std::codecvt_utf8
|
70
|
+
#if defined(__clang__)
|
71
|
+
# pragma clang diagnostic push
|
72
|
+
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
73
|
+
#endif
|
74
|
+
|
75
|
+
static std::wstring utf8_to_utf16(const std::string & str) {
|
76
|
+
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
77
|
+
return converter.from_bytes(str);
|
78
|
+
}
|
79
|
+
|
80
|
+
static std::string utf16_to_utf8(const std::wstring & str) {
|
81
|
+
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
82
|
+
return converter.to_bytes(str);
|
83
|
+
}
|
84
|
+
|
85
|
+
#if defined(__clang__)
|
86
|
+
# pragma clang diagnostic pop
|
87
|
+
#endif
|
88
|
+
|
89
|
+
#ifdef _WIN32
|
90
|
+
|
91
|
+
using dl_handle = std::remove_pointer_t<HMODULE>;
|
92
|
+
|
93
|
+
struct dl_handle_deleter {
|
94
|
+
void operator()(HMODULE handle) {
|
95
|
+
FreeLibrary(handle);
|
96
|
+
}
|
97
|
+
};
|
98
|
+
|
99
|
+
static dl_handle * dl_load_library(const std::wstring & path) {
|
100
|
+
// suppress error dialogs for missing DLLs
|
101
|
+
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
102
|
+
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
103
|
+
|
104
|
+
HMODULE handle = LoadLibraryW(path.c_str());
|
105
|
+
|
106
|
+
SetErrorMode(old_mode);
|
107
|
+
|
108
|
+
return handle;
|
109
|
+
}
|
110
|
+
|
111
|
+
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
112
|
+
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
113
|
+
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
114
|
+
|
115
|
+
void * p = (void *) GetProcAddress(handle, name);
|
116
|
+
|
117
|
+
SetErrorMode(old_mode);
|
118
|
+
|
119
|
+
return p;
|
120
|
+
}
|
121
|
+
|
122
|
+
#else
|
123
|
+
|
124
|
+
using dl_handle = void;
|
125
|
+
|
126
|
+
struct dl_handle_deleter {
|
127
|
+
void operator()(void * handle) {
|
128
|
+
dlclose(handle);
|
129
|
+
}
|
130
|
+
};
|
131
|
+
|
132
|
+
static void * dl_load_library(const std::wstring & path) {
|
133
|
+
dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
|
134
|
+
|
135
|
+
return handle;
|
136
|
+
}
|
137
|
+
|
138
|
+
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
139
|
+
return dlsym(handle, name);
|
140
|
+
}
|
141
|
+
|
142
|
+
#endif
|
143
|
+
|
144
|
+
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
145
|
+
|
60
146
|
struct lm_ggml_backend_reg_entry {
|
61
147
|
lm_ggml_backend_reg_t reg;
|
62
|
-
|
148
|
+
dl_handle_ptr handle;
|
63
149
|
};
|
64
150
|
|
65
151
|
struct lm_ggml_backend_registry {
|
@@ -79,6 +165,9 @@ struct lm_ggml_backend_registry {
|
|
79
165
|
#ifdef LM_GGML_USE_VULKAN
|
80
166
|
register_backend(lm_ggml_backend_vk_reg());
|
81
167
|
#endif
|
168
|
+
#ifdef LM_GGML_USE_OPENCL
|
169
|
+
register_backend(lm_ggml_backend_opencl_reg());
|
170
|
+
#endif
|
82
171
|
#ifdef LM_GGML_USE_CANN
|
83
172
|
register_backend(lm_ggml_backend_cann_reg());
|
84
173
|
#endif
|
@@ -97,13 +186,16 @@ struct lm_ggml_backend_registry {
|
|
97
186
|
}
|
98
187
|
|
99
188
|
~lm_ggml_backend_registry() {
|
100
|
-
|
101
|
-
|
102
|
-
|
189
|
+
// FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
|
190
|
+
// since backend threads may still be running and accessing resources from the dynamic library
|
191
|
+
for (auto & entry : backends) {
|
192
|
+
if (entry.handle) {
|
193
|
+
entry.handle.release(); // NOLINT
|
194
|
+
}
|
103
195
|
}
|
104
196
|
}
|
105
197
|
|
106
|
-
void register_backend(lm_ggml_backend_reg_t reg,
|
198
|
+
void register_backend(lm_ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
|
107
199
|
if (!reg) {
|
108
200
|
return;
|
109
201
|
}
|
@@ -112,7 +204,7 @@ struct lm_ggml_backend_registry {
|
|
112
204
|
LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
113
205
|
__func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
|
114
206
|
#endif
|
115
|
-
backends.push_back({ reg, handle });
|
207
|
+
backends.push_back({ reg, std::move(handle) });
|
116
208
|
for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
|
117
209
|
register_device(lm_ggml_backend_reg_dev_get(reg, i));
|
118
210
|
}
|
@@ -125,80 +217,54 @@ struct lm_ggml_backend_registry {
|
|
125
217
|
devices.push_back(device);
|
126
218
|
}
|
127
219
|
|
128
|
-
lm_ggml_backend_reg_t load_backend(const
|
129
|
-
|
130
|
-
// suppress error dialogs for missing DLLs
|
131
|
-
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
132
|
-
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
133
|
-
|
134
|
-
HMODULE handle = LoadLibraryA(path);
|
135
|
-
|
220
|
+
lm_ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
|
221
|
+
dl_handle_ptr handle { dl_load_library(path) };
|
136
222
|
if (!handle) {
|
137
223
|
if (!silent) {
|
138
|
-
LM_GGML_LOG_ERROR("%s: failed to load %s
|
224
|
+
LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
|
139
225
|
}
|
140
|
-
SetErrorMode(old_mode);
|
141
226
|
return nullptr;
|
142
227
|
}
|
143
228
|
|
144
|
-
|
145
|
-
|
146
|
-
SetErrorMode(old_mode);
|
147
|
-
|
148
|
-
if (!backend_init) {
|
229
|
+
auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
|
230
|
+
if (score_fn && score_fn() == 0) {
|
149
231
|
if (!silent) {
|
150
|
-
|
232
|
+
LM_GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
|
151
233
|
}
|
152
|
-
FreeLibrary(handle);
|
153
234
|
return nullptr;
|
154
235
|
}
|
155
|
-
#else
|
156
|
-
void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
|
157
|
-
|
158
|
-
if (!handle) {
|
159
|
-
if (!silent) {
|
160
|
-
LM_GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
|
161
|
-
}
|
162
|
-
return nullptr;
|
163
|
-
}
|
164
|
-
|
165
|
-
auto * backend_init = (lm_ggml_backend_init_t) dlsym(handle, "lm_ggml_backend_init");
|
166
236
|
|
167
|
-
|
237
|
+
auto backend_init_fn = (lm_ggml_backend_init_t) dl_get_sym(handle.get(), "lm_ggml_backend_init");
|
238
|
+
if (!backend_init_fn) {
|
168
239
|
if (!silent) {
|
169
|
-
LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s
|
240
|
+
LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
|
170
241
|
}
|
171
|
-
dlclose(handle);
|
172
242
|
return nullptr;
|
173
243
|
}
|
174
|
-
#endif
|
175
|
-
lm_ggml_backend_reg_t reg = backend_init();
|
176
244
|
|
245
|
+
lm_ggml_backend_reg_t reg = backend_init_fn();
|
177
246
|
if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
|
178
247
|
if (!silent) {
|
179
248
|
if (!reg) {
|
180
|
-
LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, path);
|
249
|
+
LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
|
181
250
|
} else {
|
182
251
|
LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
183
|
-
|
252
|
+
__func__, utf16_to_utf8(path).c_str(), reg->api_version, LM_GGML_BACKEND_API_VERSION);
|
184
253
|
}
|
185
254
|
}
|
186
|
-
#ifdef _WIN32
|
187
|
-
FreeLibrary(handle);
|
188
|
-
#else
|
189
|
-
dlclose(handle);
|
190
|
-
#endif
|
191
255
|
return nullptr;
|
192
256
|
}
|
193
257
|
|
194
|
-
LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path);
|
195
|
-
|
258
|
+
LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
|
259
|
+
|
260
|
+
register_backend(reg, std::move(handle));
|
261
|
+
|
196
262
|
return reg;
|
197
263
|
}
|
198
264
|
|
199
265
|
void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
|
200
266
|
auto it = std::find_if(backends.begin(), backends.end(),
|
201
|
-
|
267
|
+
[reg](const lm_ggml_backend_reg_entry & entry) { return entry.reg == reg; });
|
202
268
|
|
203
269
|
if (it == backends.end()) {
|
204
270
|
if (!silent) {
|
@@ -217,15 +283,6 @@ struct lm_ggml_backend_registry {
|
|
217
283
|
[reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
|
218
284
|
devices.end());
|
219
285
|
|
220
|
-
// unload library
|
221
|
-
if (it->handle) {
|
222
|
-
#ifdef _WIN32
|
223
|
-
FreeLibrary((HMODULE) it->handle);
|
224
|
-
#else
|
225
|
-
dlclose(it->handle);
|
226
|
-
#endif
|
227
|
-
}
|
228
|
-
|
229
286
|
// remove backend
|
230
287
|
backends.erase(it);
|
231
288
|
}
|
@@ -334,19 +391,14 @@ lm_ggml_backend_t lm_ggml_backend_init_best(void) {
|
|
334
391
|
|
335
392
|
// Dynamic loading
|
336
393
|
lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
|
337
|
-
return get_reg().load_backend(path, false);
|
394
|
+
return get_reg().load_backend(utf8_to_utf16(path), false);
|
338
395
|
}
|
339
396
|
|
340
397
|
void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
|
341
398
|
get_reg().unload_backend(reg, true);
|
342
399
|
}
|
343
400
|
|
344
|
-
|
345
|
-
std::vector<std::string> search_prefix;
|
346
|
-
|
347
|
-
// add the executable directory to the search path
|
348
|
-
// FIXME: this is convenient for development, but it should probably be disabled in production
|
349
|
-
|
401
|
+
static std::wstring get_executable_path() {
|
350
402
|
#if defined(__APPLE__)
|
351
403
|
// get executable path
|
352
404
|
std::vector<char> path;
|
@@ -364,13 +416,17 @@ void lm_ggml_backend_load_all() {
|
|
364
416
|
if (last_slash != std::string::npos) {
|
365
417
|
base_path = base_path.substr(0, last_slash);
|
366
418
|
}
|
367
|
-
|
368
|
-
#elif defined(__linux__)
|
419
|
+
return utf8_to_utf16(base_path + "/");
|
420
|
+
#elif defined(__linux__) || defined(__FreeBSD__)
|
369
421
|
std::string base_path = ".";
|
370
422
|
std::vector<char> path(1024);
|
371
423
|
while (true) {
|
372
424
|
// get executable path
|
425
|
+
# if defined(__linux__)
|
373
426
|
ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
|
427
|
+
# elif defined(__FreeBSD__)
|
428
|
+
ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
|
429
|
+
# endif
|
374
430
|
if (len == -1) {
|
375
431
|
break;
|
376
432
|
}
|
@@ -386,38 +442,136 @@ void lm_ggml_backend_load_all() {
|
|
386
442
|
path.resize(path.size() * 2);
|
387
443
|
}
|
388
444
|
|
389
|
-
|
445
|
+
return utf8_to_utf16(base_path + "/");
|
446
|
+
#elif defined(_WIN32)
|
447
|
+
std::vector<wchar_t> path(MAX_PATH);
|
448
|
+
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
449
|
+
if (len == 0) {
|
450
|
+
return {};
|
451
|
+
}
|
452
|
+
std::wstring base_path(path.data(), len);
|
453
|
+
// remove executable name
|
454
|
+
auto last_slash = base_path.find_last_of('\\');
|
455
|
+
if (last_slash != std::string::npos) {
|
456
|
+
base_path = base_path.substr(0, last_slash);
|
457
|
+
}
|
458
|
+
return base_path + L"\\";
|
459
|
+
#else
|
460
|
+
return {};
|
390
461
|
#endif
|
462
|
+
}
|
391
463
|
|
392
|
-
|
464
|
+
static std::wstring backend_filename_prefix() {
|
465
|
+
#ifdef _WIN32
|
466
|
+
return L"ggml-";
|
467
|
+
#else
|
468
|
+
return L"libggml-";
|
469
|
+
#endif
|
470
|
+
}
|
393
471
|
|
394
|
-
|
395
|
-
std::string os_name;
|
472
|
+
static std::wstring backend_filename_suffix() {
|
396
473
|
#ifdef _WIN32
|
397
|
-
|
474
|
+
return L".dll";
|
398
475
|
#else
|
399
|
-
|
476
|
+
return L".so";
|
400
477
|
#endif
|
401
|
-
|
402
|
-
|
478
|
+
}
|
479
|
+
|
480
|
+
static std::wstring path_separator() {
|
481
|
+
#ifdef _WIN32
|
482
|
+
return L"\\";
|
483
|
+
#else
|
484
|
+
return L"/";
|
485
|
+
#endif
|
486
|
+
}
|
487
|
+
|
488
|
+
static lm_ggml_backend_reg_t lm_ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
489
|
+
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
490
|
+
// TODO: search system paths
|
491
|
+
std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
|
492
|
+
std::vector<std::wstring> search_paths;
|
493
|
+
if (user_search_path == nullptr) {
|
494
|
+
search_paths.push_back(L"." + path_separator());
|
495
|
+
search_paths.push_back(get_executable_path());
|
496
|
+
} else {
|
497
|
+
search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
|
498
|
+
}
|
499
|
+
|
500
|
+
int best_score = 0;
|
501
|
+
std::wstring best_path;
|
502
|
+
|
503
|
+
namespace fs = std::filesystem;
|
504
|
+
for (const auto & search_path : search_paths) {
|
505
|
+
if (!fs::exists(search_path)) {
|
506
|
+
continue;
|
403
507
|
}
|
404
|
-
|
405
|
-
|
406
|
-
|
508
|
+
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
|
509
|
+
for (const auto & entry : dir_it) {
|
510
|
+
if (entry.is_regular_file()) {
|
511
|
+
std::wstring filename = entry.path().filename().wstring();
|
512
|
+
std::wstring ext = entry.path().extension().wstring();
|
513
|
+
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
514
|
+
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
|
515
|
+
if (!handle && !silent) {
|
516
|
+
LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
517
|
+
}
|
518
|
+
if (handle) {
|
519
|
+
auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
|
520
|
+
if (score_fn) {
|
521
|
+
int s = score_fn();
|
522
|
+
#ifndef NDEBUG
|
523
|
+
LM_GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
|
524
|
+
#endif
|
525
|
+
if (s > best_score) {
|
526
|
+
best_score = s;
|
527
|
+
best_path = entry.path().wstring();
|
528
|
+
}
|
529
|
+
} else {
|
530
|
+
if (!silent) {
|
531
|
+
LM_GGML_LOG_INFO("%s: failed to find lm_ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
532
|
+
}
|
533
|
+
}
|
534
|
+
}
|
535
|
+
}
|
407
536
|
}
|
408
537
|
}
|
409
|
-
}
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
538
|
+
}
|
539
|
+
|
540
|
+
if (best_score == 0) {
|
541
|
+
// try to load the base backend
|
542
|
+
for (const auto & search_path : search_paths) {
|
543
|
+
std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
|
544
|
+
if (fs::exists(path)) {
|
545
|
+
return get_reg().load_backend(path, silent);
|
546
|
+
}
|
547
|
+
}
|
548
|
+
return nullptr;
|
549
|
+
}
|
550
|
+
|
551
|
+
return get_reg().load_backend(best_path, silent);
|
552
|
+
}
|
553
|
+
|
554
|
+
void lm_ggml_backend_load_all() {
|
555
|
+
lm_ggml_backend_load_all_from_path(nullptr);
|
556
|
+
}
|
557
|
+
|
558
|
+
void lm_ggml_backend_load_all_from_path(const char * dir_path) {
|
559
|
+
#ifdef NDEBUG
|
560
|
+
bool silent = true;
|
561
|
+
#else
|
562
|
+
bool silent = false;
|
563
|
+
#endif
|
564
|
+
|
565
|
+
lm_ggml_backend_load_best("blas", silent, dir_path);
|
566
|
+
lm_ggml_backend_load_best("cann", silent, dir_path);
|
567
|
+
lm_ggml_backend_load_best("cuda", silent, dir_path);
|
568
|
+
lm_ggml_backend_load_best("hip", silent, dir_path);
|
569
|
+
lm_ggml_backend_load_best("kompute", silent, dir_path);
|
570
|
+
lm_ggml_backend_load_best("metal", silent, dir_path);
|
571
|
+
lm_ggml_backend_load_best("rpc", silent, dir_path);
|
572
|
+
lm_ggml_backend_load_best("sycl", silent, dir_path);
|
573
|
+
lm_ggml_backend_load_best("vulkan", silent, dir_path);
|
574
|
+
lm_ggml_backend_load_best("opencl", silent, dir_path);
|
575
|
+
lm_ggml_backend_load_best("musa", silent, dir_path);
|
576
|
+
lm_ggml_backend_load_best("cpu", silent, dir_path);
|
423
577
|
}
|
package/cpp/ggml-backend.h
CHANGED
@@ -228,6 +228,7 @@ extern "C" {
|
|
228
228
|
LM_GGML_API void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg);
|
229
229
|
// Load all known backends from dynamic libraries
|
230
230
|
LM_GGML_API void lm_ggml_backend_load_all(void);
|
231
|
+
LM_GGML_API void lm_ggml_backend_load_all_from_path(const char * dir_path);
|
231
232
|
|
232
233
|
//
|
233
234
|
// Backend scheduler
|
package/cpp/ggml-common.h
CHANGED
@@ -6,7 +6,20 @@
|
|
6
6
|
typedef uint16_t lm_ggml_half;
|
7
7
|
typedef uint32_t lm_ggml_half2;
|
8
8
|
|
9
|
-
#define
|
9
|
+
#define LM_GGML_COMMON_AGGR_U
|
10
|
+
#define LM_GGML_COMMON_AGGR_S
|
11
|
+
|
12
|
+
#define LM_GGML_COMMON_DECL
|
13
|
+
#elif defined(LM_GGML_COMMON_DECL_CPP)
|
14
|
+
#include <cstdint>
|
15
|
+
|
16
|
+
typedef uint16_t lm_ggml_half;
|
17
|
+
typedef uint32_t lm_ggml_half2;
|
18
|
+
|
19
|
+
// std-c++ allow anonymous unions but some compiler warn on it
|
20
|
+
#define LM_GGML_COMMON_AGGR_U data
|
21
|
+
// std-c++ do not allow it.
|
22
|
+
#define LM_GGML_COMMON_AGGR_S data
|
10
23
|
|
11
24
|
#define LM_GGML_COMMON_DECL
|
12
25
|
#elif defined(LM_GGML_COMMON_DECL_METAL)
|
@@ -15,7 +28,8 @@ typedef uint32_t lm_ggml_half2;
|
|
15
28
|
typedef half lm_ggml_half;
|
16
29
|
typedef half2 lm_ggml_half2;
|
17
30
|
|
18
|
-
#define
|
31
|
+
#define LM_GGML_COMMON_AGGR_U
|
32
|
+
#define LM_GGML_COMMON_AGGR_S
|
19
33
|
|
20
34
|
#define LM_GGML_COMMON_DECL
|
21
35
|
#elif defined(LM_GGML_COMMON_DECL_CUDA)
|
@@ -29,7 +43,8 @@ typedef half2 lm_ggml_half2;
|
|
29
43
|
typedef half lm_ggml_half;
|
30
44
|
typedef half2 lm_ggml_half2;
|
31
45
|
|
32
|
-
#define
|
46
|
+
#define LM_GGML_COMMON_AGGR_U
|
47
|
+
#define LM_GGML_COMMON_AGGR_S data
|
33
48
|
|
34
49
|
#define LM_GGML_COMMON_DECL
|
35
50
|
#elif defined(LM_GGML_COMMON_DECL_HIP)
|
@@ -39,7 +54,8 @@ typedef half2 lm_ggml_half2;
|
|
39
54
|
typedef half lm_ggml_half;
|
40
55
|
typedef half2 lm_ggml_half2;
|
41
56
|
|
42
|
-
#define
|
57
|
+
#define LM_GGML_COMMON_AGGR_U
|
58
|
+
#define LM_GGML_COMMON_AGGR_S data
|
43
59
|
|
44
60
|
#define LM_GGML_COMMON_DECL
|
45
61
|
#elif defined(LM_GGML_COMMON_DECL_SYCL)
|
@@ -49,7 +65,8 @@ typedef half2 lm_ggml_half2;
|
|
49
65
|
typedef sycl::half lm_ggml_half;
|
50
66
|
typedef sycl::half2 lm_ggml_half2;
|
51
67
|
|
52
|
-
#define
|
68
|
+
#define LM_GGML_COMMON_AGGR_U
|
69
|
+
#define LM_GGML_COMMON_AGGR_S data
|
53
70
|
|
54
71
|
#define LM_GGML_COMMON_DECL
|
55
72
|
#endif
|
@@ -154,9 +171,9 @@ typedef struct {
|
|
154
171
|
struct {
|
155
172
|
lm_ggml_half d; // delta
|
156
173
|
lm_ggml_half m; // min
|
157
|
-
}
|
174
|
+
} LM_GGML_COMMON_AGGR_S;
|
158
175
|
lm_ggml_half2 dm;
|
159
|
-
};
|
176
|
+
} LM_GGML_COMMON_AGGR_U;
|
160
177
|
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
161
178
|
} block_q4_1;
|
162
179
|
static_assert(sizeof(block_q4_1) == 2 * sizeof(lm_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
|
@@ -175,9 +192,9 @@ typedef struct {
|
|
175
192
|
struct {
|
176
193
|
lm_ggml_half d; // delta
|
177
194
|
lm_ggml_half m; // min
|
178
|
-
}
|
195
|
+
} LM_GGML_COMMON_AGGR_S;
|
179
196
|
lm_ggml_half2 dm;
|
180
|
-
};
|
197
|
+
} LM_GGML_COMMON_AGGR_U;
|
181
198
|
uint8_t qh[4]; // 5-th bit of quants
|
182
199
|
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
183
200
|
} block_q5_1;
|
@@ -196,37 +213,13 @@ typedef struct {
|
|
196
213
|
struct {
|
197
214
|
lm_ggml_half d; // delta
|
198
215
|
lm_ggml_half s; // d * sum(qs[i])
|
199
|
-
}
|
216
|
+
} LM_GGML_COMMON_AGGR_S;
|
200
217
|
lm_ggml_half2 ds;
|
201
|
-
};
|
218
|
+
} LM_GGML_COMMON_AGGR_U;
|
202
219
|
int8_t qs[QK8_1]; // quants
|
203
220
|
} block_q8_1;
|
204
221
|
static_assert(sizeof(block_q8_1) == 2*sizeof(lm_ggml_half) + QK8_1, "wrong q8_1 block size/padding");
|
205
222
|
|
206
|
-
typedef struct {
|
207
|
-
lm_ggml_half d[4]; // deltas for 4 q4_0 blocks
|
208
|
-
uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
|
209
|
-
} block_q4_0x4;
|
210
|
-
static_assert(sizeof(block_q4_0x4) == 4 * sizeof(lm_ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
|
211
|
-
|
212
|
-
typedef struct {
|
213
|
-
lm_ggml_half d[8]; // deltas for 8 q4_0 blocks
|
214
|
-
uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
|
215
|
-
} block_q4_0x8;
|
216
|
-
static_assert(sizeof(block_q4_0x8) == 8 * sizeof(lm_ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
|
217
|
-
|
218
|
-
typedef struct {
|
219
|
-
lm_ggml_half d[4]; // deltas for 4 q8_0 blocks
|
220
|
-
int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
|
221
|
-
} block_q8_0x4;
|
222
|
-
static_assert(sizeof(block_q8_0x4) == 4 * sizeof(lm_ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
|
223
|
-
|
224
|
-
typedef struct {
|
225
|
-
lm_ggml_half d[8]; // deltas for 8 q8_0 blocks
|
226
|
-
int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
|
227
|
-
} block_q8_0x8;
|
228
|
-
static_assert(sizeof(block_q8_0x8) == 8 * sizeof(lm_ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
|
229
|
-
|
230
223
|
//
|
231
224
|
// Ternary quantization
|
232
225
|
//
|
@@ -261,9 +254,9 @@ typedef struct {
|
|
261
254
|
struct {
|
262
255
|
lm_ggml_half d; // super-block scale for quantized scales
|
263
256
|
lm_ggml_half dmin; // super-block scale for quantized mins
|
264
|
-
}
|
257
|
+
} LM_GGML_COMMON_AGGR_S;
|
265
258
|
lm_ggml_half2 dm;
|
266
|
-
};
|
259
|
+
} LM_GGML_COMMON_AGGR_U;
|
267
260
|
} block_q2_K;
|
268
261
|
static_assert(sizeof(block_q2_K) == 2*sizeof(lm_ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
269
262
|
|
@@ -288,9 +281,9 @@ typedef struct {
|
|
288
281
|
struct {
|
289
282
|
lm_ggml_half d; // super-block scale for quantized scales
|
290
283
|
lm_ggml_half dmin; // super-block scale for quantized mins
|
291
|
-
}
|
284
|
+
} LM_GGML_COMMON_AGGR_S;
|
292
285
|
lm_ggml_half2 dm;
|
293
|
-
};
|
286
|
+
} LM_GGML_COMMON_AGGR_U;
|
294
287
|
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
295
288
|
uint8_t qs[QK_K/2]; // 4--bit quants
|
296
289
|
} block_q4_K;
|
@@ -305,9 +298,9 @@ typedef struct {
|
|
305
298
|
struct {
|
306
299
|
lm_ggml_half d; // super-block scale for quantized scales
|
307
300
|
lm_ggml_half dmin; // super-block scale for quantized mins
|
308
|
-
}
|
301
|
+
} LM_GGML_COMMON_AGGR_S;
|
309
302
|
lm_ggml_half2 dm;
|
310
|
-
};
|
303
|
+
} LM_GGML_COMMON_AGGR_U;
|
311
304
|
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
312
305
|
uint8_t qh[QK_K/8]; // quants, high bit
|
313
306
|
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
@@ -418,12 +411,6 @@ typedef struct {
|
|
418
411
|
} block_iq4_xs;
|
419
412
|
static_assert(sizeof(block_iq4_xs) == sizeof(lm_ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
420
413
|
|
421
|
-
typedef struct {
|
422
|
-
lm_ggml_half d[4]; // deltas for 4 iq4_nl blocks
|
423
|
-
uint8_t qs[QK4_NL * 2];// nibbles / quants for 4 iq4_nl blocks
|
424
|
-
} block_iq4_nlx4;
|
425
|
-
static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "wrong iq4_nlx4 block size/padding");
|
426
|
-
|
427
414
|
#endif // LM_GGML_COMMON_DECL
|
428
415
|
#endif // LM_GGML_COMMON_DECL
|
429
416
|
|
@@ -437,6 +424,13 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "
|
|
437
424
|
#define LM_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
438
425
|
#define LM_GGML_TABLE_END() };
|
439
426
|
|
427
|
+
#define LM_GGML_COMMON_IMPL
|
428
|
+
#elif defined(LM_GGML_COMMON_IMPL_CPP)
|
429
|
+
#include <cstdint>
|
430
|
+
|
431
|
+
#define LM_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
432
|
+
#define LM_GGML_TABLE_END() };
|
433
|
+
|
440
434
|
#define LM_GGML_COMMON_IMPL
|
441
435
|
#elif defined(LM_GGML_COMMON_IMPL_METAL)
|
442
436
|
#include <metal_stdlib>
|
@@ -479,7 +473,7 @@ LM_GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
|
|
479
473
|
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
480
474
|
LM_GGML_TABLE_END()
|
481
475
|
|
482
|
-
//#if __CUDA_ARCH__ >=
|
476
|
+
//#if __CUDA_ARCH__ >= LM_GGML_CUDA_CC_DP4A // lowest compute capability for integer intrinsics
|
483
477
|
LM_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
|
484
478
|
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
485
479
|
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|