cui-llama.rn 1.3.3 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +5 -7
- package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
- package/android/src/main/jni.cpp +9 -9
- package/cpp/common.cpp +21 -40
- package/cpp/common.h +21 -12
- package/cpp/ggml-backend-impl.h +38 -20
- package/cpp/ggml-backend-reg.cpp +216 -87
- package/cpp/ggml-backend.h +1 -0
- package/cpp/ggml-common.h +42 -48
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +591 -152
- package/cpp/ggml-cpu-aarch64.h +2 -26
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +14122 -13971
- package/cpp/ggml-cpu.cpp +618 -715
- package/cpp/ggml-cpu.h +0 -17
- package/cpp/ggml-impl.h +6 -6
- package/cpp/ggml-metal.m +482 -24
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml.c +132 -43
- package/cpp/ggml.h +44 -13
- package/cpp/llama-sampling.cpp +35 -90
- package/cpp/llama-vocab.cpp +2 -1
- package/cpp/llama.cpp +737 -233
- package/cpp/llama.h +20 -16
- package/cpp/sampling.cpp +11 -16
- package/cpp/speculative.cpp +4 -0
- package/cpp/unicode.cpp +51 -51
- package/cpp/unicode.h +9 -10
- package/lib/commonjs/index.js +38 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +36 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +2 -3
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +36 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +3 -3
- package/src/index.ts +46 -2
- package/cpp/amx/amx.cpp +0 -196
- package/cpp/amx/amx.h +0 -20
- package/cpp/amx/common.h +0 -101
- package/cpp/amx/mmq.cpp +0 -2524
- package/cpp/amx/mmq.h +0 -16
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
package/cpp/ggml-backend-reg.cpp
CHANGED
@@ -2,8 +2,13 @@
|
|
2
2
|
#include "ggml-backend.h"
|
3
3
|
#include "ggml-impl.h"
|
4
4
|
#include <algorithm>
|
5
|
+
#include <codecvt>
|
5
6
|
#include <cstring>
|
7
|
+
#include <filesystem>
|
8
|
+
#include <locale>
|
9
|
+
#include <memory>
|
6
10
|
#include <string>
|
11
|
+
#include <type_traits>
|
7
12
|
#include <vector>
|
8
13
|
|
9
14
|
#ifdef _WIN32
|
@@ -41,6 +46,10 @@
|
|
41
46
|
#include "ggml-vulkan.h"
|
42
47
|
#endif
|
43
48
|
|
49
|
+
#ifdef LM_GGML_USE_OPENCL
|
50
|
+
#include "ggml-opencl.h"
|
51
|
+
#endif
|
52
|
+
|
44
53
|
#ifdef LM_GGML_USE_BLAS
|
45
54
|
#include "ggml-blas.h"
|
46
55
|
#endif
|
@@ -57,9 +66,71 @@
|
|
57
66
|
#include "ggml-kompute.h"
|
58
67
|
#endif
|
59
68
|
|
69
|
+
#ifdef _WIN32
|
70
|
+
|
71
|
+
using dl_handle = std::remove_pointer_t<HMODULE>;
|
72
|
+
|
73
|
+
struct dl_handle_deleter {
|
74
|
+
void operator()(HMODULE handle) {
|
75
|
+
FreeLibrary(handle);
|
76
|
+
}
|
77
|
+
};
|
78
|
+
|
79
|
+
static dl_handle * dl_load_library(const std::wstring & path) {
|
80
|
+
// suppress error dialogs for missing DLLs
|
81
|
+
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
82
|
+
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
83
|
+
|
84
|
+
HMODULE handle = LoadLibraryW(path.c_str());
|
85
|
+
|
86
|
+
SetErrorMode(old_mode);
|
87
|
+
|
88
|
+
return handle;
|
89
|
+
}
|
90
|
+
|
91
|
+
static dl_handle * dl_load_library(const std::string & path) {
|
92
|
+
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
93
|
+
return dl_load_library(converter.from_bytes(path));
|
94
|
+
}
|
95
|
+
|
96
|
+
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
97
|
+
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
98
|
+
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
99
|
+
|
100
|
+
void * p = (void *) GetProcAddress(handle, name);
|
101
|
+
|
102
|
+
SetErrorMode(old_mode);
|
103
|
+
|
104
|
+
return p;
|
105
|
+
}
|
106
|
+
|
107
|
+
#else
|
108
|
+
|
109
|
+
using dl_handle = void;
|
110
|
+
|
111
|
+
struct dl_handle_deleter {
|
112
|
+
void operator()(void * handle) {
|
113
|
+
dlclose(handle);
|
114
|
+
}
|
115
|
+
};
|
116
|
+
|
117
|
+
static void * dl_load_library(const std::string & path) {
|
118
|
+
dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
|
119
|
+
|
120
|
+
return handle;
|
121
|
+
}
|
122
|
+
|
123
|
+
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
124
|
+
return dlsym(handle, name);
|
125
|
+
}
|
126
|
+
|
127
|
+
#endif
|
128
|
+
|
129
|
+
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
130
|
+
|
60
131
|
struct lm_ggml_backend_reg_entry {
|
61
132
|
lm_ggml_backend_reg_t reg;
|
62
|
-
|
133
|
+
dl_handle_ptr handle;
|
63
134
|
};
|
64
135
|
|
65
136
|
struct lm_ggml_backend_registry {
|
@@ -79,6 +150,9 @@ struct lm_ggml_backend_registry {
|
|
79
150
|
#ifdef LM_GGML_USE_VULKAN
|
80
151
|
register_backend(lm_ggml_backend_vk_reg());
|
81
152
|
#endif
|
153
|
+
#ifdef LM_GGML_USE_OPENCL
|
154
|
+
register_backend(lm_ggml_backend_opencl_reg());
|
155
|
+
#endif
|
82
156
|
#ifdef LM_GGML_USE_CANN
|
83
157
|
register_backend(lm_ggml_backend_cann_reg());
|
84
158
|
#endif
|
@@ -97,13 +171,16 @@ struct lm_ggml_backend_registry {
|
|
97
171
|
}
|
98
172
|
|
99
173
|
~lm_ggml_backend_registry() {
|
100
|
-
|
101
|
-
|
102
|
-
|
174
|
+
// FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
|
175
|
+
// since backend threads may still be running and accessing resources from the dynamic library
|
176
|
+
for (auto & entry : backends) {
|
177
|
+
if (entry.handle) {
|
178
|
+
entry.handle.release(); // NOLINT
|
179
|
+
}
|
103
180
|
}
|
104
181
|
}
|
105
182
|
|
106
|
-
void register_backend(lm_ggml_backend_reg_t reg,
|
183
|
+
void register_backend(lm_ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
|
107
184
|
if (!reg) {
|
108
185
|
return;
|
109
186
|
}
|
@@ -112,7 +189,7 @@ struct lm_ggml_backend_registry {
|
|
112
189
|
LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
113
190
|
__func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
|
114
191
|
#endif
|
115
|
-
backends.push_back({ reg, handle });
|
192
|
+
backends.push_back({ reg, std::move(handle) });
|
116
193
|
for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
|
117
194
|
register_device(lm_ggml_backend_reg_dev_get(reg, i));
|
118
195
|
}
|
@@ -126,79 +203,53 @@ struct lm_ggml_backend_registry {
|
|
126
203
|
}
|
127
204
|
|
128
205
|
lm_ggml_backend_reg_t load_backend(const char * path, bool silent) {
|
129
|
-
|
130
|
-
// suppress error dialogs for missing DLLs
|
131
|
-
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
132
|
-
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
133
|
-
|
134
|
-
HMODULE handle = LoadLibraryA(path);
|
135
|
-
|
206
|
+
dl_handle_ptr handle { dl_load_library(path) };
|
136
207
|
if (!handle) {
|
137
208
|
if (!silent) {
|
138
|
-
LM_GGML_LOG_ERROR("%s: failed to load %s
|
139
|
-
}
|
140
|
-
SetErrorMode(old_mode);
|
141
|
-
return nullptr;
|
142
|
-
}
|
143
|
-
|
144
|
-
lm_ggml_backend_init_t backend_init = (lm_ggml_backend_init_t) GetProcAddress(handle, "lm_ggml_backend_init");
|
145
|
-
|
146
|
-
SetErrorMode(old_mode);
|
147
|
-
|
148
|
-
if (!backend_init) {
|
149
|
-
if (!silent) {
|
150
|
-
LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
|
209
|
+
LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
|
151
210
|
}
|
152
|
-
FreeLibrary(handle);
|
153
211
|
return nullptr;
|
154
212
|
}
|
155
|
-
#else
|
156
|
-
void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
|
157
213
|
|
158
|
-
|
214
|
+
auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
|
215
|
+
if (score_fn && score_fn() == 0) {
|
159
216
|
if (!silent) {
|
160
|
-
|
217
|
+
LM_GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
|
161
218
|
}
|
162
219
|
return nullptr;
|
163
220
|
}
|
164
221
|
|
165
|
-
auto
|
166
|
-
|
167
|
-
if (!backend_init) {
|
222
|
+
auto backend_init_fn = (lm_ggml_backend_init_t) dl_get_sym(handle.get(), "lm_ggml_backend_init");
|
223
|
+
if (!backend_init_fn) {
|
168
224
|
if (!silent) {
|
169
|
-
LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s
|
225
|
+
LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s\n", __func__, path);
|
170
226
|
}
|
171
|
-
dlclose(handle);
|
172
227
|
return nullptr;
|
173
228
|
}
|
174
|
-
#endif
|
175
|
-
lm_ggml_backend_reg_t reg = backend_init();
|
176
229
|
|
230
|
+
lm_ggml_backend_reg_t reg = backend_init_fn();
|
177
231
|
if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
|
178
232
|
if (!silent) {
|
179
233
|
if (!reg) {
|
180
234
|
LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, path);
|
181
235
|
} else {
|
182
236
|
LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
183
|
-
|
237
|
+
__func__, path, reg->api_version, LM_GGML_BACKEND_API_VERSION);
|
184
238
|
}
|
185
239
|
}
|
186
|
-
#ifdef _WIN32
|
187
|
-
FreeLibrary(handle);
|
188
|
-
#else
|
189
|
-
dlclose(handle);
|
190
|
-
#endif
|
191
240
|
return nullptr;
|
192
241
|
}
|
193
242
|
|
194
243
|
LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path);
|
195
|
-
|
244
|
+
|
245
|
+
register_backend(reg, std::move(handle));
|
246
|
+
|
196
247
|
return reg;
|
197
248
|
}
|
198
249
|
|
199
250
|
void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
|
200
251
|
auto it = std::find_if(backends.begin(), backends.end(),
|
201
|
-
|
252
|
+
[reg](const lm_ggml_backend_reg_entry & entry) { return entry.reg == reg; });
|
202
253
|
|
203
254
|
if (it == backends.end()) {
|
204
255
|
if (!silent) {
|
@@ -217,15 +268,6 @@ struct lm_ggml_backend_registry {
|
|
217
268
|
[reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
|
218
269
|
devices.end());
|
219
270
|
|
220
|
-
// unload library
|
221
|
-
if (it->handle) {
|
222
|
-
#ifdef _WIN32
|
223
|
-
FreeLibrary((HMODULE) it->handle);
|
224
|
-
#else
|
225
|
-
dlclose(it->handle);
|
226
|
-
#endif
|
227
|
-
}
|
228
|
-
|
229
271
|
// remove backend
|
230
272
|
backends.erase(it);
|
231
273
|
}
|
@@ -341,12 +383,7 @@ void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
|
|
341
383
|
get_reg().unload_backend(reg, true);
|
342
384
|
}
|
343
385
|
|
344
|
-
|
345
|
-
std::vector<std::string> search_prefix;
|
346
|
-
|
347
|
-
// add the executable directory to the search path
|
348
|
-
// FIXME: this is convenient for development, but it should probably be disabled in production
|
349
|
-
|
386
|
+
static std::string get_executable_path() {
|
350
387
|
#if defined(__APPLE__)
|
351
388
|
// get executable path
|
352
389
|
std::vector<char> path;
|
@@ -364,7 +401,7 @@ void lm_ggml_backend_load_all() {
|
|
364
401
|
if (last_slash != std::string::npos) {
|
365
402
|
base_path = base_path.substr(0, last_slash);
|
366
403
|
}
|
367
|
-
|
404
|
+
return base_path + "/";
|
368
405
|
#elif defined(__linux__)
|
369
406
|
std::string base_path = ".";
|
370
407
|
std::vector<char> path(1024);
|
@@ -386,38 +423,130 @@ void lm_ggml_backend_load_all() {
|
|
386
423
|
path.resize(path.size() * 2);
|
387
424
|
}
|
388
425
|
|
389
|
-
|
426
|
+
return base_path + "/";
|
427
|
+
#elif defined(_WIN32)
|
428
|
+
std::vector<char> path(MAX_PATH);
|
429
|
+
DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
|
430
|
+
if (len == 0) {
|
431
|
+
return "";
|
432
|
+
}
|
433
|
+
std::string base_path(path.data(), len);
|
434
|
+
// remove executable name
|
435
|
+
auto last_slash = base_path.find_last_of('\\');
|
436
|
+
if (last_slash != std::string::npos) {
|
437
|
+
base_path = base_path.substr(0, last_slash);
|
438
|
+
}
|
439
|
+
return base_path + "\\";
|
390
440
|
#endif
|
441
|
+
}
|
391
442
|
|
392
|
-
|
443
|
+
static std::string backend_filename_prefix() {
|
444
|
+
#ifdef _WIN32
|
445
|
+
return "ggml-";
|
446
|
+
#else
|
447
|
+
return "libggml-";
|
448
|
+
#endif
|
449
|
+
}
|
393
450
|
|
394
|
-
|
395
|
-
std::string os_name;
|
451
|
+
static std::string backend_filename_suffix() {
|
396
452
|
#ifdef _WIN32
|
397
|
-
|
453
|
+
return ".dll";
|
398
454
|
#else
|
399
|
-
|
455
|
+
return ".so";
|
400
456
|
#endif
|
401
|
-
|
402
|
-
|
457
|
+
}
|
458
|
+
|
459
|
+
static lm_ggml_backend_reg_t lm_ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
460
|
+
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
461
|
+
// TODO: search system paths
|
462
|
+
std::string file_prefix = backend_filename_prefix() + name + "-";
|
463
|
+
std::vector<std::string> search_paths;
|
464
|
+
if (user_search_path == nullptr) {
|
465
|
+
search_paths.push_back("./");
|
466
|
+
search_paths.push_back(get_executable_path());
|
467
|
+
} else {
|
468
|
+
#if defined(_WIN32)
|
469
|
+
search_paths.push_back(std::string(user_search_path) + "\\");
|
470
|
+
#else
|
471
|
+
search_paths.push_back(std::string(user_search_path) + "/");
|
472
|
+
#endif
|
473
|
+
}
|
474
|
+
|
475
|
+
int best_score = 0;
|
476
|
+
std::string best_path;
|
477
|
+
|
478
|
+
namespace fs = std::filesystem;
|
479
|
+
for (const auto & search_path : search_paths) {
|
480
|
+
if (!fs::exists(search_path)) {
|
481
|
+
continue;
|
403
482
|
}
|
404
|
-
|
405
|
-
|
406
|
-
|
483
|
+
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
|
484
|
+
for (const auto & entry : dir_it) {
|
485
|
+
if (entry.is_regular_file()) {
|
486
|
+
std::string filename = entry.path().filename().string();
|
487
|
+
std::string ext = entry.path().extension().string();
|
488
|
+
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
489
|
+
dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
|
490
|
+
if (!handle && !silent) {
|
491
|
+
LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
|
492
|
+
}
|
493
|
+
if (handle) {
|
494
|
+
auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
|
495
|
+
if (score_fn) {
|
496
|
+
int s = score_fn();
|
497
|
+
#ifndef NDEBUG
|
498
|
+
LM_GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
|
499
|
+
#endif
|
500
|
+
if (s > best_score) {
|
501
|
+
best_score = s;
|
502
|
+
best_path = entry.path().string();
|
503
|
+
}
|
504
|
+
} else {
|
505
|
+
if (!silent) {
|
506
|
+
LM_GGML_LOG_INFO("%s: failed to find lm_ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
|
507
|
+
}
|
508
|
+
}
|
509
|
+
}
|
510
|
+
}
|
511
|
+
}
|
512
|
+
}
|
513
|
+
}
|
514
|
+
|
515
|
+
if (best_score == 0) {
|
516
|
+
// try to load the base backend
|
517
|
+
for (const auto & search_path : search_paths) {
|
518
|
+
std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
|
519
|
+
if (fs::exists(path)) {
|
520
|
+
return get_reg().load_backend(path.c_str(), silent);
|
407
521
|
}
|
408
522
|
}
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
523
|
+
return nullptr;
|
524
|
+
}
|
525
|
+
|
526
|
+
return get_reg().load_backend(best_path.c_str(), silent);
|
527
|
+
}
|
528
|
+
|
529
|
+
void lm_ggml_backend_load_all() {
|
530
|
+
lm_ggml_backend_load_all_from_path(nullptr);
|
531
|
+
}
|
532
|
+
|
533
|
+
void lm_ggml_backend_load_all_from_path(const char * dir_path) {
|
534
|
+
#ifdef NDEBUG
|
535
|
+
bool silent = true;
|
536
|
+
#else
|
537
|
+
bool silent = false;
|
538
|
+
#endif
|
539
|
+
|
540
|
+
lm_ggml_backend_load_best("blas", silent, dir_path);
|
541
|
+
lm_ggml_backend_load_best("cann", silent, dir_path);
|
542
|
+
lm_ggml_backend_load_best("cuda", silent, dir_path);
|
543
|
+
lm_ggml_backend_load_best("hip", silent, dir_path);
|
544
|
+
lm_ggml_backend_load_best("kompute", silent, dir_path);
|
545
|
+
lm_ggml_backend_load_best("metal", silent, dir_path);
|
546
|
+
lm_ggml_backend_load_best("rpc", silent, dir_path);
|
547
|
+
lm_ggml_backend_load_best("sycl", silent, dir_path);
|
548
|
+
lm_ggml_backend_load_best("vulkan", silent, dir_path);
|
549
|
+
lm_ggml_backend_load_best("opencl", silent, dir_path);
|
550
|
+
lm_ggml_backend_load_best("musa", silent, dir_path);
|
551
|
+
lm_ggml_backend_load_best("cpu", silent, dir_path);
|
423
552
|
}
|
package/cpp/ggml-backend.h
CHANGED
@@ -228,6 +228,7 @@ extern "C" {
|
|
228
228
|
LM_GGML_API void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg);
|
229
229
|
// Load all known backends from dynamic libraries
|
230
230
|
LM_GGML_API void lm_ggml_backend_load_all(void);
|
231
|
+
LM_GGML_API void lm_ggml_backend_load_all_from_path(const char * dir_path);
|
231
232
|
|
232
233
|
//
|
233
234
|
// Backend scheduler
|
package/cpp/ggml-common.h
CHANGED
@@ -6,7 +6,20 @@
|
|
6
6
|
typedef uint16_t lm_ggml_half;
|
7
7
|
typedef uint32_t lm_ggml_half2;
|
8
8
|
|
9
|
-
#define
|
9
|
+
#define LM_GGML_COMMON_AGGR_U
|
10
|
+
#define LM_GGML_COMMON_AGGR_S
|
11
|
+
|
12
|
+
#define LM_GGML_COMMON_DECL
|
13
|
+
#elif defined(LM_GGML_COMMON_DECL_CPP)
|
14
|
+
#include <cstdint>
|
15
|
+
|
16
|
+
typedef uint16_t lm_ggml_half;
|
17
|
+
typedef uint32_t lm_ggml_half2;
|
18
|
+
|
19
|
+
// standard C++ allows anonymous unions, but some compilers warn about them
|
20
|
+
#define LM_GGML_COMMON_AGGR_U data
|
21
|
+
// standard C++ does not allow anonymous structs
|
22
|
+
#define LM_GGML_COMMON_AGGR_S data
|
10
23
|
|
11
24
|
#define LM_GGML_COMMON_DECL
|
12
25
|
#elif defined(LM_GGML_COMMON_DECL_METAL)
|
@@ -15,7 +28,8 @@ typedef uint32_t lm_ggml_half2;
|
|
15
28
|
typedef half lm_ggml_half;
|
16
29
|
typedef half2 lm_ggml_half2;
|
17
30
|
|
18
|
-
#define
|
31
|
+
#define LM_GGML_COMMON_AGGR_U
|
32
|
+
#define LM_GGML_COMMON_AGGR_S
|
19
33
|
|
20
34
|
#define LM_GGML_COMMON_DECL
|
21
35
|
#elif defined(LM_GGML_COMMON_DECL_CUDA)
|
@@ -29,7 +43,8 @@ typedef half2 lm_ggml_half2;
|
|
29
43
|
typedef half lm_ggml_half;
|
30
44
|
typedef half2 lm_ggml_half2;
|
31
45
|
|
32
|
-
#define
|
46
|
+
#define LM_GGML_COMMON_AGGR_U
|
47
|
+
#define LM_GGML_COMMON_AGGR_S data
|
33
48
|
|
34
49
|
#define LM_GGML_COMMON_DECL
|
35
50
|
#elif defined(LM_GGML_COMMON_DECL_HIP)
|
@@ -39,7 +54,8 @@ typedef half2 lm_ggml_half2;
|
|
39
54
|
typedef half lm_ggml_half;
|
40
55
|
typedef half2 lm_ggml_half2;
|
41
56
|
|
42
|
-
#define
|
57
|
+
#define LM_GGML_COMMON_AGGR_U
|
58
|
+
#define LM_GGML_COMMON_AGGR_S data
|
43
59
|
|
44
60
|
#define LM_GGML_COMMON_DECL
|
45
61
|
#elif defined(LM_GGML_COMMON_DECL_SYCL)
|
@@ -49,7 +65,8 @@ typedef half2 lm_ggml_half2;
|
|
49
65
|
typedef sycl::half lm_ggml_half;
|
50
66
|
typedef sycl::half2 lm_ggml_half2;
|
51
67
|
|
52
|
-
#define
|
68
|
+
#define LM_GGML_COMMON_AGGR_U
|
69
|
+
#define LM_GGML_COMMON_AGGR_S data
|
53
70
|
|
54
71
|
#define LM_GGML_COMMON_DECL
|
55
72
|
#endif
|
@@ -154,9 +171,9 @@ typedef struct {
|
|
154
171
|
struct {
|
155
172
|
lm_ggml_half d; // delta
|
156
173
|
lm_ggml_half m; // min
|
157
|
-
}
|
174
|
+
} LM_GGML_COMMON_AGGR_S;
|
158
175
|
lm_ggml_half2 dm;
|
159
|
-
};
|
176
|
+
} LM_GGML_COMMON_AGGR_U;
|
160
177
|
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
161
178
|
} block_q4_1;
|
162
179
|
static_assert(sizeof(block_q4_1) == 2 * sizeof(lm_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
|
@@ -175,9 +192,9 @@ typedef struct {
|
|
175
192
|
struct {
|
176
193
|
lm_ggml_half d; // delta
|
177
194
|
lm_ggml_half m; // min
|
178
|
-
}
|
195
|
+
} LM_GGML_COMMON_AGGR_S;
|
179
196
|
lm_ggml_half2 dm;
|
180
|
-
};
|
197
|
+
} LM_GGML_COMMON_AGGR_U;
|
181
198
|
uint8_t qh[4]; // 5-th bit of quants
|
182
199
|
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
183
200
|
} block_q5_1;
|
@@ -196,37 +213,13 @@ typedef struct {
|
|
196
213
|
struct {
|
197
214
|
lm_ggml_half d; // delta
|
198
215
|
lm_ggml_half s; // d * sum(qs[i])
|
199
|
-
}
|
216
|
+
} LM_GGML_COMMON_AGGR_S;
|
200
217
|
lm_ggml_half2 ds;
|
201
|
-
};
|
218
|
+
} LM_GGML_COMMON_AGGR_U;
|
202
219
|
int8_t qs[QK8_1]; // quants
|
203
220
|
} block_q8_1;
|
204
221
|
static_assert(sizeof(block_q8_1) == 2*sizeof(lm_ggml_half) + QK8_1, "wrong q8_1 block size/padding");
|
205
222
|
|
206
|
-
typedef struct {
|
207
|
-
lm_ggml_half d[4]; // deltas for 4 q4_0 blocks
|
208
|
-
uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
|
209
|
-
} block_q4_0x4;
|
210
|
-
static_assert(sizeof(block_q4_0x4) == 4 * sizeof(lm_ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
|
211
|
-
|
212
|
-
typedef struct {
|
213
|
-
lm_ggml_half d[8]; // deltas for 8 q4_0 blocks
|
214
|
-
uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
|
215
|
-
} block_q4_0x8;
|
216
|
-
static_assert(sizeof(block_q4_0x8) == 8 * sizeof(lm_ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
|
217
|
-
|
218
|
-
typedef struct {
|
219
|
-
lm_ggml_half d[4]; // deltas for 4 q8_0 blocks
|
220
|
-
int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
|
221
|
-
} block_q8_0x4;
|
222
|
-
static_assert(sizeof(block_q8_0x4) == 4 * sizeof(lm_ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
|
223
|
-
|
224
|
-
typedef struct {
|
225
|
-
lm_ggml_half d[8]; // deltas for 8 q8_0 blocks
|
226
|
-
int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
|
227
|
-
} block_q8_0x8;
|
228
|
-
static_assert(sizeof(block_q8_0x8) == 8 * sizeof(lm_ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
|
229
|
-
|
230
223
|
//
|
231
224
|
// Ternary quantization
|
232
225
|
//
|
@@ -261,9 +254,9 @@ typedef struct {
|
|
261
254
|
struct {
|
262
255
|
lm_ggml_half d; // super-block scale for quantized scales
|
263
256
|
lm_ggml_half dmin; // super-block scale for quantized mins
|
264
|
-
}
|
257
|
+
} LM_GGML_COMMON_AGGR_S;
|
265
258
|
lm_ggml_half2 dm;
|
266
|
-
};
|
259
|
+
} LM_GGML_COMMON_AGGR_U;
|
267
260
|
} block_q2_K;
|
268
261
|
static_assert(sizeof(block_q2_K) == 2*sizeof(lm_ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
269
262
|
|
@@ -288,9 +281,9 @@ typedef struct {
|
|
288
281
|
struct {
|
289
282
|
lm_ggml_half d; // super-block scale for quantized scales
|
290
283
|
lm_ggml_half dmin; // super-block scale for quantized mins
|
291
|
-
}
|
284
|
+
} LM_GGML_COMMON_AGGR_S;
|
292
285
|
lm_ggml_half2 dm;
|
293
|
-
};
|
286
|
+
} LM_GGML_COMMON_AGGR_U;
|
294
287
|
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
295
288
|
uint8_t qs[QK_K/2]; // 4--bit quants
|
296
289
|
} block_q4_K;
|
@@ -305,9 +298,9 @@ typedef struct {
|
|
305
298
|
struct {
|
306
299
|
lm_ggml_half d; // super-block scale for quantized scales
|
307
300
|
lm_ggml_half dmin; // super-block scale for quantized mins
|
308
|
-
}
|
301
|
+
} LM_GGML_COMMON_AGGR_S;
|
309
302
|
lm_ggml_half2 dm;
|
310
|
-
};
|
303
|
+
} LM_GGML_COMMON_AGGR_U;
|
311
304
|
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
312
305
|
uint8_t qh[QK_K/8]; // quants, high bit
|
313
306
|
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
@@ -418,12 +411,6 @@ typedef struct {
|
|
418
411
|
} block_iq4_xs;
|
419
412
|
static_assert(sizeof(block_iq4_xs) == sizeof(lm_ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
420
413
|
|
421
|
-
typedef struct {
|
422
|
-
lm_ggml_half d[4]; // deltas for 4 iq4_nl blocks
|
423
|
-
uint8_t qs[QK4_NL * 2];// nibbles / quants for 4 iq4_nl blocks
|
424
|
-
} block_iq4_nlx4;
|
425
|
-
static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "wrong iq4_nlx4 block size/padding");
|
426
|
-
|
427
414
|
#endif // LM_GGML_COMMON_DECL
|
428
415
|
#endif // LM_GGML_COMMON_DECL
|
429
416
|
|
@@ -437,6 +424,13 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "
|
|
437
424
|
#define LM_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
438
425
|
#define LM_GGML_TABLE_END() };
|
439
426
|
|
427
|
+
#define LM_GGML_COMMON_IMPL
|
428
|
+
#elif defined(LM_GGML_COMMON_IMPL_CPP)
|
429
|
+
#include <cstdint>
|
430
|
+
|
431
|
+
#define LM_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
432
|
+
#define LM_GGML_TABLE_END() };
|
433
|
+
|
440
434
|
#define LM_GGML_COMMON_IMPL
|
441
435
|
#elif defined(LM_GGML_COMMON_IMPL_METAL)
|
442
436
|
#include <metal_stdlib>
|
@@ -479,7 +473,7 @@ LM_GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
|
|
479
473
|
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
480
474
|
LM_GGML_TABLE_END()
|
481
475
|
|
482
|
-
//#if __CUDA_ARCH__ >=
|
476
|
+
//#if __CUDA_ARCH__ >= LM_GGML_CUDA_CC_DP4A // lowest compute capability for integer intrinsics
|
483
477
|
LM_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
|
484
478
|
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
485
479
|
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|