cui-llama.rn 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/android/src/main/CMakeLists.txt +5 -7
  2. package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
  3. package/android/src/main/jni.cpp +9 -9
  4. package/cpp/common.cpp +28 -44
  5. package/cpp/common.h +35 -14
  6. package/cpp/ggml-alloc.c +0 -1
  7. package/cpp/ggml-backend-impl.h +38 -20
  8. package/cpp/ggml-backend-reg.cpp +246 -92
  9. package/cpp/ggml-backend.h +1 -0
  10. package/cpp/ggml-common.h +42 -48
  11. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +642 -223
  12. package/cpp/ggml-cpu-aarch64.h +2 -26
  13. package/cpp/ggml-cpu-traits.cpp +36 -0
  14. package/cpp/ggml-cpu-traits.h +38 -0
  15. package/cpp/ggml-cpu.c +14122 -13971
  16. package/cpp/ggml-cpu.cpp +627 -715
  17. package/cpp/ggml-cpu.h +0 -17
  18. package/cpp/ggml-impl.h +22 -6
  19. package/cpp/ggml-metal.m +482 -24
  20. package/cpp/ggml-quants.c +0 -9
  21. package/cpp/ggml-threading.h +4 -2
  22. package/cpp/ggml.c +284 -178
  23. package/cpp/ggml.h +73 -25
  24. package/cpp/llama-grammar.cpp +15 -15
  25. package/cpp/llama-grammar.h +2 -5
  26. package/cpp/llama-sampling.cpp +35 -90
  27. package/cpp/llama-vocab.cpp +7 -2
  28. package/cpp/llama-vocab.h +1 -1
  29. package/cpp/llama.cpp +1782 -586
  30. package/cpp/llama.h +20 -19
  31. package/cpp/sampling.cpp +11 -16
  32. package/cpp/sgemm.cpp +265 -258
  33. package/cpp/sgemm.h +2 -2
  34. package/cpp/speculative.cpp +4 -0
  35. package/cpp/unicode.cpp +51 -51
  36. package/cpp/unicode.h +9 -10
  37. package/lib/commonjs/index.js +38 -1
  38. package/lib/commonjs/index.js.map +1 -1
  39. package/lib/module/index.js +36 -0
  40. package/lib/module/index.js.map +1 -1
  41. package/lib/typescript/NativeRNLlama.d.ts +2 -3
  42. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  43. package/lib/typescript/index.d.ts +36 -2
  44. package/lib/typescript/index.d.ts.map +1 -1
  45. package/package.json +1 -1
  46. package/src/NativeRNLlama.ts +3 -3
  47. package/src/index.ts +46 -2
  48. package/cpp/amx/amx.cpp +0 -196
  49. package/cpp/amx/amx.h +0 -20
  50. package/cpp/amx/common.h +0 -101
  51. package/cpp/amx/mmq.cpp +0 -2524
  52. package/cpp/amx/mmq.h +0 -16
  53. package/cpp/ggml-aarch64.c +0 -129
  54. package/cpp/ggml-aarch64.h +0 -19
package/cpp/ggml-backend-reg.cpp CHANGED
@@ -2,8 +2,13 @@
2
2
  #include "ggml-backend.h"
3
3
  #include "ggml-impl.h"
4
4
  #include <algorithm>
5
+ #include <codecvt>
5
6
  #include <cstring>
7
+ #include <filesystem>
8
+ #include <locale>
9
+ #include <memory>
6
10
  #include <string>
11
+ #include <type_traits>
7
12
  #include <vector>
8
13
 
9
14
  #ifdef _WIN32
@@ -41,6 +46,10 @@
41
46
  #include "ggml-vulkan.h"
42
47
  #endif
43
48
 
49
+ #ifdef LM_GGML_USE_OPENCL
50
+ #include "ggml-opencl.h"
51
+ #endif
52
+
44
53
  #ifdef LM_GGML_USE_BLAS
45
54
  #include "ggml-blas.h"
46
55
  #endif
@@ -57,9 +66,86 @@
57
66
  #include "ggml-kompute.h"
58
67
  #endif
59
68
 
69
+ // disable C++17 deprecation warning for std::codecvt_utf8
70
+ #if defined(__clang__)
71
+ # pragma clang diagnostic push
72
+ # pragma clang diagnostic ignored "-Wdeprecated-declarations"
73
+ #endif
74
+
75
+ static std::wstring utf8_to_utf16(const std::string & str) {
76
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
77
+ return converter.from_bytes(str);
78
+ }
79
+
80
+ static std::string utf16_to_utf8(const std::wstring & str) {
81
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
82
+ return converter.to_bytes(str);
83
+ }
84
+
85
+ #if defined(__clang__)
86
+ # pragma clang diagnostic pop
87
+ #endif
88
+
89
+ #ifdef _WIN32
90
+
91
+ using dl_handle = std::remove_pointer_t<HMODULE>;
92
+
93
+ struct dl_handle_deleter {
94
+ void operator()(HMODULE handle) {
95
+ FreeLibrary(handle);
96
+ }
97
+ };
98
+
99
+ static dl_handle * dl_load_library(const std::wstring & path) {
100
+ // suppress error dialogs for missing DLLs
101
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
102
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
103
+
104
+ HMODULE handle = LoadLibraryW(path.c_str());
105
+
106
+ SetErrorMode(old_mode);
107
+
108
+ return handle;
109
+ }
110
+
111
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
112
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
113
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
114
+
115
+ void * p = (void *) GetProcAddress(handle, name);
116
+
117
+ SetErrorMode(old_mode);
118
+
119
+ return p;
120
+ }
121
+
122
+ #else
123
+
124
+ using dl_handle = void;
125
+
126
+ struct dl_handle_deleter {
127
+ void operator()(void * handle) {
128
+ dlclose(handle);
129
+ }
130
+ };
131
+
132
+ static void * dl_load_library(const std::wstring & path) {
133
+ dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
134
+
135
+ return handle;
136
+ }
137
+
138
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
139
+ return dlsym(handle, name);
140
+ }
141
+
142
+ #endif
143
+
144
+ using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
145
+
60
146
  struct lm_ggml_backend_reg_entry {
61
147
  lm_ggml_backend_reg_t reg;
62
- void * handle;
148
+ dl_handle_ptr handle;
63
149
  };
64
150
 
65
151
  struct lm_ggml_backend_registry {
@@ -79,6 +165,9 @@ struct lm_ggml_backend_registry {
79
165
  #ifdef LM_GGML_USE_VULKAN
80
166
  register_backend(lm_ggml_backend_vk_reg());
81
167
  #endif
168
+ #ifdef LM_GGML_USE_OPENCL
169
+ register_backend(lm_ggml_backend_opencl_reg());
170
+ #endif
82
171
  #ifdef LM_GGML_USE_CANN
83
172
  register_backend(lm_ggml_backend_cann_reg());
84
173
  #endif
@@ -97,13 +186,16 @@ struct lm_ggml_backend_registry {
97
186
  }
98
187
 
99
188
  ~lm_ggml_backend_registry() {
100
- while (!backends.empty()) {
101
- // use silent since the log system may have been destroyed at this point
102
- unload_backend(backends.back().reg, true);
189
+ // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
190
+ // since backend threads may still be running and accessing resources from the dynamic library
191
+ for (auto & entry : backends) {
192
+ if (entry.handle) {
193
+ entry.handle.release(); // NOLINT
194
+ }
103
195
  }
104
196
  }
105
197
 
106
- void register_backend(lm_ggml_backend_reg_t reg, void * handle = nullptr) {
198
+ void register_backend(lm_ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
107
199
  if (!reg) {
108
200
  return;
109
201
  }
@@ -112,7 +204,7 @@ struct lm_ggml_backend_registry {
112
204
  LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
113
205
  __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
114
206
  #endif
115
- backends.push_back({ reg, handle });
207
+ backends.push_back({ reg, std::move(handle) });
116
208
  for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
117
209
  register_device(lm_ggml_backend_reg_dev_get(reg, i));
118
210
  }
@@ -125,80 +217,54 @@ struct lm_ggml_backend_registry {
125
217
  devices.push_back(device);
126
218
  }
127
219
 
128
- lm_ggml_backend_reg_t load_backend(const char * path, bool silent) {
129
- #ifdef _WIN32
130
- // suppress error dialogs for missing DLLs
131
- DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
132
- SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
133
-
134
- HMODULE handle = LoadLibraryA(path);
135
-
220
+ lm_ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
221
+ dl_handle_ptr handle { dl_load_library(path) };
136
222
  if (!handle) {
137
223
  if (!silent) {
138
- LM_GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
224
+ LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
139
225
  }
140
- SetErrorMode(old_mode);
141
226
  return nullptr;
142
227
  }
143
228
 
144
- lm_ggml_backend_init_t backend_init = (lm_ggml_backend_init_t) GetProcAddress(handle, "lm_ggml_backend_init");
145
-
146
- SetErrorMode(old_mode);
147
-
148
- if (!backend_init) {
229
+ auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
230
+ if (score_fn && score_fn() == 0) {
149
231
  if (!silent) {
150
- LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
232
+ LM_GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
151
233
  }
152
- FreeLibrary(handle);
153
234
  return nullptr;
154
235
  }
155
- #else
156
- void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
157
-
158
- if (!handle) {
159
- if (!silent) {
160
- LM_GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
161
- }
162
- return nullptr;
163
- }
164
-
165
- auto * backend_init = (lm_ggml_backend_init_t) dlsym(handle, "lm_ggml_backend_init");
166
236
 
167
- if (!backend_init) {
237
+ auto backend_init_fn = (lm_ggml_backend_init_t) dl_get_sym(handle.get(), "lm_ggml_backend_init");
238
+ if (!backend_init_fn) {
168
239
  if (!silent) {
169
- LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %s\n", __func__, path, dlerror());
240
+ LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
170
241
  }
171
- dlclose(handle);
172
242
  return nullptr;
173
243
  }
174
- #endif
175
- lm_ggml_backend_reg_t reg = backend_init();
176
244
 
245
+ lm_ggml_backend_reg_t reg = backend_init_fn();
177
246
  if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
178
247
  if (!silent) {
179
248
  if (!reg) {
180
- LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, path);
249
+ LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
181
250
  } else {
182
251
  LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
183
- __func__, path, reg->api_version, LM_GGML_BACKEND_API_VERSION);
252
+ __func__, utf16_to_utf8(path).c_str(), reg->api_version, LM_GGML_BACKEND_API_VERSION);
184
253
  }
185
254
  }
186
- #ifdef _WIN32
187
- FreeLibrary(handle);
188
- #else
189
- dlclose(handle);
190
- #endif
191
255
  return nullptr;
192
256
  }
193
257
 
194
- LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path);
195
- register_backend(reg, handle);
258
+ LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
259
+
260
+ register_backend(reg, std::move(handle));
261
+
196
262
  return reg;
197
263
  }
198
264
 
199
265
  void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
200
266
  auto it = std::find_if(backends.begin(), backends.end(),
201
- [reg](lm_ggml_backend_reg_entry entry) { return entry.reg == reg; });
267
+ [reg](const lm_ggml_backend_reg_entry & entry) { return entry.reg == reg; });
202
268
 
203
269
  if (it == backends.end()) {
204
270
  if (!silent) {
@@ -217,15 +283,6 @@ struct lm_ggml_backend_registry {
217
283
  [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
218
284
  devices.end());
219
285
 
220
- // unload library
221
- if (it->handle) {
222
- #ifdef _WIN32
223
- FreeLibrary((HMODULE) it->handle);
224
- #else
225
- dlclose(it->handle);
226
- #endif
227
- }
228
-
229
286
  // remove backend
230
287
  backends.erase(it);
231
288
  }
@@ -334,19 +391,14 @@ lm_ggml_backend_t lm_ggml_backend_init_best(void) {
334
391
 
335
392
  // Dynamic loading
336
393
  lm_ggml_backend_reg_t lm_ggml_backend_load(const char * path) {
337
- return get_reg().load_backend(path, false);
394
+ return get_reg().load_backend(utf8_to_utf16(path), false);
338
395
  }
339
396
 
340
397
  void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
341
398
  get_reg().unload_backend(reg, true);
342
399
  }
343
400
 
344
- void lm_ggml_backend_load_all() {
345
- std::vector<std::string> search_prefix;
346
-
347
- // add the executable directory to the search path
348
- // FIXME: this is convenient for development, but it should probably be disabled in production
349
-
401
+ static std::wstring get_executable_path() {
350
402
  #if defined(__APPLE__)
351
403
  // get executable path
352
404
  std::vector<char> path;
@@ -364,13 +416,17 @@ void lm_ggml_backend_load_all() {
364
416
  if (last_slash != std::string::npos) {
365
417
  base_path = base_path.substr(0, last_slash);
366
418
  }
367
- search_prefix.push_back(base_path + "/");
368
- #elif defined(__linux__)
419
+ return utf8_to_utf16(base_path + "/");
420
+ #elif defined(__linux__) || defined(__FreeBSD__)
369
421
  std::string base_path = ".";
370
422
  std::vector<char> path(1024);
371
423
  while (true) {
372
424
  // get executable path
425
+ # if defined(__linux__)
373
426
  ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
427
+ # elif defined(__FreeBSD__)
428
+ ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
429
+ # endif
374
430
  if (len == -1) {
375
431
  break;
376
432
  }
@@ -386,38 +442,136 @@ void lm_ggml_backend_load_all() {
386
442
  path.resize(path.size() * 2);
387
443
  }
388
444
 
389
- search_prefix.push_back(base_path + "/");
445
+ return utf8_to_utf16(base_path + "/");
446
+ #elif defined(_WIN32)
447
+ std::vector<wchar_t> path(MAX_PATH);
448
+ DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
449
+ if (len == 0) {
450
+ return {};
451
+ }
452
+ std::wstring base_path(path.data(), len);
453
+ // remove executable name
454
+ auto last_slash = base_path.find_last_of('\\');
455
+ if (last_slash != std::string::npos) {
456
+ base_path = base_path.substr(0, last_slash);
457
+ }
458
+ return base_path + L"\\";
459
+ #else
460
+ return {};
390
461
  #endif
462
+ }
391
463
 
392
- auto & reg = get_reg();
464
+ static std::wstring backend_filename_prefix() {
465
+ #ifdef _WIN32
466
+ return L"ggml-";
467
+ #else
468
+ return L"libggml-";
469
+ #endif
470
+ }
393
471
 
394
- auto try_load = [&](const std::string & name) {
395
- std::string os_name;
472
+ static std::wstring backend_filename_suffix() {
396
473
  #ifdef _WIN32
397
- os_name = "ggml-" + name + ".dll";
474
+ return L".dll";
398
475
  #else
399
- os_name = "libggml-" + name + ".so";
476
+ return L".so";
400
477
  #endif
401
- if (reg.load_backend(os_name.c_str(), true)) {
402
- return;
478
+ }
479
+
480
+ static std::wstring path_separator() {
481
+ #ifdef _WIN32
482
+ return L"\\";
483
+ #else
484
+ return L"/";
485
+ #endif
486
+ }
487
+
488
+ static lm_ggml_backend_reg_t lm_ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
489
+ // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
490
+ // TODO: search system paths
491
+ std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
492
+ std::vector<std::wstring> search_paths;
493
+ if (user_search_path == nullptr) {
494
+ search_paths.push_back(L"." + path_separator());
495
+ search_paths.push_back(get_executable_path());
496
+ } else {
497
+ search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
498
+ }
499
+
500
+ int best_score = 0;
501
+ std::wstring best_path;
502
+
503
+ namespace fs = std::filesystem;
504
+ for (const auto & search_path : search_paths) {
505
+ if (!fs::exists(search_path)) {
506
+ continue;
403
507
  }
404
- for (const auto & prefix : search_prefix) {
405
- if (reg.load_backend((prefix + os_name).c_str(), true)) {
406
- return;
508
+ fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
509
+ for (const auto & entry : dir_it) {
510
+ if (entry.is_regular_file()) {
511
+ std::wstring filename = entry.path().filename().wstring();
512
+ std::wstring ext = entry.path().extension().wstring();
513
+ if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
514
+ dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
515
+ if (!handle && !silent) {
516
+ LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
517
+ }
518
+ if (handle) {
519
+ auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
520
+ if (score_fn) {
521
+ int s = score_fn();
522
+ #ifndef NDEBUG
523
+ LM_GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
524
+ #endif
525
+ if (s > best_score) {
526
+ best_score = s;
527
+ best_path = entry.path().wstring();
528
+ }
529
+ } else {
530
+ if (!silent) {
531
+ LM_GGML_LOG_INFO("%s: failed to find lm_ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
532
+ }
533
+ }
534
+ }
535
+ }
407
536
  }
408
537
  }
409
- };
410
-
411
- try_load("amx");
412
- try_load("blas");
413
- try_load("cann");
414
- try_load("cuda");
415
- try_load("hip");
416
- try_load("kompute");
417
- try_load("metal");
418
- try_load("rpc");
419
- try_load("sycl");
420
- try_load("vulkan");
421
- try_load("musa");
422
- try_load("cpu");
538
+ }
539
+
540
+ if (best_score == 0) {
541
+ // try to load the base backend
542
+ for (const auto & search_path : search_paths) {
543
+ std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
544
+ if (fs::exists(path)) {
545
+ return get_reg().load_backend(path, silent);
546
+ }
547
+ }
548
+ return nullptr;
549
+ }
550
+
551
+ return get_reg().load_backend(best_path, silent);
552
+ }
553
+
554
+ void lm_ggml_backend_load_all() {
555
+ lm_ggml_backend_load_all_from_path(nullptr);
556
+ }
557
+
558
+ void lm_ggml_backend_load_all_from_path(const char * dir_path) {
559
+ #ifdef NDEBUG
560
+ bool silent = true;
561
+ #else
562
+ bool silent = false;
563
+ #endif
564
+
565
+ lm_ggml_backend_load_best("blas", silent, dir_path);
566
+ lm_ggml_backend_load_best("cann", silent, dir_path);
567
+ lm_ggml_backend_load_best("cuda", silent, dir_path);
568
+ lm_ggml_backend_load_best("hip", silent, dir_path);
569
+ lm_ggml_backend_load_best("kompute", silent, dir_path);
570
+ lm_ggml_backend_load_best("metal", silent, dir_path);
571
+ lm_ggml_backend_load_best("rpc", silent, dir_path);
572
+ lm_ggml_backend_load_best("sycl", silent, dir_path);
573
+ lm_ggml_backend_load_best("vulkan", silent, dir_path);
574
+ lm_ggml_backend_load_best("opencl", silent, dir_path);
575
+ lm_ggml_backend_load_best("musa", silent, dir_path);
576
+ lm_ggml_backend_load_best("cpu", silent, dir_path);
423
577
  }
package/cpp/ggml-backend.h CHANGED
@@ -228,6 +228,7 @@ extern "C" {
228
228
  LM_GGML_API void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg);
229
229
  // Load all known backends from dynamic libraries
230
230
  LM_GGML_API void lm_ggml_backend_load_all(void);
231
+ LM_GGML_API void lm_ggml_backend_load_all_from_path(const char * dir_path);
231
232
 
232
233
  //
233
234
  // Backend scheduler
package/cpp/ggml-common.h CHANGED
@@ -6,7 +6,20 @@
6
6
  typedef uint16_t lm_ggml_half;
7
7
  typedef uint32_t lm_ggml_half2;
8
8
 
9
- #define LM_GGML_COMMON_AGGR
9
+ #define LM_GGML_COMMON_AGGR_U
10
+ #define LM_GGML_COMMON_AGGR_S
11
+
12
+ #define LM_GGML_COMMON_DECL
13
+ #elif defined(LM_GGML_COMMON_DECL_CPP)
14
+ #include <cstdint>
15
+
16
+ typedef uint16_t lm_ggml_half;
17
+ typedef uint32_t lm_ggml_half2;
18
+
19
+ // std-c++ allow anonymous unions but some compiler warn on it
20
+ #define LM_GGML_COMMON_AGGR_U data
21
+ // std-c++ do not allow it.
22
+ #define LM_GGML_COMMON_AGGR_S data
10
23
 
11
24
  #define LM_GGML_COMMON_DECL
12
25
  #elif defined(LM_GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t lm_ggml_half2;
15
28
  typedef half lm_ggml_half;
16
29
  typedef half2 lm_ggml_half2;
17
30
 
18
- #define LM_GGML_COMMON_AGGR
31
+ #define LM_GGML_COMMON_AGGR_U
32
+ #define LM_GGML_COMMON_AGGR_S
19
33
 
20
34
  #define LM_GGML_COMMON_DECL
21
35
  #elif defined(LM_GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 lm_ggml_half2;
29
43
  typedef half lm_ggml_half;
30
44
  typedef half2 lm_ggml_half2;
31
45
 
32
- #define LM_GGML_COMMON_AGGR data
46
+ #define LM_GGML_COMMON_AGGR_U
47
+ #define LM_GGML_COMMON_AGGR_S data
33
48
 
34
49
  #define LM_GGML_COMMON_DECL
35
50
  #elif defined(LM_GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 lm_ggml_half2;
39
54
  typedef half lm_ggml_half;
40
55
  typedef half2 lm_ggml_half2;
41
56
 
42
- #define LM_GGML_COMMON_AGGR data
57
+ #define LM_GGML_COMMON_AGGR_U
58
+ #define LM_GGML_COMMON_AGGR_S data
43
59
 
44
60
  #define LM_GGML_COMMON_DECL
45
61
  #elif defined(LM_GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 lm_ggml_half2;
49
65
  typedef sycl::half lm_ggml_half;
50
66
  typedef sycl::half2 lm_ggml_half2;
51
67
 
52
- #define LM_GGML_COMMON_AGGR data
68
+ #define LM_GGML_COMMON_AGGR_U
69
+ #define LM_GGML_COMMON_AGGR_S data
53
70
 
54
71
  #define LM_GGML_COMMON_DECL
55
72
  #endif
@@ -154,9 +171,9 @@ typedef struct {
154
171
  struct {
155
172
  lm_ggml_half d; // delta
156
173
  lm_ggml_half m; // min
157
- } LM_GGML_COMMON_AGGR;
174
+ } LM_GGML_COMMON_AGGR_S;
158
175
  lm_ggml_half2 dm;
159
- };
176
+ } LM_GGML_COMMON_AGGR_U;
160
177
  uint8_t qs[QK4_1 / 2]; // nibbles / quants
161
178
  } block_q4_1;
162
179
  static_assert(sizeof(block_q4_1) == 2 * sizeof(lm_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -175,9 +192,9 @@ typedef struct {
175
192
  struct {
176
193
  lm_ggml_half d; // delta
177
194
  lm_ggml_half m; // min
178
- } LM_GGML_COMMON_AGGR;
195
+ } LM_GGML_COMMON_AGGR_S;
179
196
  lm_ggml_half2 dm;
180
- };
197
+ } LM_GGML_COMMON_AGGR_U;
181
198
  uint8_t qh[4]; // 5-th bit of quants
182
199
  uint8_t qs[QK5_1 / 2]; // nibbles / quants
183
200
  } block_q5_1;
@@ -196,37 +213,13 @@ typedef struct {
196
213
  struct {
197
214
  lm_ggml_half d; // delta
198
215
  lm_ggml_half s; // d * sum(qs[i])
199
- } LM_GGML_COMMON_AGGR;
216
+ } LM_GGML_COMMON_AGGR_S;
200
217
  lm_ggml_half2 ds;
201
- };
218
+ } LM_GGML_COMMON_AGGR_U;
202
219
  int8_t qs[QK8_1]; // quants
203
220
  } block_q8_1;
204
221
  static_assert(sizeof(block_q8_1) == 2*sizeof(lm_ggml_half) + QK8_1, "wrong q8_1 block size/padding");
205
222
 
206
- typedef struct {
207
- lm_ggml_half d[4]; // deltas for 4 q4_0 blocks
208
- uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
209
- } block_q4_0x4;
210
- static_assert(sizeof(block_q4_0x4) == 4 * sizeof(lm_ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
211
-
212
- typedef struct {
213
- lm_ggml_half d[8]; // deltas for 8 q4_0 blocks
214
- uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
215
- } block_q4_0x8;
216
- static_assert(sizeof(block_q4_0x8) == 8 * sizeof(lm_ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
217
-
218
- typedef struct {
219
- lm_ggml_half d[4]; // deltas for 4 q8_0 blocks
220
- int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
221
- } block_q8_0x4;
222
- static_assert(sizeof(block_q8_0x4) == 4 * sizeof(lm_ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
223
-
224
- typedef struct {
225
- lm_ggml_half d[8]; // deltas for 8 q8_0 blocks
226
- int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
227
- } block_q8_0x8;
228
- static_assert(sizeof(block_q8_0x8) == 8 * sizeof(lm_ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
229
-
230
223
  //
231
224
  // Ternary quantization
232
225
  //
@@ -261,9 +254,9 @@ typedef struct {
261
254
  struct {
262
255
  lm_ggml_half d; // super-block scale for quantized scales
263
256
  lm_ggml_half dmin; // super-block scale for quantized mins
264
- } LM_GGML_COMMON_AGGR;
257
+ } LM_GGML_COMMON_AGGR_S;
265
258
  lm_ggml_half2 dm;
266
- };
259
+ } LM_GGML_COMMON_AGGR_U;
267
260
  } block_q2_K;
268
261
  static_assert(sizeof(block_q2_K) == 2*sizeof(lm_ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
269
262
 
@@ -288,9 +281,9 @@ typedef struct {
288
281
  struct {
289
282
  lm_ggml_half d; // super-block scale for quantized scales
290
283
  lm_ggml_half dmin; // super-block scale for quantized mins
291
- } LM_GGML_COMMON_AGGR;
284
+ } LM_GGML_COMMON_AGGR_S;
292
285
  lm_ggml_half2 dm;
293
- };
286
+ } LM_GGML_COMMON_AGGR_U;
294
287
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
295
288
  uint8_t qs[QK_K/2]; // 4--bit quants
296
289
  } block_q4_K;
@@ -305,9 +298,9 @@ typedef struct {
305
298
  struct {
306
299
  lm_ggml_half d; // super-block scale for quantized scales
307
300
  lm_ggml_half dmin; // super-block scale for quantized mins
308
- } LM_GGML_COMMON_AGGR;
301
+ } LM_GGML_COMMON_AGGR_S;
309
302
  lm_ggml_half2 dm;
310
- };
303
+ } LM_GGML_COMMON_AGGR_U;
311
304
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
312
305
  uint8_t qh[QK_K/8]; // quants, high bit
313
306
  uint8_t qs[QK_K/2]; // quants, low 4 bits
@@ -418,12 +411,6 @@ typedef struct {
418
411
  } block_iq4_xs;
419
412
  static_assert(sizeof(block_iq4_xs) == sizeof(lm_ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
420
413
 
421
- typedef struct {
422
- lm_ggml_half d[4]; // deltas for 4 iq4_nl blocks
423
- uint8_t qs[QK4_NL * 2];// nibbles / quants for 4 iq4_nl blocks
424
- } block_iq4_nlx4;
425
- static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "wrong iq4_nlx4 block size/padding");
426
-
427
414
  #endif // LM_GGML_COMMON_DECL
428
415
  #endif // LM_GGML_COMMON_DECL
429
416
 
@@ -437,6 +424,13 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "
437
424
  #define LM_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
438
425
  #define LM_GGML_TABLE_END() };
439
426
 
427
+ #define LM_GGML_COMMON_IMPL
428
+ #elif defined(LM_GGML_COMMON_IMPL_CPP)
429
+ #include <cstdint>
430
+
431
+ #define LM_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
432
+ #define LM_GGML_TABLE_END() };
433
+
440
434
  #define LM_GGML_COMMON_IMPL
441
435
  #elif defined(LM_GGML_COMMON_IMPL_METAL)
442
436
  #include <metal_stdlib>
@@ -479,7 +473,7 @@ LM_GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
479
473
  240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
480
474
  LM_GGML_TABLE_END()
481
475
 
482
- //#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
476
+ //#if __CUDA_ARCH__ >= LM_GGML_CUDA_CC_DP4A // lowest compute capability for integer intrinsics
483
477
  LM_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
484
478
  0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
485
479
  0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,