cui-llama.rn 1.3.3 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/android/src/main/CMakeLists.txt +5 -7
  2. package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
  3. package/android/src/main/jni.cpp +9 -9
  4. package/cpp/common.cpp +21 -40
  5. package/cpp/common.h +21 -12
  6. package/cpp/ggml-backend-impl.h +38 -20
  7. package/cpp/ggml-backend-reg.cpp +216 -87
  8. package/cpp/ggml-backend.h +1 -0
  9. package/cpp/ggml-common.h +42 -48
  10. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +591 -152
  11. package/cpp/ggml-cpu-aarch64.h +2 -26
  12. package/cpp/ggml-cpu-traits.cpp +36 -0
  13. package/cpp/ggml-cpu-traits.h +38 -0
  14. package/cpp/ggml-cpu.c +14122 -13971
  15. package/cpp/ggml-cpu.cpp +618 -715
  16. package/cpp/ggml-cpu.h +0 -17
  17. package/cpp/ggml-impl.h +6 -6
  18. package/cpp/ggml-metal.m +482 -24
  19. package/cpp/ggml-quants.c +0 -9
  20. package/cpp/ggml-threading.h +4 -2
  21. package/cpp/ggml.c +132 -43
  22. package/cpp/ggml.h +44 -13
  23. package/cpp/llama-sampling.cpp +35 -90
  24. package/cpp/llama-vocab.cpp +2 -1
  25. package/cpp/llama.cpp +737 -233
  26. package/cpp/llama.h +20 -16
  27. package/cpp/sampling.cpp +11 -16
  28. package/cpp/speculative.cpp +4 -0
  29. package/cpp/unicode.cpp +51 -51
  30. package/cpp/unicode.h +9 -10
  31. package/lib/commonjs/index.js +38 -1
  32. package/lib/commonjs/index.js.map +1 -1
  33. package/lib/module/index.js +36 -0
  34. package/lib/module/index.js.map +1 -1
  35. package/lib/typescript/NativeRNLlama.d.ts +2 -3
  36. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  37. package/lib/typescript/index.d.ts +36 -2
  38. package/lib/typescript/index.d.ts.map +1 -1
  39. package/package.json +1 -1
  40. package/src/NativeRNLlama.ts +3 -3
  41. package/src/index.ts +46 -2
  42. package/cpp/amx/amx.cpp +0 -196
  43. package/cpp/amx/amx.h +0 -20
  44. package/cpp/amx/common.h +0 -101
  45. package/cpp/amx/mmq.cpp +0 -2524
  46. package/cpp/amx/mmq.h +0 -16
  47. package/cpp/ggml-aarch64.c +0 -129
  48. package/cpp/ggml-aarch64.h +0 -19
@@ -2,8 +2,13 @@
2
2
  #include "ggml-backend.h"
3
3
  #include "ggml-impl.h"
4
4
  #include <algorithm>
5
+ #include <codecvt>
5
6
  #include <cstring>
7
+ #include <filesystem>
8
+ #include <locale>
9
+ #include <memory>
6
10
  #include <string>
11
+ #include <type_traits>
7
12
  #include <vector>
8
13
 
9
14
  #ifdef _WIN32
@@ -41,6 +46,10 @@
41
46
  #include "ggml-vulkan.h"
42
47
  #endif
43
48
 
49
+ #ifdef LM_GGML_USE_OPENCL
50
+ #include "ggml-opencl.h"
51
+ #endif
52
+
44
53
  #ifdef LM_GGML_USE_BLAS
45
54
  #include "ggml-blas.h"
46
55
  #endif
@@ -57,9 +66,71 @@
57
66
  #include "ggml-kompute.h"
58
67
  #endif
59
68
 
69
+ #ifdef _WIN32
70
+
71
+ using dl_handle = std::remove_pointer_t<HMODULE>;
72
+
73
+ struct dl_handle_deleter {
74
+ void operator()(HMODULE handle) {
75
+ FreeLibrary(handle);
76
+ }
77
+ };
78
+
79
+ static dl_handle * dl_load_library(const std::wstring & path) {
80
+ // suppress error dialogs for missing DLLs
81
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
82
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
83
+
84
+ HMODULE handle = LoadLibraryW(path.c_str());
85
+
86
+ SetErrorMode(old_mode);
87
+
88
+ return handle;
89
+ }
90
+
91
+ static dl_handle * dl_load_library(const std::string & path) {
92
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
93
+ return dl_load_library(converter.from_bytes(path));
94
+ }
95
+
96
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
97
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
98
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
99
+
100
+ void * p = (void *) GetProcAddress(handle, name);
101
+
102
+ SetErrorMode(old_mode);
103
+
104
+ return p;
105
+ }
106
+
107
+ #else
108
+
109
+ using dl_handle = void;
110
+
111
+ struct dl_handle_deleter {
112
+ void operator()(void * handle) {
113
+ dlclose(handle);
114
+ }
115
+ };
116
+
117
+ static void * dl_load_library(const std::string & path) {
118
+ dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
119
+
120
+ return handle;
121
+ }
122
+
123
+ static void * dl_get_sym(dl_handle * handle, const char * name) {
124
+ return dlsym(handle, name);
125
+ }
126
+
127
+ #endif
128
+
129
+ using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
130
+
60
131
  struct lm_ggml_backend_reg_entry {
61
132
  lm_ggml_backend_reg_t reg;
62
- void * handle;
133
+ dl_handle_ptr handle;
63
134
  };
64
135
 
65
136
  struct lm_ggml_backend_registry {
@@ -79,6 +150,9 @@ struct lm_ggml_backend_registry {
79
150
  #ifdef LM_GGML_USE_VULKAN
80
151
  register_backend(lm_ggml_backend_vk_reg());
81
152
  #endif
153
+ #ifdef LM_GGML_USE_OPENCL
154
+ register_backend(lm_ggml_backend_opencl_reg());
155
+ #endif
82
156
  #ifdef LM_GGML_USE_CANN
83
157
  register_backend(lm_ggml_backend_cann_reg());
84
158
  #endif
@@ -97,13 +171,16 @@ struct lm_ggml_backend_registry {
97
171
  }
98
172
 
99
173
  ~lm_ggml_backend_registry() {
100
- while (!backends.empty()) {
101
- // use silent since the log system may have been destroyed at this point
102
- unload_backend(backends.back().reg, true);
174
+ // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
175
+ // since backend threads may still be running and accessing resources from the dynamic library
176
+ for (auto & entry : backends) {
177
+ if (entry.handle) {
178
+ entry.handle.release(); // NOLINT
179
+ }
103
180
  }
104
181
  }
105
182
 
106
- void register_backend(lm_ggml_backend_reg_t reg, void * handle = nullptr) {
183
+ void register_backend(lm_ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
107
184
  if (!reg) {
108
185
  return;
109
186
  }
@@ -112,7 +189,7 @@ struct lm_ggml_backend_registry {
112
189
  LM_GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
113
190
  __func__, lm_ggml_backend_reg_name(reg), lm_ggml_backend_reg_dev_count(reg));
114
191
  #endif
115
- backends.push_back({ reg, handle });
192
+ backends.push_back({ reg, std::move(handle) });
116
193
  for (size_t i = 0; i < lm_ggml_backend_reg_dev_count(reg); i++) {
117
194
  register_device(lm_ggml_backend_reg_dev_get(reg, i));
118
195
  }
@@ -126,79 +203,53 @@ struct lm_ggml_backend_registry {
126
203
  }
127
204
 
128
205
  lm_ggml_backend_reg_t load_backend(const char * path, bool silent) {
129
- #ifdef _WIN32
130
- // suppress error dialogs for missing DLLs
131
- DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
132
- SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
133
-
134
- HMODULE handle = LoadLibraryA(path);
135
-
206
+ dl_handle_ptr handle { dl_load_library(path) };
136
207
  if (!handle) {
137
208
  if (!silent) {
138
- LM_GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
139
- }
140
- SetErrorMode(old_mode);
141
- return nullptr;
142
- }
143
-
144
- lm_ggml_backend_init_t backend_init = (lm_ggml_backend_init_t) GetProcAddress(handle, "lm_ggml_backend_init");
145
-
146
- SetErrorMode(old_mode);
147
-
148
- if (!backend_init) {
149
- if (!silent) {
150
- LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
209
+ LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
151
210
  }
152
- FreeLibrary(handle);
153
211
  return nullptr;
154
212
  }
155
- #else
156
- void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
157
213
 
158
- if (!handle) {
214
+ auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
215
+ if (score_fn && score_fn() == 0) {
159
216
  if (!silent) {
160
- LM_GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
217
+ LM_GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
161
218
  }
162
219
  return nullptr;
163
220
  }
164
221
 
165
- auto * backend_init = (lm_ggml_backend_init_t) dlsym(handle, "lm_ggml_backend_init");
166
-
167
- if (!backend_init) {
222
+ auto backend_init_fn = (lm_ggml_backend_init_t) dl_get_sym(handle.get(), "lm_ggml_backend_init");
223
+ if (!backend_init_fn) {
168
224
  if (!silent) {
169
- LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s: %s\n", __func__, path, dlerror());
225
+ LM_GGML_LOG_ERROR("%s: failed to find lm_ggml_backend_init in %s\n", __func__, path);
170
226
  }
171
- dlclose(handle);
172
227
  return nullptr;
173
228
  }
174
- #endif
175
- lm_ggml_backend_reg_t reg = backend_init();
176
229
 
230
+ lm_ggml_backend_reg_t reg = backend_init_fn();
177
231
  if (!reg || reg->api_version != LM_GGML_BACKEND_API_VERSION) {
178
232
  if (!silent) {
179
233
  if (!reg) {
180
234
  LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: lm_ggml_backend_init returned NULL\n", __func__, path);
181
235
  } else {
182
236
  LM_GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
183
- __func__, path, reg->api_version, LM_GGML_BACKEND_API_VERSION);
237
+ __func__, path, reg->api_version, LM_GGML_BACKEND_API_VERSION);
184
238
  }
185
239
  }
186
- #ifdef _WIN32
187
- FreeLibrary(handle);
188
- #else
189
- dlclose(handle);
190
- #endif
191
240
  return nullptr;
192
241
  }
193
242
 
194
243
  LM_GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, lm_ggml_backend_reg_name(reg), path);
195
- register_backend(reg, handle);
244
+
245
+ register_backend(reg, std::move(handle));
246
+
196
247
  return reg;
197
248
  }
198
249
 
199
250
  void unload_backend(lm_ggml_backend_reg_t reg, bool silent) {
200
251
  auto it = std::find_if(backends.begin(), backends.end(),
201
- [reg](lm_ggml_backend_reg_entry entry) { return entry.reg == reg; });
252
+ [reg](const lm_ggml_backend_reg_entry & entry) { return entry.reg == reg; });
202
253
 
203
254
  if (it == backends.end()) {
204
255
  if (!silent) {
@@ -217,15 +268,6 @@ struct lm_ggml_backend_registry {
217
268
  [reg](lm_ggml_backend_dev_t dev) { return lm_ggml_backend_dev_backend_reg(dev) == reg; }),
218
269
  devices.end());
219
270
 
220
- // unload library
221
- if (it->handle) {
222
- #ifdef _WIN32
223
- FreeLibrary((HMODULE) it->handle);
224
- #else
225
- dlclose(it->handle);
226
- #endif
227
- }
228
-
229
271
  // remove backend
230
272
  backends.erase(it);
231
273
  }
@@ -341,12 +383,7 @@ void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg) {
341
383
  get_reg().unload_backend(reg, true);
342
384
  }
343
385
 
344
- void lm_ggml_backend_load_all() {
345
- std::vector<std::string> search_prefix;
346
-
347
- // add the executable directory to the search path
348
- // FIXME: this is convenient for development, but it should probably be disabled in production
349
-
386
+ static std::string get_executable_path() {
350
387
  #if defined(__APPLE__)
351
388
  // get executable path
352
389
  std::vector<char> path;
@@ -364,7 +401,7 @@ void lm_ggml_backend_load_all() {
364
401
  if (last_slash != std::string::npos) {
365
402
  base_path = base_path.substr(0, last_slash);
366
403
  }
367
- search_prefix.push_back(base_path + "/");
404
+ return base_path + "/";
368
405
  #elif defined(__linux__)
369
406
  std::string base_path = ".";
370
407
  std::vector<char> path(1024);
@@ -386,38 +423,130 @@ void lm_ggml_backend_load_all() {
386
423
  path.resize(path.size() * 2);
387
424
  }
388
425
 
389
- search_prefix.push_back(base_path + "/");
426
+ return base_path + "/";
427
+ #elif defined(_WIN32)
428
+ std::vector<char> path(MAX_PATH);
429
+ DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
430
+ if (len == 0) {
431
+ return "";
432
+ }
433
+ std::string base_path(path.data(), len);
434
+ // remove executable name
435
+ auto last_slash = base_path.find_last_of('\\');
436
+ if (last_slash != std::string::npos) {
437
+ base_path = base_path.substr(0, last_slash);
438
+ }
439
+ return base_path + "\\";
390
440
  #endif
441
+ }
391
442
 
392
- auto & reg = get_reg();
443
+ static std::string backend_filename_prefix() {
444
+ #ifdef _WIN32
445
+ return "ggml-";
446
+ #else
447
+ return "libggml-";
448
+ #endif
449
+ }
393
450
 
394
- auto try_load = [&](const std::string & name) {
395
- std::string os_name;
451
+ static std::string backend_filename_suffix() {
396
452
  #ifdef _WIN32
397
- os_name = "ggml-" + name + ".dll";
453
+ return ".dll";
398
454
  #else
399
- os_name = "libggml-" + name + ".so";
455
+ return ".so";
400
456
  #endif
401
- if (reg.load_backend(os_name.c_str(), true)) {
402
- return;
457
+ }
458
+
459
+ static lm_ggml_backend_reg_t lm_ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
460
+ // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
461
+ // TODO: search system paths
462
+ std::string file_prefix = backend_filename_prefix() + name + "-";
463
+ std::vector<std::string> search_paths;
464
+ if (user_search_path == nullptr) {
465
+ search_paths.push_back("./");
466
+ search_paths.push_back(get_executable_path());
467
+ } else {
468
+ #if defined(_WIN32)
469
+ search_paths.push_back(std::string(user_search_path) + "\\");
470
+ #else
471
+ search_paths.push_back(std::string(user_search_path) + "/");
472
+ #endif
473
+ }
474
+
475
+ int best_score = 0;
476
+ std::string best_path;
477
+
478
+ namespace fs = std::filesystem;
479
+ for (const auto & search_path : search_paths) {
480
+ if (!fs::exists(search_path)) {
481
+ continue;
403
482
  }
404
- for (const auto & prefix : search_prefix) {
405
- if (reg.load_backend((prefix + os_name).c_str(), true)) {
406
- return;
483
+ fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
484
+ for (const auto & entry : dir_it) {
485
+ if (entry.is_regular_file()) {
486
+ std::string filename = entry.path().filename().string();
487
+ std::string ext = entry.path().extension().string();
488
+ if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
489
+ dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
490
+ if (!handle && !silent) {
491
+ LM_GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
492
+ }
493
+ if (handle) {
494
+ auto score_fn = (lm_ggml_backend_score_t) dl_get_sym(handle.get(), "lm_ggml_backend_score");
495
+ if (score_fn) {
496
+ int s = score_fn();
497
+ #ifndef NDEBUG
498
+ LM_GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
499
+ #endif
500
+ if (s > best_score) {
501
+ best_score = s;
502
+ best_path = entry.path().string();
503
+ }
504
+ } else {
505
+ if (!silent) {
506
+ LM_GGML_LOG_INFO("%s: failed to find lm_ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
507
+ }
508
+ }
509
+ }
510
+ }
511
+ }
512
+ }
513
+ }
514
+
515
+ if (best_score == 0) {
516
+ // try to load the base backend
517
+ for (const auto & search_path : search_paths) {
518
+ std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
519
+ if (fs::exists(path)) {
520
+ return get_reg().load_backend(path.c_str(), silent);
407
521
  }
408
522
  }
409
- };
410
-
411
- try_load("amx");
412
- try_load("blas");
413
- try_load("cann");
414
- try_load("cuda");
415
- try_load("hip");
416
- try_load("kompute");
417
- try_load("metal");
418
- try_load("rpc");
419
- try_load("sycl");
420
- try_load("vulkan");
421
- try_load("musa");
422
- try_load("cpu");
523
+ return nullptr;
524
+ }
525
+
526
+ return get_reg().load_backend(best_path.c_str(), silent);
527
+ }
528
+
529
+ void lm_ggml_backend_load_all() {
530
+ lm_ggml_backend_load_all_from_path(nullptr);
531
+ }
532
+
533
+ void lm_ggml_backend_load_all_from_path(const char * dir_path) {
534
+ #ifdef NDEBUG
535
+ bool silent = true;
536
+ #else
537
+ bool silent = false;
538
+ #endif
539
+
540
+ lm_ggml_backend_load_best("blas", silent, dir_path);
541
+ lm_ggml_backend_load_best("cann", silent, dir_path);
542
+ lm_ggml_backend_load_best("cuda", silent, dir_path);
543
+ lm_ggml_backend_load_best("hip", silent, dir_path);
544
+ lm_ggml_backend_load_best("kompute", silent, dir_path);
545
+ lm_ggml_backend_load_best("metal", silent, dir_path);
546
+ lm_ggml_backend_load_best("rpc", silent, dir_path);
547
+ lm_ggml_backend_load_best("sycl", silent, dir_path);
548
+ lm_ggml_backend_load_best("vulkan", silent, dir_path);
549
+ lm_ggml_backend_load_best("opencl", silent, dir_path);
550
+ lm_ggml_backend_load_best("musa", silent, dir_path);
551
+ lm_ggml_backend_load_best("cpu", silent, dir_path);
423
552
  }
@@ -228,6 +228,7 @@ extern "C" {
228
228
  LM_GGML_API void lm_ggml_backend_unload(lm_ggml_backend_reg_t reg);
229
229
  // Load all known backends from dynamic libraries
230
230
  LM_GGML_API void lm_ggml_backend_load_all(void);
231
+ LM_GGML_API void lm_ggml_backend_load_all_from_path(const char * dir_path);
231
232
 
232
233
  //
233
234
  // Backend scheduler
package/cpp/ggml-common.h CHANGED
@@ -6,7 +6,20 @@
6
6
  typedef uint16_t lm_ggml_half;
7
7
  typedef uint32_t lm_ggml_half2;
8
8
 
9
- #define LM_GGML_COMMON_AGGR
9
+ #define LM_GGML_COMMON_AGGR_U
10
+ #define LM_GGML_COMMON_AGGR_S
11
+
12
+ #define LM_GGML_COMMON_DECL
13
+ #elif defined(LM_GGML_COMMON_DECL_CPP)
14
+ #include <cstdint>
15
+
16
+ typedef uint16_t lm_ggml_half;
17
+ typedef uint32_t lm_ggml_half2;
18
+
19
+ // standard C++ allows anonymous unions, but some compilers warn about them
20
+ #define LM_GGML_COMMON_AGGR_U data
21
+ // standard C++ does not allow anonymous structs, so the struct member is named
22
+ #define LM_GGML_COMMON_AGGR_S data
10
23
 
11
24
  #define LM_GGML_COMMON_DECL
12
25
  #elif defined(LM_GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t lm_ggml_half2;
15
28
  typedef half lm_ggml_half;
16
29
  typedef half2 lm_ggml_half2;
17
30
 
18
- #define LM_GGML_COMMON_AGGR
31
+ #define LM_GGML_COMMON_AGGR_U
32
+ #define LM_GGML_COMMON_AGGR_S
19
33
 
20
34
  #define LM_GGML_COMMON_DECL
21
35
  #elif defined(LM_GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 lm_ggml_half2;
29
43
  typedef half lm_ggml_half;
30
44
  typedef half2 lm_ggml_half2;
31
45
 
32
- #define LM_GGML_COMMON_AGGR data
46
+ #define LM_GGML_COMMON_AGGR_U
47
+ #define LM_GGML_COMMON_AGGR_S data
33
48
 
34
49
  #define LM_GGML_COMMON_DECL
35
50
  #elif defined(LM_GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 lm_ggml_half2;
39
54
  typedef half lm_ggml_half;
40
55
  typedef half2 lm_ggml_half2;
41
56
 
42
- #define LM_GGML_COMMON_AGGR data
57
+ #define LM_GGML_COMMON_AGGR_U
58
+ #define LM_GGML_COMMON_AGGR_S data
43
59
 
44
60
  #define LM_GGML_COMMON_DECL
45
61
  #elif defined(LM_GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 lm_ggml_half2;
49
65
  typedef sycl::half lm_ggml_half;
50
66
  typedef sycl::half2 lm_ggml_half2;
51
67
 
52
- #define LM_GGML_COMMON_AGGR data
68
+ #define LM_GGML_COMMON_AGGR_U
69
+ #define LM_GGML_COMMON_AGGR_S data
53
70
 
54
71
  #define LM_GGML_COMMON_DECL
55
72
  #endif
@@ -154,9 +171,9 @@ typedef struct {
154
171
  struct {
155
172
  lm_ggml_half d; // delta
156
173
  lm_ggml_half m; // min
157
- } LM_GGML_COMMON_AGGR;
174
+ } LM_GGML_COMMON_AGGR_S;
158
175
  lm_ggml_half2 dm;
159
- };
176
+ } LM_GGML_COMMON_AGGR_U;
160
177
  uint8_t qs[QK4_1 / 2]; // nibbles / quants
161
178
  } block_q4_1;
162
179
  static_assert(sizeof(block_q4_1) == 2 * sizeof(lm_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -175,9 +192,9 @@ typedef struct {
175
192
  struct {
176
193
  lm_ggml_half d; // delta
177
194
  lm_ggml_half m; // min
178
- } LM_GGML_COMMON_AGGR;
195
+ } LM_GGML_COMMON_AGGR_S;
179
196
  lm_ggml_half2 dm;
180
- };
197
+ } LM_GGML_COMMON_AGGR_U;
181
198
  uint8_t qh[4]; // 5-th bit of quants
182
199
  uint8_t qs[QK5_1 / 2]; // nibbles / quants
183
200
  } block_q5_1;
@@ -196,37 +213,13 @@ typedef struct {
196
213
  struct {
197
214
  lm_ggml_half d; // delta
198
215
  lm_ggml_half s; // d * sum(qs[i])
199
- } LM_GGML_COMMON_AGGR;
216
+ } LM_GGML_COMMON_AGGR_S;
200
217
  lm_ggml_half2 ds;
201
- };
218
+ } LM_GGML_COMMON_AGGR_U;
202
219
  int8_t qs[QK8_1]; // quants
203
220
  } block_q8_1;
204
221
  static_assert(sizeof(block_q8_1) == 2*sizeof(lm_ggml_half) + QK8_1, "wrong q8_1 block size/padding");
205
222
 
206
- typedef struct {
207
- lm_ggml_half d[4]; // deltas for 4 q4_0 blocks
208
- uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
209
- } block_q4_0x4;
210
- static_assert(sizeof(block_q4_0x4) == 4 * sizeof(lm_ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
211
-
212
- typedef struct {
213
- lm_ggml_half d[8]; // deltas for 8 q4_0 blocks
214
- uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
215
- } block_q4_0x8;
216
- static_assert(sizeof(block_q4_0x8) == 8 * sizeof(lm_ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
217
-
218
- typedef struct {
219
- lm_ggml_half d[4]; // deltas for 4 q8_0 blocks
220
- int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
221
- } block_q8_0x4;
222
- static_assert(sizeof(block_q8_0x4) == 4 * sizeof(lm_ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
223
-
224
- typedef struct {
225
- lm_ggml_half d[8]; // deltas for 8 q8_0 blocks
226
- int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
227
- } block_q8_0x8;
228
- static_assert(sizeof(block_q8_0x8) == 8 * sizeof(lm_ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
229
-
230
223
  //
231
224
  // Ternary quantization
232
225
  //
@@ -261,9 +254,9 @@ typedef struct {
261
254
  struct {
262
255
  lm_ggml_half d; // super-block scale for quantized scales
263
256
  lm_ggml_half dmin; // super-block scale for quantized mins
264
- } LM_GGML_COMMON_AGGR;
257
+ } LM_GGML_COMMON_AGGR_S;
265
258
  lm_ggml_half2 dm;
266
- };
259
+ } LM_GGML_COMMON_AGGR_U;
267
260
  } block_q2_K;
268
261
  static_assert(sizeof(block_q2_K) == 2*sizeof(lm_ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
269
262
 
@@ -288,9 +281,9 @@ typedef struct {
288
281
  struct {
289
282
  lm_ggml_half d; // super-block scale for quantized scales
290
283
  lm_ggml_half dmin; // super-block scale for quantized mins
291
- } LM_GGML_COMMON_AGGR;
284
+ } LM_GGML_COMMON_AGGR_S;
292
285
  lm_ggml_half2 dm;
293
- };
286
+ } LM_GGML_COMMON_AGGR_U;
294
287
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
295
288
  uint8_t qs[QK_K/2]; // 4-bit quants
296
289
  } block_q4_K;
@@ -305,9 +298,9 @@ typedef struct {
305
298
  struct {
306
299
  lm_ggml_half d; // super-block scale for quantized scales
307
300
  lm_ggml_half dmin; // super-block scale for quantized mins
308
- } LM_GGML_COMMON_AGGR;
301
+ } LM_GGML_COMMON_AGGR_S;
309
302
  lm_ggml_half2 dm;
310
- };
303
+ } LM_GGML_COMMON_AGGR_U;
311
304
  uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
312
305
  uint8_t qh[QK_K/8]; // quants, high bit
313
306
  uint8_t qs[QK_K/2]; // quants, low 4 bits
@@ -418,12 +411,6 @@ typedef struct {
418
411
  } block_iq4_xs;
419
412
  static_assert(sizeof(block_iq4_xs) == sizeof(lm_ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
420
413
 
421
- typedef struct {
422
- lm_ggml_half d[4]; // deltas for 4 iq4_nl blocks
423
- uint8_t qs[QK4_NL * 2];// nibbles / quants for 4 iq4_nl blocks
424
- } block_iq4_nlx4;
425
- static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "wrong iq4_nlx4 block size/padding");
426
-
427
414
  #endif // LM_GGML_COMMON_DECL
428
415
  #endif // LM_GGML_COMMON_DECL
429
416
 
@@ -437,6 +424,13 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(lm_ggml_half) + QK4_NL * 2, "
437
424
  #define LM_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
438
425
  #define LM_GGML_TABLE_END() };
439
426
 
427
+ #define LM_GGML_COMMON_IMPL
428
+ #elif defined(LM_GGML_COMMON_IMPL_CPP)
429
+ #include <cstdint>
430
+
431
+ #define LM_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
432
+ #define LM_GGML_TABLE_END() };
433
+
440
434
  #define LM_GGML_COMMON_IMPL
441
435
  #elif defined(LM_GGML_COMMON_IMPL_METAL)
442
436
  #include <metal_stdlib>
@@ -479,7 +473,7 @@ LM_GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
479
473
  240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
480
474
  LM_GGML_TABLE_END()
481
475
 
482
- //#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
476
+ //#if __CUDA_ARCH__ >= LM_GGML_CUDA_CC_DP4A // lowest compute capability for integer intrinsics
483
477
  LM_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
484
478
  0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
485
479
  0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,