cui-llama.rn 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +4 -23
  2. package/android/build.gradle +12 -3
  3. package/android/src/main/CMakeLists.txt +13 -7
  4. package/android/src/main/java/com/rnllama/LlamaContext.java +27 -20
  5. package/android/src/main/java/com/rnllama/RNLlama.java +5 -1
  6. package/android/src/main/jni.cpp +15 -12
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/cpp/README.md +1 -1
  16. package/cpp/common.cpp +158 -267
  17. package/cpp/common.h +46 -12
  18. package/cpp/ggml-alloc.c +1042 -1037
  19. package/cpp/ggml-backend-impl.h +255 -256
  20. package/cpp/ggml-backend-reg.cpp +582 -582
  21. package/cpp/ggml-backend.cpp +2002 -2002
  22. package/cpp/ggml-backend.h +354 -352
  23. package/cpp/ggml-common.h +1853 -1853
  24. package/cpp/ggml-cpp.h +39 -39
  25. package/cpp/ggml-cpu-aarch64.cpp +4247 -4247
  26. package/cpp/ggml-cpu-aarch64.h +8 -8
  27. package/cpp/ggml-cpu-impl.h +386 -386
  28. package/cpp/ggml-cpu-quants.c +10920 -10839
  29. package/cpp/ggml-cpu-traits.cpp +36 -36
  30. package/cpp/ggml-cpu-traits.h +38 -38
  31. package/cpp/ggml-cpu.c +329 -60
  32. package/cpp/ggml-cpu.cpp +10 -2
  33. package/cpp/ggml-cpu.h +135 -135
  34. package/cpp/ggml-impl.h +567 -567
  35. package/cpp/ggml-metal-impl.h +17 -17
  36. package/cpp/ggml-metal.m +4884 -4884
  37. package/cpp/ggml-quants.c +5238 -5238
  38. package/cpp/ggml-threading.h +14 -14
  39. package/cpp/ggml.c +6514 -6448
  40. package/cpp/ggml.h +2194 -2163
  41. package/cpp/gguf.cpp +1329 -1325
  42. package/cpp/gguf.h +202 -202
  43. package/cpp/json-schema-to-grammar.cpp +1045 -1045
  44. package/cpp/json-schema-to-grammar.h +8 -8
  45. package/cpp/json.hpp +24766 -24766
  46. package/cpp/llama-adapter.cpp +347 -346
  47. package/cpp/llama-adapter.h +74 -73
  48. package/cpp/llama-arch.cpp +1487 -1434
  49. package/cpp/llama-arch.h +400 -395
  50. package/cpp/llama-batch.cpp +368 -368
  51. package/cpp/llama-batch.h +88 -88
  52. package/cpp/llama-chat.cpp +578 -567
  53. package/cpp/llama-chat.h +52 -51
  54. package/cpp/llama-context.cpp +1775 -1771
  55. package/cpp/llama-context.h +128 -128
  56. package/cpp/llama-cparams.cpp +1 -1
  57. package/cpp/llama-cparams.h +37 -37
  58. package/cpp/llama-cpp.h +30 -30
  59. package/cpp/llama-grammar.cpp +1139 -1139
  60. package/cpp/llama-grammar.h +143 -143
  61. package/cpp/llama-hparams.cpp +71 -71
  62. package/cpp/llama-hparams.h +139 -140
  63. package/cpp/llama-impl.cpp +167 -167
  64. package/cpp/llama-impl.h +61 -61
  65. package/cpp/llama-kv-cache.cpp +718 -718
  66. package/cpp/llama-kv-cache.h +218 -218
  67. package/cpp/llama-mmap.cpp +2 -1
  68. package/cpp/llama-mmap.h +67 -67
  69. package/cpp/llama-model-loader.cpp +1124 -1011
  70. package/cpp/llama-model-loader.h +167 -158
  71. package/cpp/llama-model.cpp +3997 -2202
  72. package/cpp/llama-model.h +370 -391
  73. package/cpp/llama-sampling.cpp +2408 -2406
  74. package/cpp/llama-sampling.h +32 -48
  75. package/cpp/llama-vocab.cpp +3247 -1982
  76. package/cpp/llama-vocab.h +125 -182
  77. package/cpp/llama.cpp +416 -2886
  78. package/cpp/llama.h +1323 -1285
  79. package/cpp/log.cpp +401 -401
  80. package/cpp/log.h +121 -121
  81. package/cpp/rn-llama.cpp +822 -0
  82. package/cpp/rn-llama.h +123 -0
  83. package/cpp/rn-llama.hpp +18 -12
  84. package/cpp/sampling.cpp +505 -500
  85. package/cpp/sgemm.cpp +2597 -2597
  86. package/cpp/speculative.cpp +277 -274
  87. package/cpp/speculative.h +28 -28
  88. package/cpp/unicode.cpp +2 -3
  89. package/ios/CMakeLists.txt +99 -0
  90. package/ios/RNLlama.h +5 -1
  91. package/ios/RNLlama.mm +2 -2
  92. package/ios/RNLlamaContext.h +8 -1
  93. package/ios/RNLlamaContext.mm +15 -11
  94. package/ios/rnllama.xcframework/Info.plist +74 -0
  95. package/jest/mock.js +3 -2
  96. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  97. package/lib/commonjs/index.js +4 -2
  98. package/lib/commonjs/index.js.map +1 -1
  99. package/lib/module/NativeRNLlama.js.map +1 -1
  100. package/lib/module/index.js +4 -2
  101. package/lib/module/index.js.map +1 -1
  102. package/lib/typescript/NativeRNLlama.d.ts +5 -1
  103. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  104. package/lib/typescript/index.d.ts.map +1 -1
  105. package/llama-rn.podspec +8 -2
  106. package/package.json +5 -2
  107. package/src/NativeRNLlama.ts +5 -1
  108. package/src/index.ts +9 -2
package/cpp/gguf.cpp CHANGED
@@ -1,1325 +1,1329 @@
1
- #include "ggml.h"
2
- #include "ggml-backend.h"
3
- #include "ggml-impl.h"
4
- #include "gguf.h"
5
-
6
- #include <cinttypes>
7
- #include <cstddef>
8
- #include <cstdint>
9
- #include <cstdio>
10
- #include <cstdlib>
11
- #include <cstring>
12
- #include <map>
13
- #include <new>
14
- #include <stdexcept>
15
- #include <string>
16
- #include <vector>
17
-
18
- template <typename T>
19
- struct type_to_lm_gguf_type;
20
-
21
- template <>
22
- struct type_to_lm_gguf_type<uint8_t> {
23
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_UINT8;
24
- };
25
-
26
- template <>
27
- struct type_to_lm_gguf_type<int8_t> {
28
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_INT8;
29
- };
30
-
31
- template <>
32
- struct type_to_lm_gguf_type<uint16_t> {
33
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_UINT16;
34
- };
35
-
36
- template <>
37
- struct type_to_lm_gguf_type<int16_t> {
38
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_INT16;
39
- };
40
-
41
- template <>
42
- struct type_to_lm_gguf_type<uint32_t> {
43
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_UINT32;
44
- };
45
-
46
- template <>
47
- struct type_to_lm_gguf_type<int32_t> {
48
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_INT32;
49
- };
50
-
51
- template <>
52
- struct type_to_lm_gguf_type<float> {
53
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_FLOAT32;
54
- };
55
-
56
- template <>
57
- struct type_to_lm_gguf_type<bool> {
58
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_BOOL;
59
- };
60
-
61
- template <>
62
- struct type_to_lm_gguf_type<std::string> {
63
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_STRING;
64
- };
65
-
66
- template <>
67
- struct type_to_lm_gguf_type<uint64_t> {
68
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_UINT64;
69
- };
70
-
71
- template <>
72
- struct type_to_lm_gguf_type<int64_t> {
73
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_INT64;
74
- };
75
-
76
- template <>
77
- struct type_to_lm_gguf_type<double> {
78
- static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_FLOAT64;
79
- };
80
-
81
- static const std::map<lm_gguf_type, size_t> LM_GGUF_TYPE_SIZE = {
82
- {LM_GGUF_TYPE_UINT8, sizeof(uint8_t)},
83
- {LM_GGUF_TYPE_INT8, sizeof(int8_t)},
84
- {LM_GGUF_TYPE_UINT16, sizeof(uint16_t)},
85
- {LM_GGUF_TYPE_INT16, sizeof(int16_t)},
86
- {LM_GGUF_TYPE_UINT32, sizeof(uint32_t)},
87
- {LM_GGUF_TYPE_INT32, sizeof(int32_t)},
88
- {LM_GGUF_TYPE_FLOAT32, sizeof(float)},
89
- {LM_GGUF_TYPE_BOOL, sizeof(int8_t)},
90
- {LM_GGUF_TYPE_STRING, 0}, // undefined
91
- {LM_GGUF_TYPE_ARRAY, 0}, // undefined
92
- {LM_GGUF_TYPE_UINT64, sizeof(uint64_t)},
93
- {LM_GGUF_TYPE_INT64, sizeof(int64_t)},
94
- {LM_GGUF_TYPE_FLOAT64, sizeof(double)},
95
- };
96
- static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13");
97
-
98
- static const std::map<lm_gguf_type, const char *> LM_GGUF_TYPE_NAME = {
99
- {LM_GGUF_TYPE_UINT8, "u8"},
100
- {LM_GGUF_TYPE_INT8, "i8"},
101
- {LM_GGUF_TYPE_UINT16, "u16"},
102
- {LM_GGUF_TYPE_INT16, "i16"},
103
- {LM_GGUF_TYPE_UINT32, "u32"},
104
- {LM_GGUF_TYPE_INT32, "i32"},
105
- {LM_GGUF_TYPE_FLOAT32, "f32"},
106
- {LM_GGUF_TYPE_BOOL, "bool"},
107
- {LM_GGUF_TYPE_STRING, "str"},
108
- {LM_GGUF_TYPE_ARRAY, "arr"},
109
- {LM_GGUF_TYPE_UINT64, "u64"},
110
- {LM_GGUF_TYPE_INT64, "i64"},
111
- {LM_GGUF_TYPE_FLOAT64, "f64"},
112
- };
113
- static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13");
114
-
115
- size_t lm_gguf_type_size(enum lm_gguf_type type) {
116
- auto it = LM_GGUF_TYPE_SIZE.find(type);
117
- return it == LM_GGUF_TYPE_SIZE.end() ? 0 : it->second;
118
- }
119
-
120
- struct lm_gguf_kv {
121
- std::string key;
122
-
123
- bool is_array;
124
- enum lm_gguf_type type;
125
-
126
- std::vector<int8_t> data;
127
- std::vector<std::string> data_string;
128
-
129
- template <typename T>
130
- lm_gguf_kv(const std::string & key, const T value)
131
- : key(key), is_array(false), type(type_to_lm_gguf_type<T>::value) {
132
- LM_GGML_ASSERT(!key.empty());
133
- data.resize(sizeof(T));
134
- memcpy(data.data(), &value, sizeof(T));
135
- }
136
-
137
- template <typename T>
138
- lm_gguf_kv(const std::string & key, const std::vector<T> & value)
139
- : key(key), is_array(true), type(type_to_lm_gguf_type<T>::value) {
140
- LM_GGML_ASSERT(!key.empty());
141
- data.resize(value.size()*sizeof(T));
142
- for (size_t i = 0; i < value.size(); ++i) {
143
- const T tmp = value[i];
144
- memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
145
- }
146
- }
147
-
148
- lm_gguf_kv(const std::string & key, const std::string & value)
149
- : key(key), is_array(false), type(LM_GGUF_TYPE_STRING) {
150
- LM_GGML_ASSERT(!key.empty());
151
- data_string.push_back(value);
152
- }
153
-
154
- lm_gguf_kv(const std::string & key, const std::vector<std::string> & value)
155
- : key(key), is_array(true), type(LM_GGUF_TYPE_STRING) {
156
- LM_GGML_ASSERT(!key.empty());
157
- data_string = value;
158
- }
159
-
160
- const std::string & get_key() const {
161
- return key;
162
- }
163
-
164
- const enum lm_gguf_type & get_type() const {
165
- return type;
166
- }
167
-
168
- size_t get_ne() const {
169
- if (type == LM_GGUF_TYPE_STRING) {
170
- const size_t ne = data_string.size();
171
- LM_GGML_ASSERT(is_array || ne == 1);
172
- return ne;
173
- }
174
- const size_t type_size = lm_gguf_type_size(type);
175
- LM_GGML_ASSERT(data.size() % type_size == 0);
176
- const size_t ne = data.size() / type_size;
177
- LM_GGML_ASSERT(is_array || ne == 1);
178
- return ne;
179
- }
180
-
181
- template <typename T>
182
- const T & get_val(const size_t i = 0) const {
183
- LM_GGML_ASSERT(type_to_lm_gguf_type<T>::value == type);
184
- if constexpr (std::is_same<T, std::string>::value) {
185
- LM_GGML_ASSERT(data_string.size() >= i+1);
186
- return data_string[i];
187
- }
188
- const size_t type_size = lm_gguf_type_size(type);
189
- LM_GGML_ASSERT(data.size() % type_size == 0);
190
- LM_GGML_ASSERT(data.size() >= (i+1)*type_size);
191
- return reinterpret_cast<const T *>(data.data())[i];
192
- }
193
-
194
- void cast(const enum lm_gguf_type new_type) {
195
- const size_t new_type_size = lm_gguf_type_size(new_type);
196
- LM_GGML_ASSERT(data.size() % new_type_size == 0);
197
- type = new_type;
198
- }
199
- };
200
-
201
- struct lm_gguf_tensor_info {
202
- struct lm_ggml_tensor t; // for holding the equivalent info
203
- uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
204
- };
205
-
206
- struct lm_gguf_context {
207
- uint32_t version = LM_GGUF_VERSION;
208
-
209
- std::vector<struct lm_gguf_kv> kv;
210
- std::vector<struct lm_gguf_tensor_info> info;
211
-
212
- size_t alignment = LM_GGUF_DEFAULT_ALIGNMENT;
213
- size_t offset = 0; // offset of `data` from beginning of file
214
- size_t size = 0; // size of `data` in bytes
215
-
216
- void * data = nullptr;
217
- };
218
-
219
- struct lm_gguf_reader {
220
- FILE * file;
221
-
222
- lm_gguf_reader(FILE * file) : file(file) {}
223
-
224
- template <typename T>
225
- bool read(T & dst) const {
226
- return fread(&dst, 1, sizeof(dst), file) == sizeof(dst);
227
- }
228
-
229
- template <typename T>
230
- bool read(std::vector<T> & dst, const size_t n) const {
231
- dst.resize(n);
232
- for (size_t i = 0; i < dst.size(); ++i) {
233
- if constexpr (std::is_same<T, bool>::value) {
234
- bool tmp;
235
- if (!read(tmp)) {
236
- return false;
237
- }
238
- dst[i] = tmp;
239
- } else {
240
- if (!read(dst[i])) {
241
- return false;
242
- }
243
- }
244
- }
245
- return true;
246
- }
247
-
248
- bool read(bool & dst) const {
249
- int8_t tmp = -1;
250
- if (!read(tmp)) {
251
- return false;
252
- }
253
- dst = tmp != 0;
254
- return true;
255
- }
256
-
257
- bool read(enum lm_ggml_type & dst) const {
258
- int32_t tmp = -1;
259
- if (!read(tmp)) {
260
- return false;
261
- }
262
- dst = lm_ggml_type(tmp);
263
- return true;
264
- }
265
-
266
- bool read(enum lm_gguf_type & dst) const {
267
- int32_t tmp = -1;
268
- if (!read(tmp)) {
269
- return false;
270
- }
271
- dst = lm_gguf_type(tmp);
272
- return true;
273
- }
274
-
275
- bool read(std::string & dst) const {
276
- uint64_t size = -1;
277
- if (!read(size)) {
278
- return false;
279
- }
280
- dst.resize(size);
281
- return fread(dst.data(), 1, dst.length(), file) == dst.length();
282
- }
283
-
284
- bool read(void * dst, const size_t size) const {
285
- return fread(dst, 1, size, file) == size;
286
- }
287
- };
288
-
289
- struct lm_gguf_context * lm_gguf_init_empty(void) {
290
- return new lm_gguf_context;
291
- }
292
-
293
- template<typename T>
294
- bool lm_gguf_read_emplace_helper(const struct lm_gguf_reader & gr, std::vector<struct lm_gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
295
- if (is_array) {
296
- std::vector<T> value;
297
- try {
298
- if (!gr.read(value, n)) {
299
- return false;
300
- }
301
- } catch (std::length_error &) {
302
- fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
303
- return false;
304
- } catch (std::bad_alloc &) {
305
- fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
306
- return false;
307
- }
308
- kv.emplace_back(key, value);
309
- } else {
310
- T value;
311
- if (!gr.read(value)) {
312
- return false;
313
- }
314
- kv.emplace_back(key, value);
315
- }
316
- return true;
317
- }
318
-
319
- struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf_init_params params) {
320
- const struct lm_gguf_reader gr(file);
321
- struct lm_gguf_context * ctx = new lm_gguf_context;
322
-
323
- bool ok = true;
324
-
325
- // file magic
326
- {
327
- std::vector<char> magic;
328
- ok = ok && gr.read(magic, 4);
329
-
330
- if (!ok) {
331
- fprintf(stderr, "%s: failed to read magic\n", __func__);
332
- lm_gguf_free(ctx);
333
- return nullptr;
334
- }
335
-
336
- for (uint32_t i = 0; i < magic.size(); i++) {
337
- if (magic[i] != LM_GGUF_MAGIC[i]) {
338
- fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
339
- lm_gguf_free(ctx);
340
- return nullptr;
341
- }
342
- }
343
- }
344
-
345
- // header
346
- int64_t n_kv = 0;
347
- int64_t n_tensors = 0;
348
-
349
- if (ok && gr.read(ctx->version)) {
350
- if (ctx->version == 1) {
351
- fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
352
- ok = false;
353
- }
354
- if (ctx->version > LM_GGUF_VERSION) {
355
- fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
356
- __func__, ctx->version, LM_GGUF_VERSION);
357
- ok = false;
358
- }
359
- } else {
360
- ok = false;
361
- }
362
-
363
- if (ok && gr.read(n_tensors)) {
364
- static_assert(sizeof(size_t) <= 8 && sizeof(lm_gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
365
- if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(lm_gguf_tensor_info))) {
366
- fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
367
- __func__, n_tensors, SIZE_MAX/sizeof(lm_gguf_tensor_info));
368
- ok = false;
369
- }
370
- } else {
371
- ok = false;
372
- }
373
-
374
- if (ok && gr.read(n_kv)) {
375
- static_assert(sizeof(size_t) <= 8 && sizeof(lm_gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
376
- if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(lm_gguf_kv))) {
377
- fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
378
- __func__, n_kv, SIZE_MAX/sizeof(lm_gguf_kv));
379
- ok = false;
380
- }
381
- } else {
382
- ok = false;
383
- }
384
-
385
- if (!ok) {
386
- fprintf(stderr, "%s: failed to read header\n", __func__);
387
- lm_gguf_free(ctx);
388
- return nullptr;
389
- }
390
-
391
- // KV pairs
392
- {
393
- for (int64_t i = 0; ok && i < n_kv; ++i) {
394
- std::string key;
395
- lm_gguf_type type = lm_gguf_type(-1);
396
- bool is_array = false;
397
- uint64_t n = 1;
398
-
399
- try {
400
- ok = ok && gr.read(key);
401
- } catch (std::length_error &) {
402
- fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
403
- ok = false;
404
- } catch (std::bad_alloc &) {
405
- fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
406
- ok = false;
407
- }
408
- for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
409
- if (key == ctx->kv[j].key) {
410
- fprintf(stderr, "%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
411
- ok = false;
412
- }
413
- }
414
- if (!ok) {
415
- break;
416
- }
417
-
418
- ok = ok && gr.read(type);
419
- if (type == LM_GGUF_TYPE_ARRAY) {
420
- is_array = true;
421
- ok = ok && gr.read(type);
422
- ok = ok && gr.read(n);
423
- }
424
- if (!ok) {
425
- break;
426
- }
427
-
428
- switch (type) {
429
- case LM_GGUF_TYPE_UINT8: ok = ok && lm_gguf_read_emplace_helper<uint8_t> (gr, ctx->kv, key, is_array, n); break;
430
- case LM_GGUF_TYPE_INT8: ok = ok && lm_gguf_read_emplace_helper<int8_t> (gr, ctx->kv, key, is_array, n); break;
431
- case LM_GGUF_TYPE_UINT16: ok = ok && lm_gguf_read_emplace_helper<uint16_t> (gr, ctx->kv, key, is_array, n); break;
432
- case LM_GGUF_TYPE_INT16: ok = ok && lm_gguf_read_emplace_helper<int16_t> (gr, ctx->kv, key, is_array, n); break;
433
- case LM_GGUF_TYPE_UINT32: ok = ok && lm_gguf_read_emplace_helper<uint32_t> (gr, ctx->kv, key, is_array, n); break;
434
- case LM_GGUF_TYPE_INT32: ok = ok && lm_gguf_read_emplace_helper<int32_t> (gr, ctx->kv, key, is_array, n); break;
435
- case LM_GGUF_TYPE_FLOAT32: ok = ok && lm_gguf_read_emplace_helper<float> (gr, ctx->kv, key, is_array, n); break;
436
- case LM_GGUF_TYPE_BOOL: ok = ok && lm_gguf_read_emplace_helper<bool> (gr, ctx->kv, key, is_array, n); break;
437
- case LM_GGUF_TYPE_STRING: ok = ok && lm_gguf_read_emplace_helper<std::string>(gr, ctx->kv, key, is_array, n); break;
438
- case LM_GGUF_TYPE_UINT64: ok = ok && lm_gguf_read_emplace_helper<uint64_t> (gr, ctx->kv, key, is_array, n); break;
439
- case LM_GGUF_TYPE_INT64: ok = ok && lm_gguf_read_emplace_helper<int64_t> (gr, ctx->kv, key, is_array, n); break;
440
- case LM_GGUF_TYPE_FLOAT64: ok = ok && lm_gguf_read_emplace_helper<double> (gr, ctx->kv, key, is_array, n); break;
441
- case LM_GGUF_TYPE_ARRAY:
442
- default:
443
- {
444
- fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
445
- ok = false;
446
- } break;
447
- }
448
- }
449
-
450
- if (!ok) {
451
- fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
452
- lm_gguf_free(ctx);
453
- return nullptr;
454
- }
455
- LM_GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv);
456
-
457
- const int alignment_idx = lm_gguf_find_key(ctx, LM_GGUF_KEY_GENERAL_ALIGNMENT);
458
- ctx->alignment = alignment_idx == -1 ? LM_GGUF_DEFAULT_ALIGNMENT : lm_gguf_get_val_u32(ctx, alignment_idx);
459
-
460
- if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
461
- fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
462
- lm_gguf_free(ctx);
463
- return nullptr;
464
- }
465
- }
466
-
467
- // read the tensor info
468
- for (int64_t i = 0; ok && i < n_tensors; ++i) {
469
- struct lm_gguf_tensor_info info;
470
-
471
- // tensor name
472
- {
473
- std::string name;
474
- try {
475
- ok = ok && gr.read(name);
476
- } catch (std::length_error &) {
477
- fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
478
- ok = false;
479
- } catch (std::bad_alloc &) {
480
- fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
481
- ok = false;
482
- }
483
- if (name.length() >= LM_GGML_MAX_NAME) {
484
- fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), LM_GGML_MAX_NAME);
485
- ok = false;
486
- break;
487
- }
488
- lm_ggml_set_name(&info.t, name.c_str());
489
-
490
- // make sure there are no duplicate tensor names
491
- for (int64_t j = 0; ok && j < i; ++j) {
492
- if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
493
- fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
494
- ok = false;
495
- break;
496
- }
497
- }
498
- }
499
- if (!ok) {
500
- break;
501
- }
502
-
503
- // tensor shape
504
- {
505
- uint32_t n_dims = -1;
506
- ok = ok && gr.read(n_dims);
507
- if (n_dims > LM_GGML_MAX_DIMS) {
508
- fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
509
- __func__, info.t.name, n_dims, LM_GGML_MAX_DIMS);
510
- ok = false;
511
- break;
512
- }
513
- for (uint32_t j = 0; ok && j < LM_GGML_MAX_DIMS; ++j) {
514
- info.t.ne[j] = 1;
515
- if (j < n_dims) {
516
- ok = ok && gr.read(info.t.ne[j]);
517
- }
518
-
519
- // check that all ne are non-negative
520
- if (info.t.ne[j] < 0) {
521
- fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
522
- __func__, info.t.name, j, info.t.ne[j]);
523
- ok = false;
524
- break;
525
- }
526
- }
527
-
528
- // check that the total number of elements is representable
529
- if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
530
- (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
531
- (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
532
-
533
- fprintf(stderr, "%s: total number of elements in tensor '%s' with shape "
534
- "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
535
- __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
536
- ok = false;
537
- break;
538
- }
539
- }
540
- if (!ok) {
541
- break;
542
- }
543
-
544
- // tensor type
545
- {
546
- ok = ok && gr.read(info.t.type);
547
-
548
- // check that tensor type is within defined range
549
- if (info.t.type < 0 || info.t.type >= LM_GGML_TYPE_COUNT) {
550
- fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
551
- __func__, info.t.name, info.t.type, lm_ggml_type_name(info.t.type));
552
- ok = false;
553
- break;
554
- }
555
- const size_t type_size = lm_ggml_type_size(info.t.type);
556
- const int64_t blck_size = lm_ggml_blck_size(info.t.type);
557
-
558
- // check that row size is divisible by block size
559
- if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
560
- fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
561
- "not a multiple of block size (%" PRId64 ")\n",
562
- __func__, info.t.name, (int) info.t.type, lm_ggml_type_name(info.t.type), info.t.ne[0], blck_size);
563
- ok = false;
564
- break;
565
- }
566
-
567
- // calculate byte offsets given the tensor shape and type
568
- info.t.nb[0] = type_size;
569
- info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);
570
- for (int j = 2; j < LM_GGML_MAX_DIMS; ++j) {
571
- info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1];
572
- }
573
- }
574
- if (!ok) {
575
- break;
576
- }
577
-
578
- // tensor data offset within buffer
579
- ok = ok && gr.read(info.offset);
580
-
581
- ctx->info.push_back(info);
582
- }
583
-
584
- if (!ok) {
585
- fprintf(stderr, "%s: failed to read tensor info\n", __func__);
586
- lm_gguf_free(ctx);
587
- return nullptr;
588
- }
589
- LM_GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
590
-
591
- // we require the data section to be aligned, so take into account any padding
592
- if (fseek(file, LM_GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
593
- fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__);
594
- lm_gguf_free(ctx);
595
- return nullptr;
596
- }
597
-
598
- // store the current file offset - this is where the data section starts
599
- ctx->offset = ftell(file);
600
-
601
- // compute the total size of the data section, taking into account the alignment
602
- {
603
- ctx->size = 0;
604
- for (size_t i = 0; i < ctx->info.size(); ++i) {
605
- const lm_gguf_tensor_info & ti = ctx->info[i];
606
- if (ti.offset != ctx->size) {
607
- fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
608
- __func__, ti.t.name, ti.offset, ctx->size);
609
- fprintf(stderr, "%s: failed to read tensor data\n", __func__);
610
- lm_gguf_free(ctx);
611
- return nullptr;
612
- }
613
- ctx->size += LM_GGML_PAD(lm_ggml_nbytes(&ti.t), ctx->alignment);
614
- }
615
- }
616
-
617
- // load the tensor data only if requested
618
- if (params.ctx != nullptr) {
619
- // if the provided lm_gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
620
- // otherwise, we load the binary blob into the created lm_ggml_context as well, and point the "data" members of
621
- // the lm_ggml_tensor structs to the appropriate locations in the binary blob
622
-
623
- // compute the exact size needed for the new lm_ggml_context
624
- const size_t mem_size =
625
- params.no_alloc ?
626
- (n_tensors )*lm_ggml_tensor_overhead() :
627
- (n_tensors + 1)*lm_ggml_tensor_overhead() + ctx->size;
628
-
629
- struct lm_ggml_init_params pdata = {
630
- /*mem_size =*/ mem_size,
631
- /*mem_buffer =*/ nullptr,
632
- /*no_alloc =*/ params.no_alloc,
633
- };
634
-
635
- *params.ctx = lm_ggml_init(pdata);
636
- if (*params.ctx == nullptr) {
637
- fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__);
638
- lm_gguf_free(ctx);
639
- return nullptr;
640
- }
641
-
642
- struct lm_ggml_context * ctx_data = *params.ctx;
643
-
644
- struct lm_ggml_tensor * data = nullptr;
645
-
646
- if (!params.no_alloc) {
647
- data = lm_ggml_new_tensor_1d(ctx_data, LM_GGML_TYPE_I8, ctx->size);
648
-
649
- ok = ok && data != nullptr;
650
-
651
- // read the binary blob with the tensor data
652
- ok = ok && gr.read(data->data, ctx->size);
653
-
654
- if (!ok) {
655
- fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__);
656
- lm_ggml_free(ctx_data);
657
- *params.ctx = nullptr;
658
- lm_gguf_free(ctx);
659
- return nullptr;
660
- }
661
-
662
- ctx->data = data->data;
663
- }
664
-
665
- lm_ggml_set_no_alloc(ctx_data, true);
666
-
667
- // create the tensors
668
- for (size_t i = 0; i < ctx->info.size(); ++i) {
669
- const struct lm_gguf_tensor_info & info = ctx->info[i];
670
-
671
- struct lm_ggml_tensor * cur = lm_ggml_new_tensor(ctx_data, info.t.type, LM_GGML_MAX_DIMS, info.t.ne);
672
-
673
- ok = ok && cur != nullptr;
674
-
675
- if (!ok) {
676
- break;
677
- }
678
-
679
- lm_ggml_set_name(cur, info.t.name);
680
-
681
- // point the data member to the appropriate location in the binary blob using the tensor info
682
- if (!params.no_alloc) {
683
- cur->data = (char *) data->data + info.offset;
684
- }
685
- }
686
-
687
- if (!ok) {
688
- fprintf(stderr, "%s: failed to create tensors\n", __func__);
689
- lm_ggml_free(ctx_data);
690
- *params.ctx = nullptr;
691
- lm_gguf_free(ctx);
692
- return nullptr;
693
- }
694
-
695
- lm_ggml_set_no_alloc(ctx_data, params.no_alloc);
696
- }
697
-
698
- return ctx;
699
- }
700
-
701
- struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
702
- FILE * file = lm_ggml_fopen(fname, "rb");
703
-
704
- if (!file) {
705
- fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname);
706
- return nullptr;
707
- }
708
-
709
- struct lm_gguf_context * result = lm_gguf_init_from_file_impl(file, params);
710
- fclose(file);
711
- return result;
712
- }
713
-
714
- void lm_gguf_free(struct lm_gguf_context * ctx) {
715
- if (ctx == nullptr) {
716
- return;
717
- }
718
- delete ctx;
719
- }
720
-
721
- const char * lm_gguf_type_name(enum lm_gguf_type type) {
722
- auto it = LM_GGUF_TYPE_NAME.find(type);
723
- return it == LM_GGUF_TYPE_NAME.end() ? nullptr : it->second;
724
- }
725
-
726
- uint32_t lm_gguf_get_version(const struct lm_gguf_context * ctx) {
727
- return ctx->version;
728
- }
729
-
730
- size_t lm_gguf_get_alignment(const struct lm_gguf_context * ctx) {
731
- return ctx->alignment;
732
- }
733
-
734
- size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx) {
735
- return ctx->offset;
736
- }
737
-
738
- int64_t lm_gguf_get_n_kv(const struct lm_gguf_context * ctx) {
739
- return ctx->kv.size();
740
- }
741
-
742
- int64_t lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key) {
743
- // return -1 if key not found
744
- int64_t keyfound = -1;
745
-
746
- const int64_t n_kv = lm_gguf_get_n_kv(ctx);
747
-
748
- for (int64_t i = 0; i < n_kv; ++i) {
749
- if (strcmp(key, lm_gguf_get_key(ctx, i)) == 0) {
750
- keyfound = i;
751
- break;
752
- }
753
- }
754
-
755
- return keyfound;
756
- }
757
-
758
- const char * lm_gguf_get_key(const struct lm_gguf_context * ctx, int64_t key_id) {
759
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
760
- return ctx->kv[key_id].get_key().c_str();
761
- }
762
-
763
- enum lm_gguf_type lm_gguf_get_kv_type(const struct lm_gguf_context * ctx, int64_t key_id) {
764
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
765
- return ctx->kv[key_id].is_array ? LM_GGUF_TYPE_ARRAY : ctx->kv[key_id].get_type();
766
- }
767
-
768
- enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int64_t key_id) {
769
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
770
- LM_GGML_ASSERT(ctx->kv[key_id].is_array);
771
- return ctx->kv[key_id].get_type();
772
- }
773
-
774
- const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int64_t key_id) {
775
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
776
- LM_GGML_ASSERT(ctx->kv[key_id].get_type() != LM_GGUF_TYPE_STRING);
777
- return ctx->kv[key_id].data.data();
778
- }
779
-
780
- const char * lm_gguf_get_arr_str(const struct lm_gguf_context * ctx, int64_t key_id, size_t i) {
781
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
782
- LM_GGML_ASSERT(ctx->kv[key_id].get_type() == LM_GGUF_TYPE_STRING);
783
- return ctx->kv[key_id].data_string[i].c_str();
784
- }
785
-
786
- size_t lm_gguf_get_arr_n(const struct lm_gguf_context * ctx, int64_t key_id) {
787
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
788
-
789
- if (ctx->kv[key_id].type == LM_GGUF_TYPE_STRING) {
790
- return ctx->kv[key_id].data_string.size();
791
- }
792
-
793
- const size_t type_size = lm_gguf_type_size(ctx->kv[key_id].type);
794
- LM_GGML_ASSERT(ctx->kv[key_id].data.size() % type_size == 0);
795
- return ctx->kv[key_id].data.size() / type_size;
796
- }
797
-
798
- uint8_t lm_gguf_get_val_u8(const struct lm_gguf_context * ctx, int64_t key_id) {
799
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
800
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
801
- return ctx->kv[key_id].get_val<uint8_t>();
802
- }
803
-
804
- int8_t lm_gguf_get_val_i8(const struct lm_gguf_context * ctx, int64_t key_id) {
805
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
806
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
807
- return ctx->kv[key_id].get_val<int8_t>();
808
- }
809
-
810
- uint16_t lm_gguf_get_val_u16(const struct lm_gguf_context * ctx, int64_t key_id) {
811
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
812
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
813
- return ctx->kv[key_id].get_val<uint16_t>();
814
- }
815
-
816
- int16_t lm_gguf_get_val_i16(const struct lm_gguf_context * ctx, int64_t key_id) {
817
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
818
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
819
- return ctx->kv[key_id].get_val<int16_t>();
820
- }
821
-
822
- uint32_t lm_gguf_get_val_u32(const struct lm_gguf_context * ctx, int64_t key_id) {
823
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
824
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
825
- return ctx->kv[key_id].get_val<uint32_t>();
826
- }
827
-
828
- int32_t lm_gguf_get_val_i32(const struct lm_gguf_context * ctx, int64_t key_id) {
829
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
830
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
831
- return ctx->kv[key_id].get_val<int32_t>();
832
- }
833
-
834
- float lm_gguf_get_val_f32(const struct lm_gguf_context * ctx, int64_t key_id) {
835
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
836
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
837
- return ctx->kv[key_id].get_val<float>();
838
- }
839
-
840
- uint64_t lm_gguf_get_val_u64(const struct lm_gguf_context * ctx, int64_t key_id) {
841
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
842
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
843
- return ctx->kv[key_id].get_val<uint64_t>();
844
- }
845
-
846
- int64_t lm_gguf_get_val_i64(const struct lm_gguf_context * ctx, int64_t key_id) {
847
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
848
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
849
- return ctx->kv[key_id].get_val<int64_t>();
850
- }
851
-
852
- double lm_gguf_get_val_f64(const struct lm_gguf_context * ctx, int64_t key_id) {
853
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
854
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
855
- return ctx->kv[key_id].get_val<double>();
856
- }
857
-
858
- bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int64_t key_id) {
859
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
860
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
861
- return ctx->kv[key_id].get_val<bool>();
862
- }
863
-
864
- const char * lm_gguf_get_val_str(const struct lm_gguf_context * ctx, int64_t key_id) {
865
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
866
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
867
- return ctx->kv[key_id].get_val<std::string>().c_str();
868
- }
869
-
870
- const void * lm_gguf_get_val_data(const struct lm_gguf_context * ctx, int64_t key_id) {
871
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
872
- LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
873
- LM_GGML_ASSERT(ctx->kv[key_id].get_type() != LM_GGUF_TYPE_STRING);
874
- return ctx->kv[key_id].data.data();
875
- }
876
-
877
- int64_t lm_gguf_get_n_tensors(const struct lm_gguf_context * ctx) {
878
- return ctx->info.size();
879
- }
880
-
881
- int64_t lm_gguf_find_tensor(const struct lm_gguf_context * ctx, const char * name) {
882
- // return -1 if tensor not found
883
- int64_t tensor_id = -1;
884
-
885
- const int64_t n_tensors = lm_gguf_get_n_tensors(ctx);
886
-
887
- for (int64_t i = 0; i < n_tensors; ++i) {
888
- if (strcmp(name, lm_gguf_get_tensor_name(ctx, i)) == 0) {
889
- tensor_id = i;
890
- break;
891
- }
892
- }
893
-
894
- return tensor_id;
895
- }
896
-
897
- size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int64_t tensor_id) {
898
- LM_GGML_ASSERT(tensor_id >= 0 && tensor_id < lm_gguf_get_n_tensors(ctx));
899
- return ctx->info[tensor_id].offset;
900
- }
901
-
902
- const char * lm_gguf_get_tensor_name(const struct lm_gguf_context * ctx, int64_t tensor_id) {
903
- LM_GGML_ASSERT(tensor_id >= 0 && tensor_id < lm_gguf_get_n_tensors(ctx));
904
- return ctx->info[tensor_id].t.name;
905
- }
906
-
907
- enum lm_ggml_type lm_gguf_get_tensor_type(const struct lm_gguf_context * ctx, int64_t tensor_id) {
908
- LM_GGML_ASSERT(tensor_id >= 0 && tensor_id < lm_gguf_get_n_tensors(ctx));
909
- return ctx->info[tensor_id].t.type;
910
- }
911
-
912
- size_t lm_gguf_get_tensor_size(const struct lm_gguf_context * ctx, int64_t tensor_id) {
913
- LM_GGML_ASSERT(tensor_id >= 0 && tensor_id < lm_gguf_get_n_tensors(ctx));
914
- return lm_ggml_nbytes(&ctx->info[tensor_id].t);
915
- }
916
-
917
- int64_t lm_gguf_remove_key(struct lm_gguf_context * ctx, const char * key) {
918
- const int64_t key_id = lm_gguf_find_key(ctx, key);
919
- if (key_id >= 0) {
920
- ctx->kv.erase(ctx->kv.begin() + key_id);
921
- }
922
- return key_id;
923
- }
924
-
925
- template<typename T>
926
- static void lm_gguf_check_reserved_keys(const std::string & key, const T val) {
927
- if (key == LM_GGUF_KEY_GENERAL_ALIGNMENT) {
928
- if constexpr (std::is_same<T, uint32_t>::value) {
929
- LM_GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && LM_GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
930
- } else {
931
- LM_GGML_ABORT(LM_GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
932
- }
933
- }
934
- }
935
-
936
- void lm_gguf_set_val_u8(struct lm_gguf_context * ctx, const char * key, uint8_t val) {
937
- lm_gguf_check_reserved_keys(key, val);
938
- lm_gguf_remove_key(ctx, key);
939
- ctx->kv.emplace_back(key, val);
940
- }
941
-
942
- void lm_gguf_set_val_i8(struct lm_gguf_context * ctx, const char * key, int8_t val) {
943
- lm_gguf_check_reserved_keys(key, val);
944
- lm_gguf_remove_key(ctx, key);
945
- ctx->kv.emplace_back(key, val);
946
- }
947
-
948
- void lm_gguf_set_val_u16(struct lm_gguf_context * ctx, const char * key, uint16_t val) {
949
- lm_gguf_check_reserved_keys(key, val);
950
- lm_gguf_remove_key(ctx, key);
951
- ctx->kv.emplace_back(key, val);
952
- }
953
-
954
- void lm_gguf_set_val_i16(struct lm_gguf_context * ctx, const char * key, int16_t val) {
955
- lm_gguf_check_reserved_keys(key, val);
956
- lm_gguf_remove_key(ctx, key);
957
- ctx->kv.emplace_back(key, val);
958
- }
959
-
960
- void lm_gguf_set_val_u32(struct lm_gguf_context * ctx, const char * key, uint32_t val) {
961
- lm_gguf_check_reserved_keys(key, val);
962
- lm_gguf_remove_key(ctx, key);
963
- ctx->kv.emplace_back(key, val);
964
- }
965
-
966
- void lm_gguf_set_val_i32(struct lm_gguf_context * ctx, const char * key, int32_t val) {
967
- lm_gguf_check_reserved_keys(key, val);
968
- lm_gguf_remove_key(ctx, key);
969
- ctx->kv.emplace_back(key, val);
970
- }
971
-
972
- void lm_gguf_set_val_f32(struct lm_gguf_context * ctx, const char * key, float val) {
973
- lm_gguf_check_reserved_keys(key, val);
974
- lm_gguf_remove_key(ctx, key);
975
- ctx->kv.emplace_back(key, val);
976
- }
977
-
978
- void lm_gguf_set_val_u64(struct lm_gguf_context * ctx, const char * key, uint64_t val) {
979
- lm_gguf_check_reserved_keys(key, val);
980
- lm_gguf_remove_key(ctx, key);
981
- ctx->kv.emplace_back(key, val);
982
- }
983
-
984
- void lm_gguf_set_val_i64(struct lm_gguf_context * ctx, const char * key, int64_t val) {
985
- lm_gguf_check_reserved_keys(key, val);
986
- lm_gguf_remove_key(ctx, key);
987
- ctx->kv.emplace_back(key, val);
988
- }
989
-
990
- void lm_gguf_set_val_f64(struct lm_gguf_context * ctx, const char * key, double val) {
991
- lm_gguf_check_reserved_keys(key, val);
992
- lm_gguf_remove_key(ctx, key);
993
- ctx->kv.emplace_back(key, val);
994
- }
995
-
996
- void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val) {
997
- lm_gguf_check_reserved_keys(key, val);
998
- lm_gguf_remove_key(ctx, key);
999
- ctx->kv.emplace_back(key, val);
1000
- }
1001
-
1002
- void lm_gguf_set_val_str(struct lm_gguf_context * ctx, const char * key, const char * val) {
1003
- lm_gguf_check_reserved_keys(key, val);
1004
- lm_gguf_remove_key(ctx, key);
1005
- ctx->kv.emplace_back(key, std::string(val));
1006
- }
1007
-
1008
- void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, size_t n) {
1009
- lm_gguf_check_reserved_keys(key, data);
1010
- lm_gguf_remove_key(ctx, key);
1011
-
1012
- const size_t nbytes = n*lm_gguf_type_size(type);
1013
- std::vector<int8_t> tmp(nbytes);
1014
- if (!tmp.empty()) {
1015
- memcpy(tmp.data(), data, nbytes);
1016
- }
1017
- ctx->kv.emplace_back(key, tmp);
1018
- ctx->kv.back().cast(type);
1019
- }
1020
-
1021
- void lm_gguf_set_arr_str(struct lm_gguf_context * ctx, const char * key, const char ** data, size_t n) {
1022
- lm_gguf_check_reserved_keys(key, data);
1023
- lm_gguf_remove_key(ctx, key);
1024
-
1025
- std::vector<std::string> tmp(n);
1026
- for (size_t i = 0; i < n; ++i) {
1027
- tmp[i] = data[i];
1028
- }
1029
- ctx->kv.emplace_back(key, tmp);
1030
- }
1031
-
1032
- // set or add KV pairs from another context
1033
- void lm_gguf_set_kv(struct lm_gguf_context * ctx, const struct lm_gguf_context * src) {
1034
- const int64_t n_kv = lm_gguf_get_n_kv(src);
1035
- for (int64_t i = 0; i < n_kv; ++i) {
1036
- const struct lm_gguf_kv & kv = src->kv[i];
1037
-
1038
- if (!kv.is_array) {
1039
- switch (kv.get_type()) {
1040
- case LM_GGUF_TYPE_UINT8: lm_gguf_set_val_u8 (ctx, kv.get_key().c_str(), kv.get_val<uint8_t>()); break;
1041
- case LM_GGUF_TYPE_INT8: lm_gguf_set_val_i8 (ctx, kv.get_key().c_str(), kv.get_val<int8_t>()); break;
1042
- case LM_GGUF_TYPE_UINT16: lm_gguf_set_val_u16 (ctx, kv.get_key().c_str(), kv.get_val<uint16_t>()); break;
1043
- case LM_GGUF_TYPE_INT16: lm_gguf_set_val_i16 (ctx, kv.get_key().c_str(), kv.get_val<int16_t>()); break;
1044
- case LM_GGUF_TYPE_UINT32: lm_gguf_set_val_u32 (ctx, kv.get_key().c_str(), kv.get_val<uint32_t>()); break;
1045
- case LM_GGUF_TYPE_INT32: lm_gguf_set_val_i32 (ctx, kv.get_key().c_str(), kv.get_val<int32_t>()); break;
1046
- case LM_GGUF_TYPE_FLOAT32: lm_gguf_set_val_f32 (ctx, kv.get_key().c_str(), kv.get_val<float>()); break;
1047
- case LM_GGUF_TYPE_UINT64: lm_gguf_set_val_u64 (ctx, kv.get_key().c_str(), kv.get_val<uint64_t>()); break;
1048
- case LM_GGUF_TYPE_INT64: lm_gguf_set_val_i64 (ctx, kv.get_key().c_str(), kv.get_val<int64_t>()); break;
1049
- case LM_GGUF_TYPE_FLOAT64: lm_gguf_set_val_f64 (ctx, kv.get_key().c_str(), kv.get_val<double>()); break;
1050
- case LM_GGUF_TYPE_BOOL: lm_gguf_set_val_bool(ctx, kv.get_key().c_str(), kv.get_val<bool>()); break;
1051
- case LM_GGUF_TYPE_STRING: lm_gguf_set_val_str (ctx, kv.get_key().c_str(), kv.get_val<std::string>().c_str()); break;
1052
- case LM_GGUF_TYPE_ARRAY:
1053
- default: LM_GGML_ABORT("invalid type");
1054
- }
1055
- continue;
1056
- }
1057
-
1058
- const size_t ne = kv.get_ne();
1059
-
1060
- switch (kv.get_type()) {
1061
- case LM_GGUF_TYPE_UINT8:
1062
- case LM_GGUF_TYPE_INT8:
1063
- case LM_GGUF_TYPE_UINT16:
1064
- case LM_GGUF_TYPE_INT16:
1065
- case LM_GGUF_TYPE_UINT32:
1066
- case LM_GGUF_TYPE_INT32:
1067
- case LM_GGUF_TYPE_FLOAT32:
1068
- case LM_GGUF_TYPE_UINT64:
1069
- case LM_GGUF_TYPE_INT64:
1070
- case LM_GGUF_TYPE_FLOAT64:
1071
- case LM_GGUF_TYPE_BOOL: {
1072
- lm_gguf_set_arr_data(ctx, kv.get_key().c_str(), kv.get_type(), kv.data.data(), ne);
1073
- } break;
1074
- case LM_GGUF_TYPE_STRING: {
1075
- std::vector<const char *> tmp(ne);
1076
- for (size_t j = 0; j < ne; ++j) {
1077
- tmp[j] = kv.data_string[j].c_str();
1078
- }
1079
- lm_gguf_set_arr_str(ctx, kv.get_key().c_str(), tmp.data(), ne);
1080
- } break;
1081
- case LM_GGUF_TYPE_ARRAY:
1082
- default: LM_GGML_ABORT("invalid type");
1083
- }
1084
- }
1085
- }
1086
-
1087
- void lm_gguf_add_tensor(
1088
- struct lm_gguf_context * ctx,
1089
- const struct lm_ggml_tensor * tensor) {
1090
- LM_GGML_ASSERT(tensor);
1091
- if (lm_gguf_find_tensor(ctx, tensor->name) != -1) {
1092
- LM_GGML_ABORT("duplicate tensor name: %s", tensor->name);
1093
- }
1094
-
1095
- struct lm_gguf_tensor_info ti;
1096
- ti.t = *tensor;
1097
- ti.offset = ctx->info.empty() ? 0 :
1098
- ctx->info.back().offset + LM_GGML_PAD(lm_ggml_nbytes(&ctx->info.back().t), ctx->alignment);
1099
- ctx->info.push_back(ti);
1100
- }
1101
-
1102
- void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type) {
1103
- const int64_t tensor_id = lm_gguf_find_tensor(ctx, name);
1104
- if (tensor_id < 0) {
1105
- LM_GGML_ABORT("tensor not found: %s", name);
1106
- }
1107
- struct lm_ggml_tensor * tensor = &ctx->info[tensor_id].t;
1108
- const size_t type_size = lm_ggml_type_size(type);
1109
- const int64_t blck_size = lm_ggml_blck_size(type);
1110
-
1111
- tensor->type = type;
1112
- LM_GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type");
1113
-
1114
- tensor->nb[0] = type_size;
1115
- tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size);
1116
- for (int i = 2; i < LM_GGML_MAX_DIMS; i++) {
1117
- tensor->nb[i] = tensor->nb[i - 1]*tensor->ne[i - 1];
1118
- }
1119
-
1120
- // update offsets
1121
- const int64_t n_tensors = lm_gguf_get_n_tensors(ctx);
1122
- for (int64_t i = tensor_id + 1; i < n_tensors; ++i) {
1123
- ctx->info[i].offset = ctx->info[i - 1].offset + LM_GGML_PAD(lm_ggml_nbytes(&ctx->info[i - 1].t), ctx->alignment);
1124
- }
1125
- }
1126
-
1127
- void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data) {
1128
- const int64_t tensor_id = lm_gguf_find_tensor(ctx, name);
1129
- if (tensor_id < 0) {
1130
- LM_GGML_ABORT("tensor not found: %s", name);
1131
- }
1132
-
1133
- ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
1134
- }
1135
-
1136
- struct lm_gguf_writer {
1137
- std::vector<int8_t> & buf;
1138
-
1139
- lm_gguf_writer(std::vector<int8_t> & buf) : buf(buf) {}
1140
-
1141
- template <typename T>
1142
- void write(const T & val) const {
1143
- for (size_t i = 0; i < sizeof(val); ++i) {
1144
- buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
1145
- }
1146
- }
1147
-
1148
- void write(const std::vector<int8_t> & val) const {
1149
- buf.insert(buf.end(), val.begin(), val.end());
1150
- }
1151
-
1152
- void write(const bool & val) const {
1153
- const int8_t val8 = val ? 1 : 0;
1154
- write(val8);
1155
- }
1156
-
1157
- void write(const std::string & val) const {
1158
- {
1159
- const uint64_t n = val.length();
1160
- write(n);
1161
- }
1162
- for (size_t i = 0; i < val.length(); ++i) {
1163
- buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]);
1164
- }
1165
- }
1166
-
1167
- void write(const char * val) const {
1168
- write(std::string(val));
1169
- }
1170
-
1171
- void write(const enum lm_ggml_type & val) const {
1172
- write(int32_t(val));
1173
- }
1174
-
1175
- void write(const enum lm_gguf_type & val) const {
1176
- write(int32_t(val));
1177
- }
1178
-
1179
- void write(const struct lm_gguf_kv & kv) const {
1180
- const uint64_t ne = kv.get_ne();
1181
-
1182
- write(kv.get_key());
1183
-
1184
- if (kv.is_array) {
1185
- write(LM_GGUF_TYPE_ARRAY);
1186
- write(kv.get_type());
1187
- write(ne);
1188
- } else {
1189
- write(kv.get_type());
1190
- }
1191
-
1192
- switch (kv.get_type()) {
1193
- case LM_GGUF_TYPE_UINT8:
1194
- case LM_GGUF_TYPE_INT8:
1195
- case LM_GGUF_TYPE_UINT16:
1196
- case LM_GGUF_TYPE_INT16:
1197
- case LM_GGUF_TYPE_UINT32:
1198
- case LM_GGUF_TYPE_INT32:
1199
- case LM_GGUF_TYPE_FLOAT32:
1200
- case LM_GGUF_TYPE_UINT64:
1201
- case LM_GGUF_TYPE_INT64:
1202
- case LM_GGUF_TYPE_FLOAT64: {
1203
- write(kv.data);
1204
- } break;
1205
- case LM_GGUF_TYPE_BOOL: {
1206
- for (size_t i = 0; i < ne; ++i) {
1207
- write(kv.get_val<bool>(i));
1208
- }
1209
- } break;
1210
- case LM_GGUF_TYPE_STRING: {
1211
- for (size_t i = 0; i < ne; ++i) {
1212
- write(kv.get_val<std::string>(i));
1213
- }
1214
- } break;
1215
- case LM_GGUF_TYPE_ARRAY:
1216
- default: LM_GGML_ABORT("invalid type");
1217
- }
1218
- }
1219
-
1220
- void write_tensor_meta(const struct lm_gguf_tensor_info & info) const {
1221
- write(info.t.name);
1222
-
1223
- const uint32_t n_dims = lm_ggml_n_dims(&info.t);
1224
- write(n_dims);
1225
-
1226
- for (uint32_t j = 0; j < n_dims; ++j) {
1227
- write(info.t.ne[j]);
1228
- }
1229
- write(info.t.type);
1230
- write(info.offset);
1231
- }
1232
-
1233
- void pad(const size_t alignment) const {
1234
- while (buf.size() % alignment != 0) {
1235
- const int8_t zero = 0;
1236
- write(zero);
1237
- }
1238
- }
1239
-
1240
- void write_tensor_data(const struct lm_gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const {
1241
- LM_GGML_ASSERT(buf.size() - offset_data == info.offset);
1242
-
1243
- LM_GGML_ASSERT(lm_ggml_is_contiguous(&info.t));
1244
- const size_t offset = buf.size();
1245
- const size_t nbytes = lm_ggml_nbytes(&info.t);
1246
-
1247
- buf.resize(offset + nbytes);
1248
- if (info.t.buffer) {
1249
- lm_ggml_backend_tensor_get(&info.t, buf.data() + offset, 0, nbytes);
1250
- } else {
1251
- LM_GGML_ASSERT(info.t.data);
1252
- memcpy(buf.data() + offset, info.t.data, nbytes);
1253
- }
1254
-
1255
- pad(alignment);
1256
- }
1257
- };
1258
-
1259
- void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
1260
- const struct lm_gguf_writer gw(buf);
1261
-
1262
- const int64_t n_kv = lm_gguf_get_n_kv(ctx);
1263
- const int64_t n_tensors = lm_gguf_get_n_tensors(ctx);
1264
-
1265
- // write header
1266
- gw.write(LM_GGUF_MAGIC[0]);
1267
- gw.write(LM_GGUF_MAGIC[1]);
1268
- gw.write(LM_GGUF_MAGIC[2]);
1269
- gw.write(LM_GGUF_MAGIC[3]);
1270
- gw.write(ctx->version);
1271
- gw.write(n_tensors);
1272
- gw.write(n_kv);
1273
-
1274
- // write key-value pairs
1275
- for (int64_t i = 0; i < n_kv; ++i) {
1276
- gw.write(ctx->kv[i]);
1277
- }
1278
-
1279
- // write tensor info
1280
- for (int64_t i = 0; i < n_tensors; ++i) {
1281
- gw.write_tensor_meta(ctx->info[i]);
1282
- }
1283
-
1284
- // we require the data section to be aligned
1285
- gw.pad(ctx->alignment);
1286
-
1287
- if (only_meta) {
1288
- return;
1289
- }
1290
-
1291
- const size_t offset_data = gw.buf.size();
1292
-
1293
- // write tensor data
1294
- for (int64_t i = 0; i < n_tensors; ++i) {
1295
- gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment);
1296
- }
1297
- }
1298
-
1299
- bool lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta) {
1300
- FILE * file = lm_ggml_fopen(fname, "wb");
1301
-
1302
- if (!file) {
1303
- fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1304
- return false;
1305
- }
1306
-
1307
- std::vector<int8_t> buf;
1308
- lm_gguf_write_to_buf(ctx, buf, only_meta);
1309
- const bool ok = fwrite(buf.data(), 1, buf.size(), file) == buf.size();
1310
- fclose(file);
1311
- return ok;
1312
- }
1313
-
1314
- size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx) {
1315
- // only return size
1316
- std::vector<int8_t> buf;
1317
- lm_gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1318
- return buf.size();
1319
- }
1320
-
1321
- void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data) {
1322
- std::vector<int8_t> buf;
1323
- lm_gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1324
- memcpy(data, buf.data(), buf.size());
1325
- }
1
+ #include "ggml.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-impl.h"
4
+ #include "gguf.h"
5
+
6
+ #include <cinttypes>
7
+ #include <cstddef>
8
+ #include <cstdint>
9
+ #include <cstdio>
10
+ #include <cstdlib>
11
+ #include <cstring>
12
+ #include <map>
13
+ #include <new>
14
+ #include <stdexcept>
15
+ #include <string>
16
+ #include <vector>
17
+
18
+ template <typename T>
19
+ struct type_to_lm_gguf_type;
20
+
21
+ template <>
22
+ struct type_to_lm_gguf_type<uint8_t> {
23
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_UINT8;
24
+ };
25
+
26
+ template <>
27
+ struct type_to_lm_gguf_type<int8_t> {
28
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_INT8;
29
+ };
30
+
31
+ template <>
32
+ struct type_to_lm_gguf_type<uint16_t> {
33
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_UINT16;
34
+ };
35
+
36
+ template <>
37
+ struct type_to_lm_gguf_type<int16_t> {
38
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_INT16;
39
+ };
40
+
41
+ template <>
42
+ struct type_to_lm_gguf_type<uint32_t> {
43
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_UINT32;
44
+ };
45
+
46
+ template <>
47
+ struct type_to_lm_gguf_type<int32_t> {
48
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_INT32;
49
+ };
50
+
51
+ template <>
52
+ struct type_to_lm_gguf_type<float> {
53
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_FLOAT32;
54
+ };
55
+
56
+ template <>
57
+ struct type_to_lm_gguf_type<bool> {
58
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_BOOL;
59
+ };
60
+
61
+ template <>
62
+ struct type_to_lm_gguf_type<std::string> {
63
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_STRING;
64
+ };
65
+
66
+ template <>
67
+ struct type_to_lm_gguf_type<uint64_t> {
68
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_UINT64;
69
+ };
70
+
71
+ template <>
72
+ struct type_to_lm_gguf_type<int64_t> {
73
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_INT64;
74
+ };
75
+
76
+ template <>
77
+ struct type_to_lm_gguf_type<double> {
78
+ static constexpr enum lm_gguf_type value = LM_GGUF_TYPE_FLOAT64;
79
+ };
80
+
81
+ static const std::map<lm_gguf_type, size_t> LM_GGUF_TYPE_SIZE = {
82
+ {LM_GGUF_TYPE_UINT8, sizeof(uint8_t)},
83
+ {LM_GGUF_TYPE_INT8, sizeof(int8_t)},
84
+ {LM_GGUF_TYPE_UINT16, sizeof(uint16_t)},
85
+ {LM_GGUF_TYPE_INT16, sizeof(int16_t)},
86
+ {LM_GGUF_TYPE_UINT32, sizeof(uint32_t)},
87
+ {LM_GGUF_TYPE_INT32, sizeof(int32_t)},
88
+ {LM_GGUF_TYPE_FLOAT32, sizeof(float)},
89
+ {LM_GGUF_TYPE_BOOL, sizeof(int8_t)},
90
+ {LM_GGUF_TYPE_STRING, 0}, // undefined
91
+ {LM_GGUF_TYPE_ARRAY, 0}, // undefined
92
+ {LM_GGUF_TYPE_UINT64, sizeof(uint64_t)},
93
+ {LM_GGUF_TYPE_INT64, sizeof(int64_t)},
94
+ {LM_GGUF_TYPE_FLOAT64, sizeof(double)},
95
+ };
96
+ static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13");
97
+
98
+ static const std::map<lm_gguf_type, const char *> LM_GGUF_TYPE_NAME = {
99
+ {LM_GGUF_TYPE_UINT8, "u8"},
100
+ {LM_GGUF_TYPE_INT8, "i8"},
101
+ {LM_GGUF_TYPE_UINT16, "u16"},
102
+ {LM_GGUF_TYPE_INT16, "i16"},
103
+ {LM_GGUF_TYPE_UINT32, "u32"},
104
+ {LM_GGUF_TYPE_INT32, "i32"},
105
+ {LM_GGUF_TYPE_FLOAT32, "f32"},
106
+ {LM_GGUF_TYPE_BOOL, "bool"},
107
+ {LM_GGUF_TYPE_STRING, "str"},
108
+ {LM_GGUF_TYPE_ARRAY, "arr"},
109
+ {LM_GGUF_TYPE_UINT64, "u64"},
110
+ {LM_GGUF_TYPE_INT64, "i64"},
111
+ {LM_GGUF_TYPE_FLOAT64, "f64"},
112
+ };
113
+ static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13");
114
+
115
+ size_t lm_gguf_type_size(enum lm_gguf_type type) {
116
+ auto it = LM_GGUF_TYPE_SIZE.find(type);
117
+ return it == LM_GGUF_TYPE_SIZE.end() ? 0 : it->second;
118
+ }
119
+
120
+ struct lm_gguf_kv {
121
+ std::string key;
122
+
123
+ bool is_array;
124
+ enum lm_gguf_type type;
125
+
126
+ std::vector<int8_t> data;
127
+ std::vector<std::string> data_string;
128
+
129
+ template <typename T>
130
+ lm_gguf_kv(const std::string & key, const T value)
131
+ : key(key), is_array(false), type(type_to_lm_gguf_type<T>::value) {
132
+ LM_GGML_ASSERT(!key.empty());
133
+ data.resize(sizeof(T));
134
+ memcpy(data.data(), &value, sizeof(T));
135
+ }
136
+
137
+ template <typename T>
138
+ lm_gguf_kv(const std::string & key, const std::vector<T> & value)
139
+ : key(key), is_array(true), type(type_to_lm_gguf_type<T>::value) {
140
+ LM_GGML_ASSERT(!key.empty());
141
+ data.resize(value.size()*sizeof(T));
142
+ for (size_t i = 0; i < value.size(); ++i) {
143
+ const T tmp = value[i];
144
+ memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
145
+ }
146
+ }
147
+
148
+ lm_gguf_kv(const std::string & key, const std::string & value)
149
+ : key(key), is_array(false), type(LM_GGUF_TYPE_STRING) {
150
+ LM_GGML_ASSERT(!key.empty());
151
+ data_string.push_back(value);
152
+ }
153
+
154
+ lm_gguf_kv(const std::string & key, const std::vector<std::string> & value)
155
+ : key(key), is_array(true), type(LM_GGUF_TYPE_STRING) {
156
+ LM_GGML_ASSERT(!key.empty());
157
+ data_string = value;
158
+ }
159
+
160
+ const std::string & get_key() const {
161
+ return key;
162
+ }
163
+
164
+ const enum lm_gguf_type & get_type() const {
165
+ return type;
166
+ }
167
+
168
+ size_t get_ne() const {
169
+ if (type == LM_GGUF_TYPE_STRING) {
170
+ const size_t ne = data_string.size();
171
+ LM_GGML_ASSERT(is_array || ne == 1);
172
+ return ne;
173
+ }
174
+ const size_t type_size = lm_gguf_type_size(type);
175
+ LM_GGML_ASSERT(data.size() % type_size == 0);
176
+ const size_t ne = data.size() / type_size;
177
+ LM_GGML_ASSERT(is_array || ne == 1);
178
+ return ne;
179
+ }
180
+
181
+ template <typename T>
182
+ const T & get_val(const size_t i = 0) const {
183
+ LM_GGML_ASSERT(type_to_lm_gguf_type<T>::value == type);
184
+ if constexpr (std::is_same<T, std::string>::value) {
185
+ LM_GGML_ASSERT(data_string.size() >= i+1);
186
+ return data_string[i];
187
+ }
188
+ const size_t type_size = lm_gguf_type_size(type);
189
+ LM_GGML_ASSERT(data.size() % type_size == 0);
190
+ LM_GGML_ASSERT(data.size() >= (i+1)*type_size);
191
+ return reinterpret_cast<const T *>(data.data())[i];
192
+ }
193
+
194
+ void cast(const enum lm_gguf_type new_type) {
195
+ const size_t new_type_size = lm_gguf_type_size(new_type);
196
+ LM_GGML_ASSERT(data.size() % new_type_size == 0);
197
+ type = new_type;
198
+ }
199
+ };
200
+
201
+ struct lm_gguf_tensor_info {
202
+ struct lm_ggml_tensor t; // for holding the equivalent info
203
+ uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
204
+ };
205
+
206
+ struct lm_gguf_context {
207
+ uint32_t version = LM_GGUF_VERSION;
208
+
209
+ std::vector<struct lm_gguf_kv> kv;
210
+ std::vector<struct lm_gguf_tensor_info> info;
211
+
212
+ size_t alignment = LM_GGUF_DEFAULT_ALIGNMENT;
213
+ size_t offset = 0; // offset of `data` from beginning of file
214
+ size_t size = 0; // size of `data` in bytes
215
+
216
+ void * data = nullptr;
217
+ };
218
+
219
+ struct lm_gguf_reader {
220
+ FILE * file;
221
+
222
+ lm_gguf_reader(FILE * file) : file(file) {}
223
+
224
+ template <typename T>
225
+ bool read(T & dst) const {
226
+ return fread(&dst, 1, sizeof(dst), file) == sizeof(dst);
227
+ }
228
+
229
+ template <typename T>
230
+ bool read(std::vector<T> & dst, const size_t n) const {
231
+ dst.resize(n);
232
+ for (size_t i = 0; i < dst.size(); ++i) {
233
+ if constexpr (std::is_same<T, bool>::value) {
234
+ bool tmp;
235
+ if (!read(tmp)) {
236
+ return false;
237
+ }
238
+ dst[i] = tmp;
239
+ } else {
240
+ if (!read(dst[i])) {
241
+ return false;
242
+ }
243
+ }
244
+ }
245
+ return true;
246
+ }
247
+
248
+ bool read(bool & dst) const {
249
+ int8_t tmp = -1;
250
+ if (!read(tmp)) {
251
+ return false;
252
+ }
253
+ dst = tmp != 0;
254
+ return true;
255
+ }
256
+
257
+ bool read(enum lm_ggml_type & dst) const {
258
+ int32_t tmp = -1;
259
+ if (!read(tmp)) {
260
+ return false;
261
+ }
262
+ dst = lm_ggml_type(tmp);
263
+ return true;
264
+ }
265
+
266
+ bool read(enum lm_gguf_type & dst) const {
267
+ int32_t tmp = -1;
268
+ if (!read(tmp)) {
269
+ return false;
270
+ }
271
+ dst = lm_gguf_type(tmp);
272
+ return true;
273
+ }
274
+
275
+ bool read(std::string & dst) const {
276
+ uint64_t size = -1;
277
+ if (!read(size)) {
278
+ return false;
279
+ }
280
+ dst.resize(size);
281
+ return fread(dst.data(), 1, dst.length(), file) == dst.length();
282
+ }
283
+
284
+ bool read(void * dst, const size_t size) const {
285
+ return fread(dst, 1, size, file) == size;
286
+ }
287
+ };
288
+
289
+ struct lm_gguf_context * lm_gguf_init_empty(void) {
290
+ return new lm_gguf_context;
291
+ }
292
+
293
// Reads one value of type T (or, if is_array, n values of type T) from `gr`
// and appends it to `kv` under `key`.
// Returns false if the read fails, or if resizing the array buffer throws
// std::length_error or std::bad_alloc; nothing is appended in that case.
template<typename T>
bool lm_gguf_read_emplace_helper(const struct lm_gguf_reader & gr, std::vector<struct lm_gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
    if (!is_array) {
        T scalar;
        if (!gr.read(scalar)) {
            return false;
        }
        kv.emplace_back(key, scalar);
        return true;
    }

    std::vector<T> elems;
    try {
        if (!gr.read(elems, n)) {
            return false;
        }
    } catch (std::length_error &) {
        fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
        return false;
    } catch (std::bad_alloc &) {
        fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
        return false;
    }
    kv.emplace_back(key, elems);
    return true;
}
318
+
319
+ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf_init_params params) {
320
+ const struct lm_gguf_reader gr(file);
321
+ struct lm_gguf_context * ctx = new lm_gguf_context;
322
+
323
+ bool ok = true;
324
+
325
+ // file magic
326
+ {
327
+ std::vector<char> magic;
328
+ ok = ok && gr.read(magic, 4);
329
+
330
+ if (!ok) {
331
+ fprintf(stderr, "%s: failed to read magic\n", __func__);
332
+ lm_gguf_free(ctx);
333
+ return nullptr;
334
+ }
335
+
336
+ for (uint32_t i = 0; i < magic.size(); i++) {
337
+ if (magic[i] != LM_GGUF_MAGIC[i]) {
338
+ fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
339
+ lm_gguf_free(ctx);
340
+ return nullptr;
341
+ }
342
+ }
343
+ }
344
+
345
+ // header
346
+ int64_t n_kv = 0;
347
+ int64_t n_tensors = 0;
348
+
349
+ if (ok && gr.read(ctx->version)) {
350
+ if (ctx->version == 1) {
351
+ fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
352
+ ok = false;
353
+ }
354
+ if (ctx->version > LM_GGUF_VERSION) {
355
+ fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
356
+ __func__, ctx->version, LM_GGUF_VERSION);
357
+ ok = false;
358
+ }
359
+ } else {
360
+ ok = false;
361
+ }
362
+
363
+ if (ok && gr.read(n_tensors)) {
364
+ static_assert(sizeof(size_t) <= 8 && sizeof(lm_gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
365
+ if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(lm_gguf_tensor_info))) {
366
+ fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
367
+ __func__, n_tensors, SIZE_MAX/sizeof(lm_gguf_tensor_info));
368
+ ok = false;
369
+ }
370
+ } else {
371
+ ok = false;
372
+ }
373
+
374
+ if (ok && gr.read(n_kv)) {
375
+ static_assert(sizeof(size_t) <= 8 && sizeof(lm_gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
376
+ if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(lm_gguf_kv))) {
377
+ fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
378
+ __func__, n_kv, SIZE_MAX/sizeof(lm_gguf_kv));
379
+ ok = false;
380
+ }
381
+ } else {
382
+ ok = false;
383
+ }
384
+
385
+ if (!ok) {
386
+ fprintf(stderr, "%s: failed to read header\n", __func__);
387
+ lm_gguf_free(ctx);
388
+ return nullptr;
389
+ }
390
+
391
+ // KV pairs
392
+ {
393
+ for (int64_t i = 0; ok && i < n_kv; ++i) {
394
+ std::string key;
395
+ lm_gguf_type type = lm_gguf_type(-1);
396
+ bool is_array = false;
397
+ uint64_t n = 1;
398
+
399
+ try {
400
+ ok = ok && gr.read(key);
401
+ } catch (std::length_error &) {
402
+ fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
403
+ ok = false;
404
+ } catch (std::bad_alloc &) {
405
+ fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
406
+ ok = false;
407
+ }
408
+ for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
409
+ if (key == ctx->kv[j].key) {
410
+ fprintf(stderr, "%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
411
+ ok = false;
412
+ }
413
+ }
414
+ if (!ok) {
415
+ break;
416
+ }
417
+
418
+ ok = ok && gr.read(type);
419
+ if (type == LM_GGUF_TYPE_ARRAY) {
420
+ is_array = true;
421
+ ok = ok && gr.read(type);
422
+ ok = ok && gr.read(n);
423
+ }
424
+ if (!ok) {
425
+ break;
426
+ }
427
+
428
+ switch (type) {
429
+ case LM_GGUF_TYPE_UINT8: ok = ok && lm_gguf_read_emplace_helper<uint8_t> (gr, ctx->kv, key, is_array, n); break;
430
+ case LM_GGUF_TYPE_INT8: ok = ok && lm_gguf_read_emplace_helper<int8_t> (gr, ctx->kv, key, is_array, n); break;
431
+ case LM_GGUF_TYPE_UINT16: ok = ok && lm_gguf_read_emplace_helper<uint16_t> (gr, ctx->kv, key, is_array, n); break;
432
+ case LM_GGUF_TYPE_INT16: ok = ok && lm_gguf_read_emplace_helper<int16_t> (gr, ctx->kv, key, is_array, n); break;
433
+ case LM_GGUF_TYPE_UINT32: ok = ok && lm_gguf_read_emplace_helper<uint32_t> (gr, ctx->kv, key, is_array, n); break;
434
+ case LM_GGUF_TYPE_INT32: ok = ok && lm_gguf_read_emplace_helper<int32_t> (gr, ctx->kv, key, is_array, n); break;
435
+ case LM_GGUF_TYPE_FLOAT32: ok = ok && lm_gguf_read_emplace_helper<float> (gr, ctx->kv, key, is_array, n); break;
436
+ case LM_GGUF_TYPE_BOOL: ok = ok && lm_gguf_read_emplace_helper<bool> (gr, ctx->kv, key, is_array, n); break;
437
+ case LM_GGUF_TYPE_STRING: ok = ok && lm_gguf_read_emplace_helper<std::string>(gr, ctx->kv, key, is_array, n); break;
438
+ case LM_GGUF_TYPE_UINT64: ok = ok && lm_gguf_read_emplace_helper<uint64_t> (gr, ctx->kv, key, is_array, n); break;
439
+ case LM_GGUF_TYPE_INT64: ok = ok && lm_gguf_read_emplace_helper<int64_t> (gr, ctx->kv, key, is_array, n); break;
440
+ case LM_GGUF_TYPE_FLOAT64: ok = ok && lm_gguf_read_emplace_helper<double> (gr, ctx->kv, key, is_array, n); break;
441
+ case LM_GGUF_TYPE_ARRAY:
442
+ default:
443
+ {
444
+ fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
445
+ ok = false;
446
+ } break;
447
+ }
448
+ }
449
+
450
+ if (!ok) {
451
+ fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
452
+ lm_gguf_free(ctx);
453
+ return nullptr;
454
+ }
455
+ LM_GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv);
456
+
457
+ const int alignment_idx = lm_gguf_find_key(ctx, LM_GGUF_KEY_GENERAL_ALIGNMENT);
458
+ ctx->alignment = alignment_idx == -1 ? LM_GGUF_DEFAULT_ALIGNMENT : lm_gguf_get_val_u32(ctx, alignment_idx);
459
+
460
+ if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
461
+ fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
462
+ lm_gguf_free(ctx);
463
+ return nullptr;
464
+ }
465
+ }
466
+
467
+ // read the tensor info
468
+ for (int64_t i = 0; ok && i < n_tensors; ++i) {
469
+ struct lm_gguf_tensor_info info;
470
+
471
+ // tensor name
472
+ {
473
+ std::string name;
474
+ try {
475
+ ok = ok && gr.read(name);
476
+ } catch (std::length_error &) {
477
+ fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
478
+ ok = false;
479
+ } catch (std::bad_alloc &) {
480
+ fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
481
+ ok = false;
482
+ }
483
+ if (name.length() >= LM_GGML_MAX_NAME) {
484
+ fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), LM_GGML_MAX_NAME);
485
+ ok = false;
486
+ break;
487
+ }
488
+ lm_ggml_set_name(&info.t, name.c_str());
489
+
490
+ // make sure there are no duplicate tensor names
491
+ for (int64_t j = 0; ok && j < i; ++j) {
492
+ if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
493
+ fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
494
+ ok = false;
495
+ break;
496
+ }
497
+ }
498
+ }
499
+ if (!ok) {
500
+ break;
501
+ }
502
+
503
+ // tensor shape
504
+ {
505
+ uint32_t n_dims = -1;
506
+ ok = ok && gr.read(n_dims);
507
+ if (n_dims > LM_GGML_MAX_DIMS) {
508
+ fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
509
+ __func__, info.t.name, n_dims, LM_GGML_MAX_DIMS);
510
+ ok = false;
511
+ break;
512
+ }
513
+ for (uint32_t j = 0; ok && j < LM_GGML_MAX_DIMS; ++j) {
514
+ info.t.ne[j] = 1;
515
+ if (j < n_dims) {
516
+ ok = ok && gr.read(info.t.ne[j]);
517
+ }
518
+
519
+ // check that all ne are non-negative
520
+ if (info.t.ne[j] < 0) {
521
+ fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
522
+ __func__, info.t.name, j, info.t.ne[j]);
523
+ ok = false;
524
+ break;
525
+ }
526
+ }
527
+
528
+ // check that the total number of elements is representable
529
+ if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
530
+ (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
531
+ (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
532
+
533
+ fprintf(stderr, "%s: total number of elements in tensor '%s' with shape "
534
+ "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
535
+ __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
536
+ ok = false;
537
+ break;
538
+ }
539
+ }
540
+ if (!ok) {
541
+ break;
542
+ }
543
+
544
+ // tensor type
545
+ {
546
+ ok = ok && gr.read(info.t.type);
547
+
548
+ // check that tensor type is within defined range
549
+ if (info.t.type < 0 || info.t.type >= LM_GGML_TYPE_COUNT) {
550
+ fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
551
+ __func__, info.t.name, info.t.type, lm_ggml_type_name(info.t.type));
552
+ ok = false;
553
+ break;
554
+ }
555
+ const size_t type_size = lm_ggml_type_size(info.t.type);
556
+ const int64_t blck_size = lm_ggml_blck_size(info.t.type);
557
+
558
+ // check that row size is divisible by block size
559
+ if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
560
+ fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
561
+ "not a multiple of block size (%" PRId64 ")\n",
562
+ __func__, info.t.name, (int) info.t.type, lm_ggml_type_name(info.t.type), info.t.ne[0], blck_size);
563
+ ok = false;
564
+ break;
565
+ }
566
+
567
+ // calculate byte offsets given the tensor shape and type
568
+ info.t.nb[0] = type_size;
569
+ info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);
570
+ for (int j = 2; j < LM_GGML_MAX_DIMS; ++j) {
571
+ info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1];
572
+ }
573
+ }
574
+ if (!ok) {
575
+ break;
576
+ }
577
+
578
+ // tensor data offset within buffer
579
+ ok = ok && gr.read(info.offset);
580
+
581
+ ctx->info.push_back(info);
582
+ }
583
+
584
+ if (!ok) {
585
+ fprintf(stderr, "%s: failed to read tensor info\n", __func__);
586
+ lm_gguf_free(ctx);
587
+ return nullptr;
588
+ }
589
+ LM_GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
590
+
591
+ // we require the data section to be aligned, so take into account any padding
592
+ if (fseek(file, LM_GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
593
+ fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__);
594
+ lm_gguf_free(ctx);
595
+ return nullptr;
596
+ }
597
+
598
+ // store the current file offset - this is where the data section starts
599
+ ctx->offset = ftell(file);
600
+
601
+ // compute the total size of the data section, taking into account the alignment
602
+ {
603
+ ctx->size = 0;
604
+ for (size_t i = 0; i < ctx->info.size(); ++i) {
605
+ const lm_gguf_tensor_info & ti = ctx->info[i];
606
+ if (ti.offset != ctx->size) {
607
+ fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
608
+ __func__, ti.t.name, ti.offset, ctx->size);
609
+ fprintf(stderr, "%s: failed to read tensor data\n", __func__);
610
+ lm_gguf_free(ctx);
611
+ return nullptr;
612
+ }
613
+ ctx->size += LM_GGML_PAD(lm_ggml_nbytes(&ti.t), ctx->alignment);
614
+ }
615
+ }
616
+
617
+ // load the tensor data only if requested
618
+ if (params.ctx != nullptr) {
619
+ // if the provided lm_gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
620
+ // otherwise, we load the binary blob into the created lm_ggml_context as well, and point the "data" members of
621
+ // the lm_ggml_tensor structs to the appropriate locations in the binary blob
622
+
623
+ // compute the exact size needed for the new lm_ggml_context
624
+ const size_t mem_size =
625
+ params.no_alloc ?
626
+ (n_tensors )*lm_ggml_tensor_overhead() :
627
+ (n_tensors + 1)*lm_ggml_tensor_overhead() + ctx->size;
628
+
629
+ struct lm_ggml_init_params pdata = {
630
+ /*mem_size =*/ mem_size,
631
+ /*mem_buffer =*/ nullptr,
632
+ /*no_alloc =*/ params.no_alloc,
633
+ };
634
+
635
+ *params.ctx = lm_ggml_init(pdata);
636
+ if (*params.ctx == nullptr) {
637
+ fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__);
638
+ lm_gguf_free(ctx);
639
+ return nullptr;
640
+ }
641
+
642
+ struct lm_ggml_context * ctx_data = *params.ctx;
643
+
644
+ struct lm_ggml_tensor * data = nullptr;
645
+
646
+ if (!params.no_alloc) {
647
+ data = lm_ggml_new_tensor_1d(ctx_data, LM_GGML_TYPE_I8, ctx->size);
648
+
649
+ ok = ok && data != nullptr;
650
+
651
+ if (ok) {
652
+ lm_ggml_set_name(data, "GGUF tensor data binary blob");
653
+ }
654
+
655
+ // read the binary blob with the tensor data
656
+ ok = ok && gr.read(data->data, ctx->size);
657
+
658
+ if (!ok) {
659
+ fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__);
660
+ lm_ggml_free(ctx_data);
661
+ *params.ctx = nullptr;
662
+ lm_gguf_free(ctx);
663
+ return nullptr;
664
+ }
665
+
666
+ ctx->data = data->data;
667
+ }
668
+
669
+ lm_ggml_set_no_alloc(ctx_data, true);
670
+
671
+ // create the tensors
672
+ for (size_t i = 0; i < ctx->info.size(); ++i) {
673
+ const struct lm_gguf_tensor_info & info = ctx->info[i];
674
+
675
+ struct lm_ggml_tensor * cur = lm_ggml_new_tensor(ctx_data, info.t.type, LM_GGML_MAX_DIMS, info.t.ne);
676
+
677
+ ok = ok && cur != nullptr;
678
+
679
+ if (!ok) {
680
+ break;
681
+ }
682
+
683
+ lm_ggml_set_name(cur, info.t.name);
684
+
685
+ // point the data member to the appropriate location in the binary blob using the tensor info
686
+ if (!params.no_alloc) {
687
+ cur->data = (char *) data->data + info.offset;
688
+ }
689
+ }
690
+
691
+ if (!ok) {
692
+ fprintf(stderr, "%s: failed to create tensors\n", __func__);
693
+ lm_ggml_free(ctx_data);
694
+ *params.ctx = nullptr;
695
+ lm_gguf_free(ctx);
696
+ return nullptr;
697
+ }
698
+
699
+ lm_ggml_set_no_alloc(ctx_data, params.no_alloc);
700
+ }
701
+
702
+ return ctx;
703
+ }
704
+
705
+ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
706
+ FILE * file = lm_ggml_fopen(fname, "rb");
707
+
708
+ if (!file) {
709
+ fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname);
710
+ return nullptr;
711
+ }
712
+
713
+ struct lm_gguf_context * result = lm_gguf_init_from_file_impl(file, params);
714
+ fclose(file);
715
+ return result;
716
+ }
717
+
718
// Releases ctx with delete; a null ctx is accepted and ignored.
void lm_gguf_free(struct lm_gguf_context * ctx) {
    if (ctx != nullptr) {
        delete ctx;
    }
}
724
+
725
+ const char * lm_gguf_type_name(enum lm_gguf_type type) {
726
+ auto it = LM_GGUF_TYPE_NAME.find(type);
727
+ return it == LM_GGUF_TYPE_NAME.end() ? nullptr : it->second;
728
+ }
729
+
730
+ uint32_t lm_gguf_get_version(const struct lm_gguf_context * ctx) {
731
+ return ctx->version;
732
+ }
733
+
734
+ size_t lm_gguf_get_alignment(const struct lm_gguf_context * ctx) {
735
+ return ctx->alignment;
736
+ }
737
+
738
+ size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx) {
739
+ return ctx->offset;
740
+ }
741
+
742
+ int64_t lm_gguf_get_n_kv(const struct lm_gguf_context * ctx) {
743
+ return ctx->kv.size();
744
+ }
745
+
746
+ int64_t lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key) {
747
+ // return -1 if key not found
748
+ int64_t keyfound = -1;
749
+
750
+ const int64_t n_kv = lm_gguf_get_n_kv(ctx);
751
+
752
+ for (int64_t i = 0; i < n_kv; ++i) {
753
+ if (strcmp(key, lm_gguf_get_key(ctx, i)) == 0) {
754
+ keyfound = i;
755
+ break;
756
+ }
757
+ }
758
+
759
+ return keyfound;
760
+ }
761
+
762
+ const char * lm_gguf_get_key(const struct lm_gguf_context * ctx, int64_t key_id) {
763
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
764
+ return ctx->kv[key_id].get_key().c_str();
765
+ }
766
+
767
+ enum lm_gguf_type lm_gguf_get_kv_type(const struct lm_gguf_context * ctx, int64_t key_id) {
768
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
769
+ return ctx->kv[key_id].is_array ? LM_GGUF_TYPE_ARRAY : ctx->kv[key_id].get_type();
770
+ }
771
+
772
+ enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int64_t key_id) {
773
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
774
+ LM_GGML_ASSERT(ctx->kv[key_id].is_array);
775
+ return ctx->kv[key_id].get_type();
776
+ }
777
+
778
+ const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int64_t key_id) {
779
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
780
+ LM_GGML_ASSERT(ctx->kv[key_id].get_type() != LM_GGUF_TYPE_STRING);
781
+ return ctx->kv[key_id].data.data();
782
+ }
783
+
784
+ const char * lm_gguf_get_arr_str(const struct lm_gguf_context * ctx, int64_t key_id, size_t i) {
785
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
786
+ LM_GGML_ASSERT(ctx->kv[key_id].get_type() == LM_GGUF_TYPE_STRING);
787
+ return ctx->kv[key_id].data_string[i].c_str();
788
+ }
789
+
790
+ size_t lm_gguf_get_arr_n(const struct lm_gguf_context * ctx, int64_t key_id) {
791
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
792
+
793
+ if (ctx->kv[key_id].type == LM_GGUF_TYPE_STRING) {
794
+ return ctx->kv[key_id].data_string.size();
795
+ }
796
+
797
+ const size_t type_size = lm_gguf_type_size(ctx->kv[key_id].type);
798
+ LM_GGML_ASSERT(ctx->kv[key_id].data.size() % type_size == 0);
799
+ return ctx->kv[key_id].data.size() / type_size;
800
+ }
801
+
802
+ uint8_t lm_gguf_get_val_u8(const struct lm_gguf_context * ctx, int64_t key_id) {
803
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
804
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
805
+ return ctx->kv[key_id].get_val<uint8_t>();
806
+ }
807
+
808
+ int8_t lm_gguf_get_val_i8(const struct lm_gguf_context * ctx, int64_t key_id) {
809
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
810
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
811
+ return ctx->kv[key_id].get_val<int8_t>();
812
+ }
813
+
814
+ uint16_t lm_gguf_get_val_u16(const struct lm_gguf_context * ctx, int64_t key_id) {
815
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
816
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
817
+ return ctx->kv[key_id].get_val<uint16_t>();
818
+ }
819
+
820
+ int16_t lm_gguf_get_val_i16(const struct lm_gguf_context * ctx, int64_t key_id) {
821
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
822
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
823
+ return ctx->kv[key_id].get_val<int16_t>();
824
+ }
825
+
826
+ uint32_t lm_gguf_get_val_u32(const struct lm_gguf_context * ctx, int64_t key_id) {
827
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
828
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
829
+ return ctx->kv[key_id].get_val<uint32_t>();
830
+ }
831
+
832
+ int32_t lm_gguf_get_val_i32(const struct lm_gguf_context * ctx, int64_t key_id) {
833
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
834
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
835
+ return ctx->kv[key_id].get_val<int32_t>();
836
+ }
837
+
838
+ float lm_gguf_get_val_f32(const struct lm_gguf_context * ctx, int64_t key_id) {
839
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
840
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
841
+ return ctx->kv[key_id].get_val<float>();
842
+ }
843
+
844
+ uint64_t lm_gguf_get_val_u64(const struct lm_gguf_context * ctx, int64_t key_id) {
845
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
846
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
847
+ return ctx->kv[key_id].get_val<uint64_t>();
848
+ }
849
+
850
+ int64_t lm_gguf_get_val_i64(const struct lm_gguf_context * ctx, int64_t key_id) {
851
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
852
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
853
+ return ctx->kv[key_id].get_val<int64_t>();
854
+ }
855
+
856
+ double lm_gguf_get_val_f64(const struct lm_gguf_context * ctx, int64_t key_id) {
857
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
858
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
859
+ return ctx->kv[key_id].get_val<double>();
860
+ }
861
+
862
+ bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int64_t key_id) {
863
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
864
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
865
+ return ctx->kv[key_id].get_val<bool>();
866
+ }
867
+
868
+ const char * lm_gguf_get_val_str(const struct lm_gguf_context * ctx, int64_t key_id) {
869
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
870
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
871
+ return ctx->kv[key_id].get_val<std::string>().c_str();
872
+ }
873
+
874
+ const void * lm_gguf_get_val_data(const struct lm_gguf_context * ctx, int64_t key_id) {
875
+ LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
876
+ LM_GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
877
+ LM_GGML_ASSERT(ctx->kv[key_id].get_type() != LM_GGUF_TYPE_STRING);
878
+ return ctx->kv[key_id].data.data();
879
+ }
880
+
881
+ int64_t lm_gguf_get_n_tensors(const struct lm_gguf_context * ctx) {
882
+ return ctx->info.size();
883
+ }
884
+
885
+ int64_t lm_gguf_find_tensor(const struct lm_gguf_context * ctx, const char * name) {
886
+ // return -1 if tensor not found
887
+ int64_t tensor_id = -1;
888
+
889
+ const int64_t n_tensors = lm_gguf_get_n_tensors(ctx);
890
+
891
+ for (int64_t i = 0; i < n_tensors; ++i) {
892
+ if (strcmp(name, lm_gguf_get_tensor_name(ctx, i)) == 0) {
893
+ tensor_id = i;
894
+ break;
895
+ }
896
+ }
897
+
898
+ return tensor_id;
899
+ }
900
+
901
+ size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int64_t tensor_id) {
902
+ LM_GGML_ASSERT(tensor_id >= 0 && tensor_id < lm_gguf_get_n_tensors(ctx));
903
+ return ctx->info[tensor_id].offset;
904
+ }
905
+
906
+ const char * lm_gguf_get_tensor_name(const struct lm_gguf_context * ctx, int64_t tensor_id) {
907
+ LM_GGML_ASSERT(tensor_id >= 0 && tensor_id < lm_gguf_get_n_tensors(ctx));
908
+ return ctx->info[tensor_id].t.name;
909
+ }
910
+
911
+ enum lm_ggml_type lm_gguf_get_tensor_type(const struct lm_gguf_context * ctx, int64_t tensor_id) {
912
+ LM_GGML_ASSERT(tensor_id >= 0 && tensor_id < lm_gguf_get_n_tensors(ctx));
913
+ return ctx->info[tensor_id].t.type;
914
+ }
915
+
916
+ size_t lm_gguf_get_tensor_size(const struct lm_gguf_context * ctx, int64_t tensor_id) {
917
+ LM_GGML_ASSERT(tensor_id >= 0 && tensor_id < lm_gguf_get_n_tensors(ctx));
918
+ return lm_ggml_nbytes(&ctx->info[tensor_id].t);
919
+ }
920
+
921
+ int64_t lm_gguf_remove_key(struct lm_gguf_context * ctx, const char * key) {
922
+ const int64_t key_id = lm_gguf_find_key(ctx, key);
923
+ if (key_id >= 0) {
924
+ ctx->kv.erase(ctx->kv.begin() + key_id);
925
+ }
926
+ return key_id;
927
+ }
928
+
929
+ template<typename T>
930
+ static void lm_gguf_check_reserved_keys(const std::string & key, const T val) {
931
+ if (key == LM_GGUF_KEY_GENERAL_ALIGNMENT) {
932
+ if constexpr (std::is_same<T, uint32_t>::value) {
933
+ LM_GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && LM_GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
934
+ } else {
935
+ LM_GGML_ABORT(LM_GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
936
+ }
937
+ }
938
+ }
939
+
940
+ void lm_gguf_set_val_u8(struct lm_gguf_context * ctx, const char * key, uint8_t val) {
941
+ lm_gguf_check_reserved_keys(key, val);
942
+ lm_gguf_remove_key(ctx, key);
943
+ ctx->kv.emplace_back(key, val);
944
+ }
945
+
946
+ void lm_gguf_set_val_i8(struct lm_gguf_context * ctx, const char * key, int8_t val) {
947
+ lm_gguf_check_reserved_keys(key, val);
948
+ lm_gguf_remove_key(ctx, key);
949
+ ctx->kv.emplace_back(key, val);
950
+ }
951
+
952
+ void lm_gguf_set_val_u16(struct lm_gguf_context * ctx, const char * key, uint16_t val) {
953
+ lm_gguf_check_reserved_keys(key, val);
954
+ lm_gguf_remove_key(ctx, key);
955
+ ctx->kv.emplace_back(key, val);
956
+ }
957
+
958
+ void lm_gguf_set_val_i16(struct lm_gguf_context * ctx, const char * key, int16_t val) {
959
+ lm_gguf_check_reserved_keys(key, val);
960
+ lm_gguf_remove_key(ctx, key);
961
+ ctx->kv.emplace_back(key, val);
962
+ }
963
+
964
+ void lm_gguf_set_val_u32(struct lm_gguf_context * ctx, const char * key, uint32_t val) {
965
+ lm_gguf_check_reserved_keys(key, val);
966
+ lm_gguf_remove_key(ctx, key);
967
+ ctx->kv.emplace_back(key, val);
968
+ }
969
+
970
+ void lm_gguf_set_val_i32(struct lm_gguf_context * ctx, const char * key, int32_t val) {
971
+ lm_gguf_check_reserved_keys(key, val);
972
+ lm_gguf_remove_key(ctx, key);
973
+ ctx->kv.emplace_back(key, val);
974
+ }
975
+
976
+ void lm_gguf_set_val_f32(struct lm_gguf_context * ctx, const char * key, float val) {
977
+ lm_gguf_check_reserved_keys(key, val);
978
+ lm_gguf_remove_key(ctx, key);
979
+ ctx->kv.emplace_back(key, val);
980
+ }
981
+
982
+ void lm_gguf_set_val_u64(struct lm_gguf_context * ctx, const char * key, uint64_t val) {
983
+ lm_gguf_check_reserved_keys(key, val);
984
+ lm_gguf_remove_key(ctx, key);
985
+ ctx->kv.emplace_back(key, val);
986
+ }
987
+
988
+ void lm_gguf_set_val_i64(struct lm_gguf_context * ctx, const char * key, int64_t val) {
989
+ lm_gguf_check_reserved_keys(key, val);
990
+ lm_gguf_remove_key(ctx, key);
991
+ ctx->kv.emplace_back(key, val);
992
+ }
993
+
994
+ void lm_gguf_set_val_f64(struct lm_gguf_context * ctx, const char * key, double val) {
995
+ lm_gguf_check_reserved_keys(key, val);
996
+ lm_gguf_remove_key(ctx, key);
997
+ ctx->kv.emplace_back(key, val);
998
+ }
999
+
1000
+ void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val) {
1001
+ lm_gguf_check_reserved_keys(key, val);
1002
+ lm_gguf_remove_key(ctx, key);
1003
+ ctx->kv.emplace_back(key, val);
1004
+ }
1005
+
1006
+ void lm_gguf_set_val_str(struct lm_gguf_context * ctx, const char * key, const char * val) {
1007
+ lm_gguf_check_reserved_keys(key, val);
1008
+ lm_gguf_remove_key(ctx, key);
1009
+ ctx->kv.emplace_back(key, std::string(val));
1010
+ }
1011
+
1012
+ void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, size_t n) {
1013
+ lm_gguf_check_reserved_keys(key, data);
1014
+ lm_gguf_remove_key(ctx, key);
1015
+
1016
+ const size_t nbytes = n*lm_gguf_type_size(type);
1017
+ std::vector<int8_t> tmp(nbytes);
1018
+ if (!tmp.empty()) {
1019
+ memcpy(tmp.data(), data, nbytes);
1020
+ }
1021
+ ctx->kv.emplace_back(key, tmp);
1022
+ ctx->kv.back().cast(type);
1023
+ }
1024
+
1025
+ void lm_gguf_set_arr_str(struct lm_gguf_context * ctx, const char * key, const char ** data, size_t n) {
1026
+ lm_gguf_check_reserved_keys(key, data);
1027
+ lm_gguf_remove_key(ctx, key);
1028
+
1029
+ std::vector<std::string> tmp(n);
1030
+ for (size_t i = 0; i < n; ++i) {
1031
+ tmp[i] = data[i];
1032
+ }
1033
+ ctx->kv.emplace_back(key, tmp);
1034
+ }
1035
+
1036
+ // set or add KV pairs from another context
1037
+ void lm_gguf_set_kv(struct lm_gguf_context * ctx, const struct lm_gguf_context * src) {
1038
+ const int64_t n_kv = lm_gguf_get_n_kv(src);
1039
+ for (int64_t i = 0; i < n_kv; ++i) {
1040
+ const struct lm_gguf_kv & kv = src->kv[i];
1041
+
1042
+ if (!kv.is_array) {
1043
+ switch (kv.get_type()) {
1044
+ case LM_GGUF_TYPE_UINT8: lm_gguf_set_val_u8 (ctx, kv.get_key().c_str(), kv.get_val<uint8_t>()); break;
1045
+ case LM_GGUF_TYPE_INT8: lm_gguf_set_val_i8 (ctx, kv.get_key().c_str(), kv.get_val<int8_t>()); break;
1046
+ case LM_GGUF_TYPE_UINT16: lm_gguf_set_val_u16 (ctx, kv.get_key().c_str(), kv.get_val<uint16_t>()); break;
1047
+ case LM_GGUF_TYPE_INT16: lm_gguf_set_val_i16 (ctx, kv.get_key().c_str(), kv.get_val<int16_t>()); break;
1048
+ case LM_GGUF_TYPE_UINT32: lm_gguf_set_val_u32 (ctx, kv.get_key().c_str(), kv.get_val<uint32_t>()); break;
1049
+ case LM_GGUF_TYPE_INT32: lm_gguf_set_val_i32 (ctx, kv.get_key().c_str(), kv.get_val<int32_t>()); break;
1050
+ case LM_GGUF_TYPE_FLOAT32: lm_gguf_set_val_f32 (ctx, kv.get_key().c_str(), kv.get_val<float>()); break;
1051
+ case LM_GGUF_TYPE_UINT64: lm_gguf_set_val_u64 (ctx, kv.get_key().c_str(), kv.get_val<uint64_t>()); break;
1052
+ case LM_GGUF_TYPE_INT64: lm_gguf_set_val_i64 (ctx, kv.get_key().c_str(), kv.get_val<int64_t>()); break;
1053
+ case LM_GGUF_TYPE_FLOAT64: lm_gguf_set_val_f64 (ctx, kv.get_key().c_str(), kv.get_val<double>()); break;
1054
+ case LM_GGUF_TYPE_BOOL: lm_gguf_set_val_bool(ctx, kv.get_key().c_str(), kv.get_val<bool>()); break;
1055
+ case LM_GGUF_TYPE_STRING: lm_gguf_set_val_str (ctx, kv.get_key().c_str(), kv.get_val<std::string>().c_str()); break;
1056
+ case LM_GGUF_TYPE_ARRAY:
1057
+ default: LM_GGML_ABORT("invalid type");
1058
+ }
1059
+ continue;
1060
+ }
1061
+
1062
+ const size_t ne = kv.get_ne();
1063
+
1064
+ switch (kv.get_type()) {
1065
+ case LM_GGUF_TYPE_UINT8:
1066
+ case LM_GGUF_TYPE_INT8:
1067
+ case LM_GGUF_TYPE_UINT16:
1068
+ case LM_GGUF_TYPE_INT16:
1069
+ case LM_GGUF_TYPE_UINT32:
1070
+ case LM_GGUF_TYPE_INT32:
1071
+ case LM_GGUF_TYPE_FLOAT32:
1072
+ case LM_GGUF_TYPE_UINT64:
1073
+ case LM_GGUF_TYPE_INT64:
1074
+ case LM_GGUF_TYPE_FLOAT64:
1075
+ case LM_GGUF_TYPE_BOOL: {
1076
+ lm_gguf_set_arr_data(ctx, kv.get_key().c_str(), kv.get_type(), kv.data.data(), ne);
1077
+ } break;
1078
+ case LM_GGUF_TYPE_STRING: {
1079
+ std::vector<const char *> tmp(ne);
1080
+ for (size_t j = 0; j < ne; ++j) {
1081
+ tmp[j] = kv.data_string[j].c_str();
1082
+ }
1083
+ lm_gguf_set_arr_str(ctx, kv.get_key().c_str(), tmp.data(), ne);
1084
+ } break;
1085
+ case LM_GGUF_TYPE_ARRAY:
1086
+ default: LM_GGML_ABORT("invalid type");
1087
+ }
1088
+ }
1089
+ }
1090
+
1091
+ void lm_gguf_add_tensor(
1092
+ struct lm_gguf_context * ctx,
1093
+ const struct lm_ggml_tensor * tensor) {
1094
+ LM_GGML_ASSERT(tensor);
1095
+ if (lm_gguf_find_tensor(ctx, tensor->name) != -1) {
1096
+ LM_GGML_ABORT("duplicate tensor name: %s", tensor->name);
1097
+ }
1098
+
1099
+ struct lm_gguf_tensor_info ti;
1100
+ ti.t = *tensor;
1101
+ ti.offset = ctx->info.empty() ? 0 :
1102
+ ctx->info.back().offset + LM_GGML_PAD(lm_ggml_nbytes(&ctx->info.back().t), ctx->alignment);
1103
+ ctx->info.push_back(ti);
1104
+ }
1105
+
1106
+ void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type) {
1107
+ const int64_t tensor_id = lm_gguf_find_tensor(ctx, name);
1108
+ if (tensor_id < 0) {
1109
+ LM_GGML_ABORT("tensor not found: %s", name);
1110
+ }
1111
+ struct lm_ggml_tensor * tensor = &ctx->info[tensor_id].t;
1112
+ const size_t type_size = lm_ggml_type_size(type);
1113
+ const int64_t blck_size = lm_ggml_blck_size(type);
1114
+
1115
+ tensor->type = type;
1116
+ LM_GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type");
1117
+
1118
+ tensor->nb[0] = type_size;
1119
+ tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size);
1120
+ for (int i = 2; i < LM_GGML_MAX_DIMS; i++) {
1121
+ tensor->nb[i] = tensor->nb[i - 1]*tensor->ne[i - 1];
1122
+ }
1123
+
1124
+ // update offsets
1125
+ const int64_t n_tensors = lm_gguf_get_n_tensors(ctx);
1126
+ for (int64_t i = tensor_id + 1; i < n_tensors; ++i) {
1127
+ ctx->info[i].offset = ctx->info[i - 1].offset + LM_GGML_PAD(lm_ggml_nbytes(&ctx->info[i - 1].t), ctx->alignment);
1128
+ }
1129
+ }
1130
+
1131
+ void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data) {
1132
+ const int64_t tensor_id = lm_gguf_find_tensor(ctx, name);
1133
+ if (tensor_id < 0) {
1134
+ LM_GGML_ABORT("tensor not found: %s", name);
1135
+ }
1136
+
1137
+ ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
1138
+ }
1139
+
1140
+ struct lm_gguf_writer {
1141
+ std::vector<int8_t> & buf;
1142
+
1143
+ lm_gguf_writer(std::vector<int8_t> & buf) : buf(buf) {}
1144
+
1145
+ template <typename T>
1146
+ void write(const T & val) const {
1147
+ for (size_t i = 0; i < sizeof(val); ++i) {
1148
+ buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
1149
+ }
1150
+ }
1151
+
1152
+ void write(const std::vector<int8_t> & val) const {
1153
+ buf.insert(buf.end(), val.begin(), val.end());
1154
+ }
1155
+
1156
+ void write(const bool & val) const {
1157
+ const int8_t val8 = val ? 1 : 0;
1158
+ write(val8);
1159
+ }
1160
+
1161
+ void write(const std::string & val) const {
1162
+ {
1163
+ const uint64_t n = val.length();
1164
+ write(n);
1165
+ }
1166
+ for (size_t i = 0; i < val.length(); ++i) {
1167
+ buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]);
1168
+ }
1169
+ }
1170
+
1171
+ void write(const char * val) const {
1172
+ write(std::string(val));
1173
+ }
1174
+
1175
+ void write(const enum lm_ggml_type & val) const {
1176
+ write(int32_t(val));
1177
+ }
1178
+
1179
+ void write(const enum lm_gguf_type & val) const {
1180
+ write(int32_t(val));
1181
+ }
1182
+
1183
+ void write(const struct lm_gguf_kv & kv) const {
1184
+ const uint64_t ne = kv.get_ne();
1185
+
1186
+ write(kv.get_key());
1187
+
1188
+ if (kv.is_array) {
1189
+ write(LM_GGUF_TYPE_ARRAY);
1190
+ write(kv.get_type());
1191
+ write(ne);
1192
+ } else {
1193
+ write(kv.get_type());
1194
+ }
1195
+
1196
+ switch (kv.get_type()) {
1197
+ case LM_GGUF_TYPE_UINT8:
1198
+ case LM_GGUF_TYPE_INT8:
1199
+ case LM_GGUF_TYPE_UINT16:
1200
+ case LM_GGUF_TYPE_INT16:
1201
+ case LM_GGUF_TYPE_UINT32:
1202
+ case LM_GGUF_TYPE_INT32:
1203
+ case LM_GGUF_TYPE_FLOAT32:
1204
+ case LM_GGUF_TYPE_UINT64:
1205
+ case LM_GGUF_TYPE_INT64:
1206
+ case LM_GGUF_TYPE_FLOAT64: {
1207
+ write(kv.data);
1208
+ } break;
1209
+ case LM_GGUF_TYPE_BOOL: {
1210
+ for (size_t i = 0; i < ne; ++i) {
1211
+ write(kv.get_val<bool>(i));
1212
+ }
1213
+ } break;
1214
+ case LM_GGUF_TYPE_STRING: {
1215
+ for (size_t i = 0; i < ne; ++i) {
1216
+ write(kv.get_val<std::string>(i));
1217
+ }
1218
+ } break;
1219
+ case LM_GGUF_TYPE_ARRAY:
1220
+ default: LM_GGML_ABORT("invalid type");
1221
+ }
1222
+ }
1223
+
1224
+ void write_tensor_meta(const struct lm_gguf_tensor_info & info) const {
1225
+ write(info.t.name);
1226
+
1227
+ const uint32_t n_dims = lm_ggml_n_dims(&info.t);
1228
+ write(n_dims);
1229
+
1230
+ for (uint32_t j = 0; j < n_dims; ++j) {
1231
+ write(info.t.ne[j]);
1232
+ }
1233
+ write(info.t.type);
1234
+ write(info.offset);
1235
+ }
1236
+
1237
+ void pad(const size_t alignment) const {
1238
+ while (buf.size() % alignment != 0) {
1239
+ const int8_t zero = 0;
1240
+ write(zero);
1241
+ }
1242
+ }
1243
+
1244
+ void write_tensor_data(const struct lm_gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const {
1245
+ LM_GGML_ASSERT(buf.size() - offset_data == info.offset);
1246
+
1247
+ LM_GGML_ASSERT(lm_ggml_is_contiguous(&info.t));
1248
+ const size_t offset = buf.size();
1249
+ const size_t nbytes = lm_ggml_nbytes(&info.t);
1250
+
1251
+ buf.resize(offset + nbytes);
1252
+ if (info.t.buffer) {
1253
+ lm_ggml_backend_tensor_get(&info.t, buf.data() + offset, 0, nbytes);
1254
+ } else {
1255
+ LM_GGML_ASSERT(info.t.data);
1256
+ memcpy(buf.data() + offset, info.t.data, nbytes);
1257
+ }
1258
+
1259
+ pad(alignment);
1260
+ }
1261
+ };
1262
+
1263
+ void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
1264
+ const struct lm_gguf_writer gw(buf);
1265
+
1266
+ const int64_t n_kv = lm_gguf_get_n_kv(ctx);
1267
+ const int64_t n_tensors = lm_gguf_get_n_tensors(ctx);
1268
+
1269
+ // write header
1270
+ gw.write(LM_GGUF_MAGIC[0]);
1271
+ gw.write(LM_GGUF_MAGIC[1]);
1272
+ gw.write(LM_GGUF_MAGIC[2]);
1273
+ gw.write(LM_GGUF_MAGIC[3]);
1274
+ gw.write(ctx->version);
1275
+ gw.write(n_tensors);
1276
+ gw.write(n_kv);
1277
+
1278
+ // write key-value pairs
1279
+ for (int64_t i = 0; i < n_kv; ++i) {
1280
+ gw.write(ctx->kv[i]);
1281
+ }
1282
+
1283
+ // write tensor info
1284
+ for (int64_t i = 0; i < n_tensors; ++i) {
1285
+ gw.write_tensor_meta(ctx->info[i]);
1286
+ }
1287
+
1288
+ // we require the data section to be aligned
1289
+ gw.pad(ctx->alignment);
1290
+
1291
+ if (only_meta) {
1292
+ return;
1293
+ }
1294
+
1295
+ const size_t offset_data = gw.buf.size();
1296
+
1297
+ // write tensor data
1298
+ for (int64_t i = 0; i < n_tensors; ++i) {
1299
+ gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment);
1300
+ }
1301
+ }
1302
+
1303
+ bool lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta) {
1304
+ FILE * file = lm_ggml_fopen(fname, "wb");
1305
+
1306
+ if (!file) {
1307
+ fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1308
+ return false;
1309
+ }
1310
+
1311
+ std::vector<int8_t> buf;
1312
+ lm_gguf_write_to_buf(ctx, buf, only_meta);
1313
+ const bool ok = fwrite(buf.data(), 1, buf.size(), file) == buf.size();
1314
+ fclose(file);
1315
+ return ok;
1316
+ }
1317
+
1318
+ size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx) {
1319
+ // only return size
1320
+ std::vector<int8_t> buf;
1321
+ lm_gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1322
+ return buf.size();
1323
+ }
1324
+
1325
+ void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data) {
1326
+ std::vector<int8_t> buf;
1327
+ lm_gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1328
+ memcpy(data, buf.data(), buf.size());
1329
+ }