cui-llama.rn 1.3.5 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/android/src/main/CMakeLists.txt +14 -8
  2. package/android/src/main/jni.cpp +38 -37
  3. package/cpp/common.cpp +43 -26
  4. package/cpp/common.h +18 -11
  5. package/cpp/ggml-backend-reg.cpp +5 -0
  6. package/cpp/ggml-backend.cpp +5 -2
  7. package/cpp/ggml-cpp.h +1 -0
  8. package/cpp/ggml-cpu-aarch64.cpp +6 -1
  9. package/cpp/ggml-cpu-quants.c +5 -1
  10. package/cpp/ggml-impl.h +11 -16
  11. package/cpp/ggml-metal.m +2 -2
  12. package/cpp/ggml.c +0 -1276
  13. package/cpp/ggml.h +0 -140
  14. package/cpp/gguf.cpp +1325 -0
  15. package/cpp/gguf.h +202 -0
  16. package/cpp/llama-adapter.cpp +346 -0
  17. package/cpp/llama-adapter.h +73 -0
  18. package/cpp/llama-arch.cpp +1434 -0
  19. package/cpp/llama-arch.h +395 -0
  20. package/cpp/llama-batch.cpp +368 -0
  21. package/cpp/llama-batch.h +88 -0
  22. package/cpp/llama-chat.cpp +567 -0
  23. package/cpp/llama-chat.h +51 -0
  24. package/cpp/llama-context.cpp +1771 -0
  25. package/cpp/llama-context.h +128 -0
  26. package/cpp/llama-cparams.cpp +1 -0
  27. package/cpp/llama-cparams.h +37 -0
  28. package/cpp/llama-cpp.h +30 -0
  29. package/cpp/llama-grammar.cpp +1 -0
  30. package/cpp/llama-grammar.h +3 -1
  31. package/cpp/llama-hparams.cpp +71 -0
  32. package/cpp/llama-hparams.h +140 -0
  33. package/cpp/llama-impl.cpp +167 -0
  34. package/cpp/llama-impl.h +16 -136
  35. package/cpp/llama-kv-cache.cpp +718 -0
  36. package/cpp/llama-kv-cache.h +218 -0
  37. package/cpp/llama-mmap.cpp +589 -0
  38. package/cpp/llama-mmap.h +67 -0
  39. package/cpp/llama-model-loader.cpp +1011 -0
  40. package/cpp/llama-model-loader.h +158 -0
  41. package/cpp/llama-model.cpp +2202 -0
  42. package/cpp/llama-model.h +391 -0
  43. package/cpp/llama-sampling.cpp +117 -4
  44. package/cpp/llama-vocab.cpp +21 -28
  45. package/cpp/llama-vocab.h +13 -1
  46. package/cpp/llama.cpp +8437 -19421
  47. package/cpp/llama.cpp.rej +23 -0
  48. package/cpp/llama.h +31 -6
  49. package/cpp/rn-llama.hpp +39 -37
  50. package/cpp/sgemm.cpp +776 -70
  51. package/cpp/unicode.cpp +6 -0
  52. package/package.json +1 -1
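
Note: the headline change in this version is the extraction of the GGUF container code out of package/cpp/ggml.c (-1276 lines) into the new package/cpp/gguf.cpp and gguf.h (+1325/+202), together with the split of llama.cpp into the new llama-*.cpp/.h sources, apparently mirroring the upstream llama.cpp refactor. The lm_gguf_* API itself is unchanged by the move. The minimal sketch below is illustrative only (not part of the diff); it assumes the relocated declarations are reachable via the new gguf.h, that the struct's field order is no_alloc then ctx, and that the file is built as C:

    #include <stdio.h>
    #include "gguf.h"   // assumed location of the relocated lm_gguf_* declarations

    int main(int argc, char ** argv) {
        if (argc < 2) {
            fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
            return 1;
        }

        // metadata-only load: no lm_ggml_context is created and tensor data is not read
        struct lm_gguf_init_params params = {
            /*.no_alloc =*/ true,
            /*.ctx      =*/ NULL,
        };

        struct lm_gguf_context * gguf = lm_gguf_init_from_file(argv[1], params);
        if (!gguf) {
            return 1;
        }

        printf("GGUF version: %d\n", lm_gguf_get_version(gguf));

        // enumerate the tensors described in the file's metadata section
        const int n_tensors = lm_gguf_get_n_tensors(gguf);
        for (int i = 0; i < n_tensors; ++i) {
            printf("tensor %d: %s\n", i, lm_gguf_get_tensor_name(gguf, i));
        }

        lm_gguf_free(gguf);
        return 0;
    }

Every call above appears verbatim in the block removed from ggml.c below, so code like this should behave the same whether linked against the old ggml.c or the new gguf.cpp.
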
package/cpp/ggml.c CHANGED
@@ -1601,15 +1601,8 @@ static struct lm_ggml_tensor * lm_ggml_new_tensor_impl(
 
     struct lm_ggml_tensor * const result = (struct lm_ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
-#ifdef __clang__
-    // temporary until lm_ggml_tensor::backend is removed
-    #pragma clang diagnostic push
-    #pragma clang diagnostic ignored "-Wdeprecated-declarations"
-#endif
-
     *result = (struct lm_ggml_tensor) {
         /*.type =*/ type,
-        /*.backend =*/ LM_GGML_BACKEND_TYPE_CPU,
         /*.buffer =*/ NULL,
         /*.ne =*/ { 1, 1, 1, 1 },
         /*.nb =*/ { 0, 0, 0, 0 },
@@ -1625,10 +1618,6 @@ static struct lm_ggml_tensor * lm_ggml_new_tensor_impl(
         /*.padding =*/ { 0 },
     };
 
-#ifdef __clang__
-    #pragma clang diagnostic pop
-#endif
-
     // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
     //LM_GGML_ASSERT_ALIGNED(result->data);
 
@@ -6430,1271 +6419,6 @@ size_t lm_ggml_quantize_chunk(
 
 ////////////////////////////////////////////////////////////////////////////////
 
-struct lm_gguf_str {
-    uint64_t n; // GGUFv2
-    char * data;
-};
-
-static const size_t LM_GGUF_TYPE_SIZE[LM_GGUF_TYPE_COUNT] = {
-    [LM_GGUF_TYPE_UINT8]   = sizeof(uint8_t),
-    [LM_GGUF_TYPE_INT8]    = sizeof(int8_t),
-    [LM_GGUF_TYPE_UINT16]  = sizeof(uint16_t),
-    [LM_GGUF_TYPE_INT16]   = sizeof(int16_t),
-    [LM_GGUF_TYPE_UINT32]  = sizeof(uint32_t),
-    [LM_GGUF_TYPE_INT32]   = sizeof(int32_t),
-    [LM_GGUF_TYPE_FLOAT32] = sizeof(float),
-    [LM_GGUF_TYPE_BOOL]    = sizeof(bool),
-    [LM_GGUF_TYPE_STRING]  = sizeof(struct lm_gguf_str),
-    [LM_GGUF_TYPE_UINT64]  = sizeof(uint64_t),
-    [LM_GGUF_TYPE_INT64]   = sizeof(int64_t),
-    [LM_GGUF_TYPE_FLOAT64] = sizeof(double),
-    [LM_GGUF_TYPE_ARRAY]   = 0, // undefined
-};
-static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13");
-
-static const char * LM_GGUF_TYPE_NAME[LM_GGUF_TYPE_COUNT] = {
-    [LM_GGUF_TYPE_UINT8]   = "u8",
-    [LM_GGUF_TYPE_INT8]    = "i8",
-    [LM_GGUF_TYPE_UINT16]  = "u16",
-    [LM_GGUF_TYPE_INT16]   = "i16",
-    [LM_GGUF_TYPE_UINT32]  = "u32",
-    [LM_GGUF_TYPE_INT32]   = "i32",
-    [LM_GGUF_TYPE_FLOAT32] = "f32",
-    [LM_GGUF_TYPE_BOOL]    = "bool",
-    [LM_GGUF_TYPE_STRING]  = "str",
-    [LM_GGUF_TYPE_ARRAY]   = "arr",
-    [LM_GGUF_TYPE_UINT64]  = "u64",
-    [LM_GGUF_TYPE_INT64]   = "i64",
-    [LM_GGUF_TYPE_FLOAT64] = "f64",
-};
-static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13");
-
-union lm_gguf_value {
-    uint8_t  uint8;
-    int8_t   int8;
-    uint16_t uint16;
-    int16_t  int16;
-    uint32_t uint32;
-    int32_t  int32;
-    float    float32;
-    uint64_t uint64;
-    int64_t  int64;
-    double   float64;
-    bool     bool_;
-
-    struct lm_gguf_str str;
-
-    struct {
-        enum lm_gguf_type type;
-
-        uint64_t n; // GGUFv2
-        void * data;
-    } arr;
-};
-
-struct lm_gguf_kv {
-    struct lm_gguf_str key;
-
-    enum lm_gguf_type type;
-    union lm_gguf_value value;
-};
-
-struct lm_gguf_header {
-    char magic[4];
-
-    uint32_t version;
-    uint64_t n_tensors; // GGUFv2
-    uint64_t n_kv;      // GGUFv2
-};
-
-struct lm_gguf_tensor_info {
-    struct lm_gguf_str name;
-
-    uint32_t n_dims;
-    uint64_t ne[LM_GGML_MAX_DIMS];
-
-    enum lm_ggml_type type;
-
-    uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
-
-    // for writing API
-    const void * data;
-    size_t size;
-};
-
-struct lm_gguf_context {
-    struct lm_gguf_header header;
-
-    struct lm_gguf_kv * kv;
-    struct lm_gguf_tensor_info * infos;
-
-    size_t alignment;
-    size_t offset; // offset of `data` from beginning of file
-    size_t size;   // size of `data` in bytes
-
-    //uint8_t * padding;
-    void * data;
-};
-
-size_t lm_gguf_type_size(enum lm_gguf_type type) {
-    LM_GGML_ASSERT(0 <= type && type < LM_GGUF_TYPE_COUNT);
-    return LM_GGUF_TYPE_SIZE[type];
-}
-
-static bool lm_gguf_tensor_info_sanitize(struct lm_gguf_tensor_info * info) {
-    if (info->n_dims > LM_GGML_MAX_DIMS) {
-        fprintf(stderr, "%s: invalid number of dimensions (%" PRIu32 ")\n", __func__, info->n_dims);
-        return false;
-    }
-
-    if (info->type < 0 || info->type >= LM_GGML_TYPE_COUNT) {
-        fprintf(stderr, "%s: invalid type (%d)\n", __func__, info->type);
-        return false;
-    }
-
-    if (strlen(info->name.data) >= LM_GGML_MAX_NAME) {
-        fprintf(stderr, "%s: tensor '%s' name is too long\n", __func__, info->name.data);
-        return false;
-    }
-
-    for (uint32_t i = 0; i < info->n_dims; ++i) {
-        if (info->ne[i] <= 0) {
-            fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[i]);
-            return false;
-        }
-    }
-
-    // prevent overflow for total number of elements
-    if (INT64_MAX/info->ne[1] <= info->ne[0]) {
-        fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[1]);
-        return false;
-    }
-
-    if (INT64_MAX/info->ne[2] <= info->ne[0]*info->ne[1]) {
-        fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[2]);
-        return false;
-    }
-
-    if (INT64_MAX/info->ne[3] <= info->ne[0]*info->ne[1]*info->ne[2]) {
-        fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[3]);
-        return false;
-    }
-
-    return true;
-}
-
-static bool lm_gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
-    const size_t n = fread(dst, 1, size, file);
-    *offset += n;
-    return n == size;
-}
-
-static bool lm_gguf_fread_str(FILE * file, struct lm_gguf_str * p, size_t * offset) {
-    p->n    = 0;
-    p->data = NULL;
-
-    bool ok = true;
-
-    ok = ok && lm_gguf_fread_el(file, &p->n, sizeof(p->n), offset);
-
-    // early exit if string length is invalid, prevents from integer overflow
-    if (p->n == SIZE_MAX) {
-        fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n);
-        return false;
-    }
-
-    p->data = calloc(p->n + 1, 1);
-    if (!p->data) {
-        fprintf(stderr, "%s: failed to allocate memory for string of length %" PRIu64 "\n", __func__, p->n);
-        return false;
-    }
-
-    ok = ok && lm_gguf_fread_el(file, p->data, p->n, offset);
-
-    return ok;
-}
-
-static void lm_gguf_free_kv(struct lm_gguf_kv * kv) {
-    if (kv->key.data) {
-        LM_GGML_FREE(kv->key.data);
-    }
-
-    if (kv->type == LM_GGUF_TYPE_STRING) {
-        if (kv->value.str.data) {
-            LM_GGML_FREE(kv->value.str.data);
-        }
-    }
-
-    if (kv->type == LM_GGUF_TYPE_ARRAY) {
-        if (kv->value.arr.data) {
-            if (kv->value.arr.type == LM_GGUF_TYPE_STRING) {
-                for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
-                    struct lm_gguf_str * str = &((struct lm_gguf_str *) kv->value.arr.data)[j];
-                    if (str->data) {
-                        LM_GGML_FREE(str->data);
-                    }
-                }
-            }
-            LM_GGML_FREE(kv->value.arr.data);
-        }
-    }
-}
-
-struct lm_gguf_context * lm_gguf_init_empty(void) {
-    struct lm_gguf_context * ctx = calloc(1, sizeof(struct lm_gguf_context));
-    if (!ctx) {
-        fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
-        return NULL;
-    }
-
-    memcpy(ctx->header.magic, LM_GGUF_MAGIC, sizeof(ctx->header.magic));
-    ctx->header.version   = LM_GGUF_VERSION;
-    ctx->header.n_tensors = 0;
-    ctx->header.n_kv      = 0;
-
-    ctx->kv    = NULL;
-    ctx->infos = NULL;
-
-    ctx->alignment = LM_GGUF_DEFAULT_ALIGNMENT;
-    ctx->offset    = 0;
-    ctx->size      = 0;
-
-    ctx->data = NULL;
-
-    return ctx;
-}
-
-struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf_init_params params) {
-    // offset from start of file
-    size_t offset = 0;
-
-    char magic[4];
-
-    // check the magic before making allocations
-    {
-        lm_gguf_fread_el(file, &magic, sizeof(magic), &offset);
-
-        for (uint32_t i = 0; i < sizeof(magic); i++) {
-            if (magic[i] != LM_GGUF_MAGIC[i]) {
-                fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
-                return NULL;
-            }
-        }
-    }
-
-    bool ok = true;
-
-    struct lm_gguf_context * ctx = calloc(1, sizeof(struct lm_gguf_context));
-    if (!ctx) {
-        fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
-        return NULL;
-    }
-
-    // read the header
-    {
-        strncpy(ctx->header.magic, magic, 4);
-
-        ctx->kv    = NULL;
-        ctx->infos = NULL;
-        ctx->data  = NULL;
-
-        ok = ok && lm_gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
-        ok = ok && lm_gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
-        ok = ok && lm_gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
-
-        if (ctx->header.version == 1) {
-            fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
-            lm_gguf_free(ctx);
-            return NULL;
-        }
-
-        // sanity-checks to prevent from integer/buffer overflows
-
-        ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct lm_gguf_tensor_info));
-        ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/lm_ggml_tensor_overhead());
-        ok = ok && (ctx->header.n_kv      < (SIZE_MAX/2)/sizeof(struct lm_gguf_kv));
-
-        if (!ok) {
-            fprintf(stderr, "%s: failed to read header\n", __func__);
-            lm_gguf_free(ctx);
-            return NULL;
-        }
-    }
-
-    // read the kv pairs
-    {
-        const uint64_t n_kv = ctx->header.n_kv;
-
-        if (n_kv > 0) {
-            ctx->kv = calloc(n_kv, sizeof(struct lm_gguf_kv));
-            if (!ctx->kv) {
-                fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
-                lm_gguf_free(ctx);
-                return NULL;
-            }
-        }
-
-        for (uint64_t i = 0; i < n_kv; ++i) {
-            struct lm_gguf_kv * kv = &ctx->kv[i];
-
-            //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
-
-            ok = ok && lm_gguf_fread_str(file, &kv->key, &offset);
-            ok = ok && lm_gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
-
-            //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
-
-            switch (kv->type) {
-                case LM_GGUF_TYPE_UINT8:   ok = ok && lm_gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
-                case LM_GGUF_TYPE_INT8:    ok = ok && lm_gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
-                case LM_GGUF_TYPE_UINT16:  ok = ok && lm_gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
-                case LM_GGUF_TYPE_INT16:   ok = ok && lm_gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
-                case LM_GGUF_TYPE_UINT32:  ok = ok && lm_gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
-                case LM_GGUF_TYPE_INT32:   ok = ok && lm_gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
-                case LM_GGUF_TYPE_FLOAT32: ok = ok && lm_gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
-                case LM_GGUF_TYPE_UINT64:  ok = ok && lm_gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
-                case LM_GGUF_TYPE_INT64:   ok = ok && lm_gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
-                case LM_GGUF_TYPE_FLOAT64: ok = ok && lm_gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
-                case LM_GGUF_TYPE_BOOL:    ok = ok && lm_gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
-                case LM_GGUF_TYPE_STRING:  ok = ok && lm_gguf_fread_str(file, &kv->value.str, &offset); break;
-                case LM_GGUF_TYPE_ARRAY:
-                    {
-                        ok = ok && lm_gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
-                        ok = ok && lm_gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
-
-                        switch (kv->value.arr.type) {
-                            case LM_GGUF_TYPE_UINT8:
-                            case LM_GGUF_TYPE_INT8:
-                            case LM_GGUF_TYPE_UINT16:
-                            case LM_GGUF_TYPE_INT16:
-                            case LM_GGUF_TYPE_UINT32:
-                            case LM_GGUF_TYPE_INT32:
-                            case LM_GGUF_TYPE_FLOAT32:
-                            case LM_GGUF_TYPE_UINT64:
-                            case LM_GGUF_TYPE_INT64:
-                            case LM_GGUF_TYPE_FLOAT64:
-                            case LM_GGUF_TYPE_BOOL:
-                                {
-                                    // prevent from integer overflow in the malloc below
-                                    if (kv->value.arr.n >= SIZE_MAX/lm_gguf_type_size(kv->value.arr.type)) {
-                                        fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
-                                        lm_gguf_free(ctx);
-                                        return NULL;
-                                    }
-
-                                    kv->value.arr.data = calloc(kv->value.arr.n, lm_gguf_type_size(kv->value.arr.type));
-                                    if (!kv->value.arr.data) {
-                                        fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
-                                        lm_gguf_free(ctx);
-                                        return NULL;
-                                    }
-
-                                    ok = ok && lm_gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * lm_gguf_type_size(kv->value.arr.type), &offset);
-                                } break;
-                            case LM_GGUF_TYPE_STRING:
-                                {
-                                    // prevent from integer overflow in the malloc below
-                                    if (kv->value.arr.n >= SIZE_MAX/sizeof(struct lm_gguf_str)) {
-                                        fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
-                                        lm_gguf_free(ctx);
-                                        return NULL;
-                                    }
-
-                                    kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct lm_gguf_str));
-                                    if (!kv->value.arr.data) {
-                                        fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
-                                        lm_gguf_free(ctx);
-                                        return NULL;
-                                    }
-
-                                    for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
-                                        ok = ok && lm_gguf_fread_str(file, &((struct lm_gguf_str *) kv->value.arr.data)[j], &offset);
-                                    }
-                                } break;
-                            case LM_GGUF_TYPE_ARRAY:
-                            default:
-                                {
-                                    fprintf(stderr, "%s: invalid array type %d\n", __func__, kv->value.arr.type);
-                                    ok = false;
-                                } break;
-                        }
-                    } break;
-                default:
-                    {
-                        fprintf(stderr, "%s: invalid type %d\n", __func__, kv->type);
-                        ok = false;
-                    } break;
-            }
-
-            if (!ok) {
-                break;
-            }
-        }
-
-        if (!ok) {
-            fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
-            lm_gguf_free(ctx);
-            return NULL;
-        }
-    }
-
-    // read the tensor infos
-    if (ctx->header.n_tensors > 0) {
-        ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct lm_gguf_tensor_info));
-        if (!ctx->infos) {
-            fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
-            lm_gguf_free(ctx);
-            return NULL;
-        }
-
-        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
-            struct lm_gguf_tensor_info * info = &ctx->infos[i];
-
-            for (int j = 0; j < LM_GGML_MAX_DIMS; ++j) {
-                info->ne[j] = 1;
-            }
-
-            ok = ok && lm_gguf_fread_str(file, &info->name, &offset);
-            ok = ok && lm_gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
-
-            ok = ok && (info->n_dims <= LM_GGML_MAX_DIMS);
-
-            for (uint32_t j = 0; j < info->n_dims; ++j) {
-                ok = ok && lm_gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
-            }
-
-            ok = ok && lm_gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
-            ok = ok && lm_gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
-
-            ok = ok && lm_gguf_tensor_info_sanitize(info);
-
-            // make sure there is no duplicated tensor names
-            for (uint64_t j = 0; j < i && ok; ++j) {
-                if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
-                    fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
-                    ok = false;
-                }
-            }
-
-            if (!ok) {
-                fprintf(stderr, "%s: failed to read tensor info\n", __func__);
-                lm_gguf_free(ctx);
-                return NULL;
-            }
-        }
-    }
-
-    ctx->alignment = LM_GGUF_DEFAULT_ALIGNMENT;
-
-    int alignment_idx = lm_gguf_find_key(ctx, "general.alignment");
-    if (alignment_idx != -1) {
-        ctx->alignment = lm_gguf_get_val_u32(ctx, alignment_idx);
-    }
-
-    // we require the data section to be aligned, so take into account any padding
-    {
-        const size_t offset_pad = offset % ctx->alignment;
-
-        if (offset_pad != 0) {
-            offset += ctx->alignment - offset_pad;
-            fseek(file, offset, SEEK_SET);
-        }
-    }
-
-    // store the current file offset - this is where the data section starts
-    ctx->offset = offset;
-
-    // compute the total size of the data section, taking into account the alignment
-    {
-        ctx->size = 0;
-        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
-            struct lm_gguf_tensor_info * info = &ctx->infos[i];
-
-            const int64_t ne =
-                (int64_t) info->ne[0] *
-                (int64_t) info->ne[1] *
-                (int64_t) info->ne[2] *
-                (int64_t) info->ne[3];
-
-            if (lm_ggml_blck_size(info->type) == 0 ) {
-                // this tensor type support have been removed:
-                fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
-                        __func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type));
-                lm_gguf_free(ctx);
-                return NULL;
-            }
-
-            if (ne % lm_ggml_blck_size(info->type) != 0) {
-                fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
-                        __func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type), ne, lm_ggml_blck_size(info->type));
-                lm_gguf_free(ctx);
-                return NULL;
-            }
-
-            const size_t size_cur = lm_ggml_row_size(info->type, ne);
-
-            ctx->size += LM_GGML_PAD(size_cur, ctx->alignment);
-        }
-    }
-
-    // load the tensor data only if requested
-    if (params.ctx != NULL) {
-        // if the provided lm_gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
-        // otherwise, we load the binary blob into the created lm_ggml_context as well, and point the "data" members of
-        // the lm_ggml_tensor structs to the appropriate locations in the binary blob
-
-        // compute the exact size needed for the new lm_ggml_context
-        const size_t mem_size =
-            params.no_alloc ?
-            (ctx->header.n_tensors    )*lm_ggml_tensor_overhead() :
-            (ctx->header.n_tensors + 1)*lm_ggml_tensor_overhead() + ctx->size;
-
-        struct lm_ggml_init_params pdata = {
-            .mem_size   = mem_size,
-            .mem_buffer = NULL,
-            .no_alloc   = params.no_alloc,
-        };
-
-        *params.ctx = lm_ggml_init(pdata);
-        if (*params.ctx == NULL) {
-            fprintf(stderr, "%s: failed to initialize context\n", __func__);
-            lm_gguf_free(ctx);
-            return NULL;
-        }
-
-        struct lm_ggml_context * ctx_data = *params.ctx;
-
-        struct lm_ggml_tensor * data = NULL;
-
-        if (!params.no_alloc) {
-            data = lm_ggml_new_tensor_1d(ctx_data, LM_GGML_TYPE_I8, ctx->size);
-
-            ok = ok && data != NULL;
-
-            // read the binary blob with the tensor data
-            ok = ok && lm_gguf_fread_el(file, data->data, ctx->size, &offset);
-
-            if (!ok) {
-                fprintf(stderr, "%s: failed to read tensor data\n", __func__);
-                lm_ggml_free(ctx_data);
-                lm_gguf_free(ctx);
-                return NULL;
-            }
-
-            ctx->data = data->data;
-        }
-
-        lm_ggml_set_no_alloc(ctx_data, true);
-
-        // create the tensors
-        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
-            const int64_t ne[LM_GGML_MAX_DIMS] = {
-                ctx->infos[i].ne[0],
-                ctx->infos[i].ne[1],
-                ctx->infos[i].ne[2],
-                ctx->infos[i].ne[3],
-            };
-
-            struct lm_ggml_tensor * cur = lm_ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne);
-
-            ok = ok && cur != NULL;
-
-            if (!ok) {
-                break;
-            }
-
-            lm_ggml_set_name(cur, ctx->infos[i].name.data);
-
-            // point the data member to the appropriate location in the binary blob using the tensor infos
-            if (!params.no_alloc) {
-                //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
-                cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
-            }
-        }
-
-        if (!ok) {
-            fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
-            lm_ggml_free(ctx_data);
-            lm_gguf_free(ctx);
-            return NULL;
-        }
-
-        lm_ggml_set_no_alloc(ctx_data, params.no_alloc);
-    }
-
-    return ctx;
-}
-
-struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
-    FILE * file = lm_ggml_fopen(fname, "rb");
-    if (!file) {
-        fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
-        return NULL;
-    }
-
-    struct lm_gguf_context * result = lm_gguf_init_from_file_impl(file, params);
-    fclose(file);
-    return result;
-}
-
-void lm_gguf_free(struct lm_gguf_context * ctx) {
-    if (ctx == NULL) {
-        return;
-    }
-
-    if (ctx->kv) {
-        // free string memory - not great..
-        for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
-            lm_gguf_free_kv(&ctx->kv[i]);
-        }
-
-        LM_GGML_FREE(ctx->kv);
-    }
-
-    if (ctx->infos) {
-        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
-            struct lm_gguf_tensor_info * info = &ctx->infos[i];
-
-            if (info->name.data) {
-                LM_GGML_FREE(info->name.data);
-            }
-        }
-
-        LM_GGML_FREE(ctx->infos);
-    }
-
-    LM_GGML_FREE(ctx);
-}
-
-const char * lm_gguf_type_name(enum lm_gguf_type type) {
-    return LM_GGUF_TYPE_NAME[type];
-}
-
-int lm_gguf_get_version(const struct lm_gguf_context * ctx) {
-    return ctx->header.version;
-}
-
-size_t lm_gguf_get_alignment(const struct lm_gguf_context * ctx) {
-    return ctx->alignment;
-}
-
-size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx) {
-    return ctx->offset;
-}
-
-void * lm_gguf_get_data(const struct lm_gguf_context * ctx) {
-    return ctx->data;
-}
-
-int lm_gguf_get_n_kv(const struct lm_gguf_context * ctx) {
-    return ctx->header.n_kv;
-}
-
-int lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key) {
-    // return -1 if key not found
-    int keyfound = -1;
-
-    const int n_kv = lm_gguf_get_n_kv(ctx);
-
-    for (int i = 0; i < n_kv; ++i) {
-        if (strcmp(key, lm_gguf_get_key(ctx, i)) == 0) {
-            keyfound = i;
-            break;
-        }
-    }
-
-    return keyfound;
-}
-
-const char * lm_gguf_get_key(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    return ctx->kv[key_id].key.data;
-}
-
-enum lm_gguf_type lm_gguf_get_kv_type(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    return ctx->kv[key_id].type;
-}
-
-enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY);
-    return ctx->kv[key_id].value.arr.type;
-}
-
-const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY);
-    return ctx->kv[key_id].value.arr.data;
-}
-
-const char * lm_gguf_get_arr_str(const struct lm_gguf_context * ctx, int key_id, int i) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY);
-    struct lm_gguf_kv * kv = &ctx->kv[key_id];
-    struct lm_gguf_str * str = &((struct lm_gguf_str *) kv->value.arr.data)[i];
-    return str->data;
-}
-
-int lm_gguf_get_arr_n(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY);
-    return ctx->kv[key_id].value.arr.n;
-}
-
-uint8_t lm_gguf_get_val_u8(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT8);
-    return ctx->kv[key_id].value.uint8;
-}
-
-int8_t lm_gguf_get_val_i8(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT8);
-    return ctx->kv[key_id].value.int8;
-}
-
-uint16_t lm_gguf_get_val_u16(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT16);
-    return ctx->kv[key_id].value.uint16;
-}
-
-int16_t lm_gguf_get_val_i16(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT16);
-    return ctx->kv[key_id].value.int16;
-}
-
-uint32_t lm_gguf_get_val_u32(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT32);
-    return ctx->kv[key_id].value.uint32;
-}
-
-int32_t lm_gguf_get_val_i32(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT32);
-    return ctx->kv[key_id].value.int32;
-}
-
-float lm_gguf_get_val_f32(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_FLOAT32);
-    return ctx->kv[key_id].value.float32;
-}
-
-uint64_t lm_gguf_get_val_u64(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT64);
-    return ctx->kv[key_id].value.uint64;
-}
-
-int64_t lm_gguf_get_val_i64(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT64);
-    return ctx->kv[key_id].value.int64;
-}
-
-double lm_gguf_get_val_f64(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_FLOAT64);
-    return ctx->kv[key_id].value.float64;
-}
-
-bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_BOOL);
-    return ctx->kv[key_id].value.bool_;
-}
-
-const char * lm_gguf_get_val_str(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_STRING);
-    return ctx->kv[key_id].value.str.data;
-}
-
-const void * lm_gguf_get_val_data(const struct lm_gguf_context * ctx, int key_id) {
-    LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
-    LM_GGML_ASSERT(ctx->kv[key_id].type != LM_GGUF_TYPE_ARRAY);
-    LM_GGML_ASSERT(ctx->kv[key_id].type != LM_GGUF_TYPE_STRING);
-    return &ctx->kv[key_id].value;
-}
-
-int lm_gguf_get_n_tensors(const struct lm_gguf_context * ctx) {
-    return ctx->header.n_tensors;
-}
-
-int lm_gguf_find_tensor(const struct lm_gguf_context * ctx, const char * name) {
-    // return -1 if tensor not found
-    int tensorfound = -1;
-
-    const int n_tensors = lm_gguf_get_n_tensors(ctx);
-
-    for (int i = 0; i < n_tensors; ++i) {
-        if (strcmp(name, lm_gguf_get_tensor_name(ctx, i)) == 0) {
-            tensorfound = i;
-            break;
-        }
-    }
-
-    return tensorfound;
-}
-
-size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int i) {
-    return ctx->infos[i].offset;
-}
-
-char * lm_gguf_get_tensor_name(const struct lm_gguf_context * ctx, int i) {
-    return ctx->infos[i].name.data;
-}
-
-enum lm_ggml_type lm_gguf_get_tensor_type(const struct lm_gguf_context * ctx, int i) {
-    return ctx->infos[i].type;
-}
-
-// returns the index
-static int lm_gguf_get_or_add_key(struct lm_gguf_context * ctx, const char * key) {
-    const int idx = lm_gguf_find_key(ctx, key);
-    if (idx >= 0) {
-        return idx;
-    }
-
-    const int n_kv = lm_gguf_get_n_kv(ctx);
-
-    ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct lm_gguf_kv));
-    ctx->kv[n_kv].key.n    = strlen(key);
-    ctx->kv[n_kv].key.data = strdup(key);
-    ctx->header.n_kv++;
-
-    return n_kv;
-}
-
-void lm_gguf_remove_key(struct lm_gguf_context * ctx, const char * key) {
-    const int idx = lm_gguf_find_key(ctx, key);
-    if (idx >= 0) {
-        const int n_kv = lm_gguf_get_n_kv(ctx);
-        lm_gguf_free_kv(&ctx->kv[idx]);
-        for (int i = idx; i < n_kv-1; ++i) {
-            ctx->kv[i] = ctx->kv[i+1];
-        }
-        ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct lm_gguf_kv));
-        ctx->header.n_kv--;
-    }
-}
-
-void lm_gguf_set_val_u8(struct lm_gguf_context * ctx, const char * key, uint8_t val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type        = LM_GGUF_TYPE_UINT8;
-    ctx->kv[idx].value.uint8 = val;
-}
-
-void lm_gguf_set_val_i8(struct lm_gguf_context * ctx, const char * key, int8_t val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type       = LM_GGUF_TYPE_INT8;
-    ctx->kv[idx].value.int8 = val;
-}
-
-void lm_gguf_set_val_u16(struct lm_gguf_context * ctx, const char * key, uint16_t val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type         = LM_GGUF_TYPE_UINT16;
-    ctx->kv[idx].value.uint16 = val;
-}
-
-void lm_gguf_set_val_i16(struct lm_gguf_context * ctx, const char * key, int16_t val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type        = LM_GGUF_TYPE_INT16;
-    ctx->kv[idx].value.int16 = val;
-}
-
-void lm_gguf_set_val_u32(struct lm_gguf_context * ctx, const char * key, uint32_t val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type         = LM_GGUF_TYPE_UINT32;
-    ctx->kv[idx].value.uint32 = val;
-}
-
-void lm_gguf_set_val_i32(struct lm_gguf_context * ctx, const char * key, int32_t val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type        = LM_GGUF_TYPE_INT32;
-    ctx->kv[idx].value.int32 = val;
-}
-
-void lm_gguf_set_val_f32(struct lm_gguf_context * ctx, const char * key, float val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type          = LM_GGUF_TYPE_FLOAT32;
-    ctx->kv[idx].value.float32 = val;
-}
-
-void lm_gguf_set_val_u64(struct lm_gguf_context * ctx, const char * key, uint64_t val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type         = LM_GGUF_TYPE_UINT64;
-    ctx->kv[idx].value.uint64 = val;
-}
-
-void lm_gguf_set_val_i64(struct lm_gguf_context * ctx, const char * key, int64_t val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type        = LM_GGUF_TYPE_INT64;
-    ctx->kv[idx].value.int64 = val;
-}
-
-void lm_gguf_set_val_f64(struct lm_gguf_context * ctx, const char * key, double val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type          = LM_GGUF_TYPE_FLOAT64;
-    ctx->kv[idx].value.float64 = val;
-}
-
-void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type        = LM_GGUF_TYPE_BOOL;
-    ctx->kv[idx].value.bool_ = val;
-}
-
-void lm_gguf_set_val_str(struct lm_gguf_context * ctx, const char * key, const char * val) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type           = LM_GGUF_TYPE_STRING;
-    ctx->kv[idx].value.str.n    = strlen(val);
-    ctx->kv[idx].value.str.data = strdup(val);
-}
-
-void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, int n) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type           = LM_GGUF_TYPE_ARRAY;
-    ctx->kv[idx].value.arr.type = type;
-    ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = LM_GGML_CALLOC(n, lm_gguf_type_size(type));
-    memcpy(ctx->kv[idx].value.arr.data, data, n*lm_gguf_type_size(type));
-}
-
-void lm_gguf_set_arr_str(struct lm_gguf_context * ctx, const char * key, const char ** data, int n) {
-    const int idx = lm_gguf_get_or_add_key(ctx, key);
-
-    ctx->kv[idx].type           = LM_GGUF_TYPE_ARRAY;
-    ctx->kv[idx].value.arr.type = LM_GGUF_TYPE_STRING;
-    ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = LM_GGML_CALLOC(n, sizeof(struct lm_gguf_str));
-    for (int i = 0; i < n; i++) {
-        struct lm_gguf_str * str = &((struct lm_gguf_str *)ctx->kv[idx].value.arr.data)[i];
-        str->n    = strlen(data[i]);
-        str->data = strdup(data[i]);
-    }
-}
-
-// set or add KV pairs from another context
-void lm_gguf_set_kv(struct lm_gguf_context * ctx, struct lm_gguf_context * src) {
-    for (uint32_t i = 0; i < src->header.n_kv; i++) {
-        switch (src->kv[i].type) {
-            case LM_GGUF_TYPE_UINT8:   lm_gguf_set_val_u8  (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break;
-            case LM_GGUF_TYPE_INT8:    lm_gguf_set_val_i8  (ctx, src->kv[i].key.data, src->kv[i].value.int8); break;
-            case LM_GGUF_TYPE_UINT16:  lm_gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break;
-            case LM_GGUF_TYPE_INT16:   lm_gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break;
-            case LM_GGUF_TYPE_UINT32:  lm_gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
-            case LM_GGUF_TYPE_INT32:   lm_gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
-            case LM_GGUF_TYPE_FLOAT32: lm_gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
-            case LM_GGUF_TYPE_UINT64:  lm_gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
-            case LM_GGUF_TYPE_INT64:   lm_gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
-            case LM_GGUF_TYPE_FLOAT64: lm_gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
-            case LM_GGUF_TYPE_BOOL:    lm_gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
-            case LM_GGUF_TYPE_STRING:  lm_gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
-            case LM_GGUF_TYPE_ARRAY:
-                {
-                    if (src->kv[i].value.arr.type == LM_GGUF_TYPE_STRING) {
-                        const char ** data = LM_GGML_CALLOC(src->kv[i].value.arr.n, sizeof(char *));
-                        for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
-                            data[j] = ((struct lm_gguf_str *)src->kv[i].value.arr.data)[j].data;
-                        }
-                        lm_gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
-                        LM_GGML_FREE((void *)data);
-                    } else if (src->kv[i].value.arr.type == LM_GGUF_TYPE_ARRAY) {
-                        LM_GGML_ABORT("nested arrays not supported");
-                    } else {
-                        lm_gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
-                    }
-                } break;
-            default: LM_GGML_ABORT("invalid type");
-        }
-    }
-}
-
-void lm_gguf_add_tensor(
-        struct lm_gguf_context * ctx,
-        const struct lm_ggml_tensor * tensor) {
-    LM_GGML_ASSERT(tensor);
-    if (lm_gguf_find_tensor(ctx, tensor->name) != -1) {
-        LM_GGML_ABORT("duplicated tensor name");
-    }
-
-    const int idx = ctx->header.n_tensors;
-    ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct lm_gguf_tensor_info));
-
-    ctx->infos[idx].name.n    = strlen(tensor->name);
-    ctx->infos[idx].name.data = strdup(tensor->name);
-
-    for (int i = 0; i < LM_GGML_MAX_DIMS; ++i) {
-        ctx->infos[idx].ne[i] = 1;
-    }
-
-    ctx->infos[idx].n_dims = lm_ggml_n_dims(tensor);
-    for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) {
-        ctx->infos[idx].ne[i] = tensor->ne[i];
-    }
-
-    ctx->infos[idx].type   = tensor->type;
-    ctx->infos[idx].offset = 0;
-    ctx->infos[idx].data   = tensor->data;
-    ctx->infos[idx].size   = lm_ggml_nbytes(tensor);
-
-    if (ctx->header.n_tensors > 0) {
-        ctx->infos[idx].offset = ctx->infos[idx - 1].offset + LM_GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
-    }
-
-    ctx->header.n_tensors++;
-}
-
-void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type) {
-    const int idx = lm_gguf_find_tensor(ctx, name);
-    if (idx < 0) {
-        LM_GGML_ABORT("tensor not found");
-    }
-
-    ctx->infos[idx].type = type;
-}
-
-void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data, size_t size) {
-    const int idx = lm_gguf_find_tensor(ctx, name);
-    if (idx < 0) {
-        LM_GGML_ABORT("tensor not found");
-    }
-
-    ctx->infos[idx].data = data;
-    ctx->infos[idx].size = size;
-
-    // update offsets
-    for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) {
-        ctx->infos[i].offset = ctx->infos[i - 1].offset + LM_GGML_PAD(ctx->infos[i - 1].size, ctx->alignment);
-    }
-}
-
-//static void lm_gguf_fwrite_str(FILE * file, const struct lm_gguf_str * val) {
-//    fwrite(&val->n, sizeof(val->n), 1, file);
-//    fwrite(val->data, sizeof(char), val->n, file);
-//}
-//
-//static void lm_gguf_fwrite_el(FILE * file, const void * val, size_t size) {
-//    fwrite(val, sizeof(char), size, file);
-//}
-
-struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
-    struct lm_gguf_buf buf = {
-        /*buf.data   =*/ size == 0 ? NULL : LM_GGML_CALLOC(1, size),
-        /*buf.size   =*/ size,
-        /*buf.offset =*/ 0,
-    };
-
-    return buf;
-}
-
-void lm_gguf_buf_free(struct lm_gguf_buf buf) {
-    if (buf.data) {
-        LM_GGML_FREE(buf.data);
-    }
-}
-
-static void lm_gguf_buf_grow(struct lm_gguf_buf * buf, size_t size) {
-    if (buf->offset + size > buf->size) {
-        buf->size = 1.5*(buf->offset + size);
-        if (buf->data) {
-            buf->data = realloc(buf->data, buf->size);
-        }
-    }
-}
-
-static void lm_gguf_bwrite_str(struct lm_gguf_buf * buf, const struct lm_gguf_str * val) {
-    lm_gguf_buf_grow(buf, sizeof(val->n) + val->n);
-
-    if (buf->data) {
-        memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
-    }
-    buf->offset += sizeof(val->n);
-
-    if (buf->data) {
-        memcpy((char *) buf->data + buf->offset, val->data, val->n);
-    }
-    buf->offset += val->n;
-}
-
-static void lm_gguf_bwrite_el(struct lm_gguf_buf * buf, const void * val, size_t el_size) {
-    lm_gguf_buf_grow(buf, el_size);
-
-    if (buf->data) {
-        memcpy((char *) buf->data + buf->offset, val, el_size);
-    }
-    buf->offset += el_size;
-}
-
-void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) {
-    // write header
-    lm_gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
-    lm_gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
-    lm_gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
-    lm_gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
-
-    // write key-value pairs
-    for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
-        struct lm_gguf_kv * kv = &ctx->kv[i];
-
-        lm_gguf_bwrite_str(buf, &kv->key);
-        lm_gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
-
-        switch (kv->type) {
-            case LM_GGUF_TYPE_UINT8:   lm_gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
-            case LM_GGUF_TYPE_INT8:    lm_gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break;
-            case LM_GGUF_TYPE_UINT16:  lm_gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
-            case LM_GGUF_TYPE_INT16:   lm_gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break;
-            case LM_GGUF_TYPE_UINT32:  lm_gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
-            case LM_GGUF_TYPE_INT32:   lm_gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
-            case LM_GGUF_TYPE_FLOAT32: lm_gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
-            case LM_GGUF_TYPE_UINT64:  lm_gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
-            case LM_GGUF_TYPE_INT64:   lm_gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
-            case LM_GGUF_TYPE_FLOAT64: lm_gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
-            case LM_GGUF_TYPE_BOOL:    lm_gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
-            case LM_GGUF_TYPE_STRING:  lm_gguf_bwrite_str(buf, &kv->value.str ); break;
-            case LM_GGUF_TYPE_ARRAY:
-                {
-                    lm_gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
-                    lm_gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) );
-
-                    switch (kv->value.arr.type) {
-                        case LM_GGUF_TYPE_UINT8:
-                        case LM_GGUF_TYPE_INT8:
-                        case LM_GGUF_TYPE_UINT16:
-                        case LM_GGUF_TYPE_INT16:
-                        case LM_GGUF_TYPE_UINT32:
-                        case LM_GGUF_TYPE_INT32:
-                        case LM_GGUF_TYPE_FLOAT32:
-                        case LM_GGUF_TYPE_UINT64:
-                        case LM_GGUF_TYPE_INT64:
-                        case LM_GGUF_TYPE_FLOAT64:
-                        case LM_GGUF_TYPE_BOOL:
-                            {
-                                lm_gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * lm_gguf_type_size(kv->value.arr.type));
-                            } break;
-                        case LM_GGUF_TYPE_STRING:
-                            {
-                                for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
-                                    lm_gguf_bwrite_str(buf, &((struct lm_gguf_str *) kv->value.arr.data)[j]);
-                                }
-                            } break;
-                        case LM_GGUF_TYPE_ARRAY:
-                        default: LM_GGML_ABORT("invalid type");
-                    }
-                } break;
-            default: LM_GGML_ABORT("invalid type");
-        }
-    }
-
-    // write tensor infos
-    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-        struct lm_gguf_tensor_info * info = &ctx->infos[i];
-
-        lm_gguf_bwrite_str(buf, &info->name);
-        lm_gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
-        for (uint32_t j = 0; j < info->n_dims; ++j) {
-            lm_gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
-        }
-        lm_gguf_bwrite_el(buf, &info->type, sizeof(info->type));
-        lm_gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
-    }
-
-    // we require the data section to be aligned, so take into account any padding
-    {
-        const size_t offset     = buf->offset;
-        const size_t offset_pad = LM_GGML_PAD(offset, ctx->alignment);
-
-        if (offset_pad != offset) {
-            uint8_t pad = 0;
-            for (size_t i = 0; i < offset_pad - offset; ++i) {
-                lm_gguf_bwrite_el(buf, &pad, sizeof(pad));
-            }
-        }
-    }
-
-    if (only_meta) {
-        return;
-    }
-
-    size_t offset = 0;
-
-    // write tensor data
-    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
-        struct lm_gguf_tensor_info * info = &ctx->infos[i];
-
-        const size_t size     = info->size;
-        const size_t size_pad = LM_GGML_PAD(size, ctx->alignment);
-
-        lm_gguf_bwrite_el(buf, info->data, size);
-
-        if (size_pad != size) {
-            uint8_t pad = 0;
-            for (size_t j = 0; j < size_pad - size; ++j) {
-                lm_gguf_bwrite_el(buf, &pad, sizeof(pad));
-            }
-        }
-
-        LM_GGML_ASSERT(offset == info->offset);
-
-        offset += size_pad;
-    }
-}
-
-void lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta) {
-    FILE * file = lm_ggml_fopen(fname, "wb");
-    if (!file) {
-        LM_GGML_ABORT("failed to open file for writing");
-    }
-
-    struct lm_gguf_buf buf = lm_gguf_buf_init(16*1024);
-
-    lm_gguf_write_to_buf(ctx, &buf, only_meta);
-
-    fwrite(buf.data, 1, buf.offset, file);
-
-    lm_gguf_buf_free(buf);
-
-    fclose(file);
-}
-
-size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx) {
-    // no allocs - only compute size
-    struct lm_gguf_buf buf = lm_gguf_buf_init(0);
-
-    lm_gguf_write_to_buf(ctx, &buf, true);
-
-    return buf.offset;
-}
-
-void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data) {
-    struct lm_gguf_buf buf = lm_gguf_buf_init(16*1024);
-
-    lm_gguf_write_to_buf(ctx, &buf, true);
-
-    memcpy(data, buf.data, buf.offset);
-
-    lm_gguf_buf_free(buf);
-}
-
 void lm_ggml_log_set(lm_ggml_log_callback log_callback, void * user_data) {
     g_logger_state.log_callback = log_callback ? log_callback : lm_ggml_log_callback_default;
     g_logger_state.log_callback_user_data = user_data;