cui-llama.rn 1.3.4 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/android/src/main/CMakeLists.txt +14 -8
  2. package/android/src/main/jni.cpp +38 -37
  3. package/cpp/common.cpp +50 -30
  4. package/cpp/common.h +32 -13
  5. package/cpp/ggml-alloc.c +0 -1
  6. package/cpp/ggml-backend-reg.cpp +79 -49
  7. package/cpp/ggml-backend.cpp +5 -2
  8. package/cpp/ggml-cpp.h +1 -0
  9. package/cpp/ggml-cpu-aarch64.cpp +57 -72
  10. package/cpp/ggml-cpu-quants.c +5 -1
  11. package/cpp/ggml-cpu.c +6 -6
  12. package/cpp/ggml-cpu.cpp +9 -0
  13. package/cpp/ggml-impl.h +11 -0
  14. package/cpp/ggml-metal.m +2 -2
  15. package/cpp/ggml.c +129 -1388
  16. package/cpp/ggml.h +29 -152
  17. package/cpp/gguf.cpp +1325 -0
  18. package/cpp/gguf.h +202 -0
  19. package/cpp/llama-adapter.cpp +346 -0
  20. package/cpp/llama-adapter.h +73 -0
  21. package/cpp/llama-arch.cpp +1434 -0
  22. package/cpp/llama-arch.h +395 -0
  23. package/cpp/llama-batch.cpp +368 -0
  24. package/cpp/llama-batch.h +88 -0
  25. package/cpp/llama-chat.cpp +567 -0
  26. package/cpp/llama-chat.h +51 -0
  27. package/cpp/llama-context.cpp +1771 -0
  28. package/cpp/llama-context.h +128 -0
  29. package/cpp/llama-cparams.cpp +1 -0
  30. package/cpp/llama-cparams.h +37 -0
  31. package/cpp/llama-cpp.h +30 -0
  32. package/cpp/llama-grammar.cpp +16 -15
  33. package/cpp/llama-grammar.h +5 -6
  34. package/cpp/llama-hparams.cpp +71 -0
  35. package/cpp/llama-hparams.h +140 -0
  36. package/cpp/llama-impl.cpp +167 -0
  37. package/cpp/llama-impl.h +16 -136
  38. package/cpp/llama-kv-cache.cpp +718 -0
  39. package/cpp/llama-kv-cache.h +218 -0
  40. package/cpp/llama-mmap.cpp +589 -0
  41. package/cpp/llama-mmap.h +67 -0
  42. package/cpp/llama-model-loader.cpp +1011 -0
  43. package/cpp/llama-model-loader.h +158 -0
  44. package/cpp/llama-model.cpp +2202 -0
  45. package/cpp/llama-model.h +391 -0
  46. package/cpp/llama-sampling.cpp +117 -4
  47. package/cpp/llama-vocab.cpp +26 -29
  48. package/cpp/llama-vocab.h +14 -2
  49. package/cpp/llama.cpp +8839 -19131
  50. package/cpp/llama.cpp.rej +23 -0
  51. package/cpp/llama.h +31 -9
  52. package/cpp/rn-llama.hpp +39 -37
  53. package/cpp/sgemm.cpp +1091 -378
  54. package/cpp/sgemm.h +2 -2
  55. package/cpp/unicode.cpp +6 -0
  56. package/package.json +1 -1
package/cpp/ggml.c CHANGED
@@ -1601,15 +1601,8 @@ static struct lm_ggml_tensor * lm_ggml_new_tensor_impl(
1601
1601
 
1602
1602
  struct lm_ggml_tensor * const result = (struct lm_ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1603
1603
 
1604
- #ifdef __clang__
1605
- // temporary until lm_ggml_tensor::backend is removed
1606
- #pragma clang diagnostic push
1607
- #pragma clang diagnostic ignored "-Wdeprecated-declarations"
1608
- #endif
1609
-
1610
1604
  *result = (struct lm_ggml_tensor) {
1611
1605
  /*.type =*/ type,
1612
- /*.backend =*/ LM_GGML_BACKEND_TYPE_CPU,
1613
1606
  /*.buffer =*/ NULL,
1614
1607
  /*.ne =*/ { 1, 1, 1, 1 },
1615
1608
  /*.nb =*/ { 0, 0, 0, 0 },
@@ -1625,10 +1618,6 @@ static struct lm_ggml_tensor * lm_ggml_new_tensor_impl(
1625
1618
  /*.padding =*/ { 0 },
1626
1619
  };
1627
1620
 
1628
- #ifdef __clang__
1629
- #pragma clang diagnostic pop
1630
- #endif
1631
-
1632
1621
  // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1633
1622
  //LM_GGML_ASSERT_ALIGNED(result->data);
1634
1623
 
@@ -3773,13 +3762,84 @@ struct lm_ggml_tensor * lm_ggml_clamp(
3773
3762
  return result;
3774
3763
  }
3775
3764
 
3776
- // lm_ggml_conv_1d
3777
-
3778
3765
  static int64_t lm_ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
3779
3766
  return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
3780
3767
  }
3781
3768
 
3782
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
3769
+ // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
3770
+ // a: [OC,IC, KH, KW]
3771
+ // b: [N, IC, IH, IW]
3772
+ // result: [N, OH, OW, IC*KH*KW]
3773
+ struct lm_ggml_tensor * lm_ggml_im2col(
3774
+ struct lm_ggml_context * ctx,
3775
+ struct lm_ggml_tensor * a,
3776
+ struct lm_ggml_tensor * b,
3777
+ int s0,
3778
+ int s1,
3779
+ int p0,
3780
+ int p1,
3781
+ int d0,
3782
+ int d1,
3783
+ bool is_2D,
3784
+ enum lm_ggml_type dst_type) {
3785
+ if (is_2D) {
3786
+ LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
3787
+ } else {
3788
+ //LM_GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
3789
+ LM_GGML_ASSERT(b->ne[1] == a->ne[1]);
3790
+ LM_GGML_ASSERT(b->ne[3] == 1);
3791
+ }
3792
+
3793
+ const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
3794
+ const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
3795
+
3796
+ LM_GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
3797
+ LM_GGML_ASSERT((OW > 0) && "b too small compared to a");
3798
+
3799
+ const int64_t ne[4] = {
3800
+ is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
3801
+ OW,
3802
+ is_2D ? OH : b->ne[2],
3803
+ is_2D ? b->ne[3] : 1,
3804
+ };
3805
+
3806
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, dst_type, 4, ne);
3807
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
3808
+ lm_ggml_set_op_params(result, params, sizeof(params));
3809
+
3810
+ result->op = LM_GGML_OP_IM2COL;
3811
+ result->src[0] = a;
3812
+ result->src[1] = b;
3813
+
3814
+ return result;
3815
+ }
3816
+
3817
+ struct lm_ggml_tensor * lm_ggml_im2col_back(
3818
+ struct lm_ggml_context * ctx,
3819
+ struct lm_ggml_tensor * a,
3820
+ struct lm_ggml_tensor * b,
3821
+ int64_t * ne,
3822
+ int s0,
3823
+ int s1,
3824
+ int p0,
3825
+ int p1,
3826
+ int d0,
3827
+ int d1,
3828
+ bool is_2D) {
3829
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
3830
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
3831
+ lm_ggml_set_op_params(result, params, sizeof(params));
3832
+
3833
+ result->op = LM_GGML_OP_IM2COL_BACK;
3834
+ result->src[0] = a;
3835
+ result->src[1] = b;
3836
+
3837
+ return result;
3838
+ }
3839
+
3840
+ // lm_ggml_conv_1d
3841
+
3842
+ struct lm_ggml_tensor * lm_ggml_conv_1d(
3783
3843
  struct lm_ggml_context * ctx,
3784
3844
  struct lm_ggml_tensor * a,
3785
3845
  struct lm_ggml_tensor * b,
@@ -3809,137 +3869,75 @@ struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
3809
3869
  return lm_ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
3810
3870
  }
3811
3871
 
3812
- // lm_ggml_conv_transpose_1d
3813
-
3814
- static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
3815
- return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
3816
- }
3872
+ // lm_ggml_conv_1d_dw
3817
3873
 
3818
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
3874
+ struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
3819
3875
  struct lm_ggml_context * ctx,
3820
3876
  struct lm_ggml_tensor * a,
3821
3877
  struct lm_ggml_tensor * b,
3822
3878
  int s0,
3823
3879
  int p0,
3824
3880
  int d0) {
3825
- LM_GGML_ASSERT(lm_ggml_is_matrix(b));
3826
- LM_GGML_ASSERT(a->ne[2] == b->ne[1]);
3827
- LM_GGML_ASSERT(a->ne[3] == 1);
3828
-
3829
- LM_GGML_ASSERT(p0 == 0);
3830
- LM_GGML_ASSERT(d0 == 1);
3881
+ struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
3882
+ struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
3831
3883
 
3832
- const int64_t ne[4] = {
3833
- lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
3834
- a->ne[1], b->ne[2], 1,
3835
- };
3836
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
3884
+ struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, LM_GGML_TYPE_F16);
3837
3885
 
3838
- int32_t params[] = { s0, p0, d0 };
3839
- lm_ggml_set_op_params(result, params, sizeof(params));
3886
+ struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, im2col, a);
3840
3887
 
3841
- result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
3842
- result->src[0] = a;
3843
- result->src[1] = b;
3888
+ result = lm_ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
3844
3889
 
3845
3890
  return result;
3846
3891
  }
3847
3892
 
3848
- // lm_ggml_conv_depthwise
3893
+ // lm_ggml_conv_1d_dw_ph
3849
3894
 
3850
- struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
3895
+ struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
3851
3896
  struct lm_ggml_context * ctx,
3852
3897
  struct lm_ggml_tensor * a,
3853
3898
  struct lm_ggml_tensor * b,
3854
3899
  int s0,
3855
- int s1,
3856
- int p0,
3857
- int p1,
3858
- int d0,
3859
- int d1) {
3860
- struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
3861
- struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
3862
- lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
3863
- s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
3864
- struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
3900
+ int d0) {
3901
+ return lm_ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
3902
+ }
3865
3903
 
3866
- new_a = lm_ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
3867
- struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
3868
- result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
3904
+ // lm_ggml_conv_transpose_1d
3869
3905
 
3870
- return result;
3906
+ static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
3907
+ return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
3871
3908
  }
3872
- // lm_ggml_conv_2d
3873
3909
 
3874
- // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
3875
- // a: [OC,IC, KH, KW]
3876
- // b: [N, IC, IH, IW]
3877
- // result: [N, OH, OW, IC*KH*KW]
3878
- struct lm_ggml_tensor * lm_ggml_im2col(
3910
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
3879
3911
  struct lm_ggml_context * ctx,
3880
3912
  struct lm_ggml_tensor * a,
3881
3913
  struct lm_ggml_tensor * b,
3882
3914
  int s0,
3883
- int s1,
3884
3915
  int p0,
3885
- int p1,
3886
- int d0,
3887
- int d1,
3888
- bool is_2D,
3889
- enum lm_ggml_type dst_type) {
3890
- if(is_2D) {
3891
- LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
3892
- } else {
3893
- LM_GGML_ASSERT(a->ne[1] == b->ne[1]);
3894
- LM_GGML_ASSERT(b->ne[3] == 1);
3895
- }
3896
-
3897
- const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
3898
- const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
3916
+ int d0) {
3917
+ LM_GGML_ASSERT(lm_ggml_is_matrix(b));
3918
+ LM_GGML_ASSERT(a->ne[2] == b->ne[1]);
3919
+ LM_GGML_ASSERT(a->ne[3] == 1);
3899
3920
 
3900
- LM_GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
3901
- LM_GGML_ASSERT((OW > 0) && "b too small compared to a");
3921
+ LM_GGML_ASSERT(p0 == 0);
3922
+ LM_GGML_ASSERT(d0 == 1);
3902
3923
 
3903
3924
  const int64_t ne[4] = {
3904
- is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
3905
- OW,
3906
- is_2D ? OH : b->ne[2],
3907
- is_2D ? b->ne[3] : 1,
3925
+ lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
3926
+ a->ne[1], b->ne[2], 1,
3908
3927
  };
3928
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
3909
3929
 
3910
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, dst_type, 4, ne);
3911
- int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
3930
+ int32_t params[] = { s0, p0, d0 };
3912
3931
  lm_ggml_set_op_params(result, params, sizeof(params));
3913
3932
 
3914
- result->op = LM_GGML_OP_IM2COL;
3933
+ result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
3915
3934
  result->src[0] = a;
3916
3935
  result->src[1] = b;
3917
3936
 
3918
3937
  return result;
3919
3938
  }
3920
3939
 
3921
- struct lm_ggml_tensor * lm_ggml_im2col_back(
3922
- struct lm_ggml_context * ctx,
3923
- struct lm_ggml_tensor * a,
3924
- struct lm_ggml_tensor * b,
3925
- int64_t * ne,
3926
- int s0,
3927
- int s1,
3928
- int p0,
3929
- int p1,
3930
- int d0,
3931
- int d1,
3932
- bool is_2D) {
3933
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
3934
- int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
3935
- lm_ggml_set_op_params(result, params, sizeof(params));
3936
-
3937
- result->op = LM_GGML_OP_IM2COL_BACK;
3938
- result->src[0] = a;
3939
- result->src[1] = b;
3940
-
3941
- return result;
3942
- }
3940
+ // lm_ggml_conv_2d
3943
3941
 
3944
3942
  // a: [OC,IC, KH, KW]
3945
3943
  // b: [N, IC, IH, IW]
@@ -3986,6 +3984,31 @@ struct lm_ggml_tensor * lm_ggml_conv_2d_s1_ph(
3986
3984
  return lm_ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
3987
3985
  }
3988
3986
 
3987
+ // lm_ggml_conv_2d_dw
3988
+
3989
+ struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
3990
+ struct lm_ggml_context * ctx,
3991
+ struct lm_ggml_tensor * a,
3992
+ struct lm_ggml_tensor * b,
3993
+ int s0,
3994
+ int s1,
3995
+ int p0,
3996
+ int p1,
3997
+ int d0,
3998
+ int d1) {
3999
+ struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4000
+ struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
4001
+ lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4002
+ s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4003
+ struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4004
+
4005
+ new_a = lm_ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4006
+ struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
4007
+ result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4008
+
4009
+ return result;
4010
+ }
4011
+
3989
4012
  // lm_ggml_conv_transpose_2d_p0
3990
4013
 
3991
4014
  static int64_t lm_ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
@@ -6050,12 +6073,12 @@ struct lm_ggml_tensor * lm_ggml_graph_get_tensor(const struct lm_ggml_cgraph * c
6050
6073
 
6051
6074
  struct lm_ggml_tensor * lm_ggml_graph_get_grad(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
6052
6075
  const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
6053
- return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grads[igrad] : NULL;
6076
+ return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
6054
6077
  }
6055
6078
 
6056
6079
  struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
6057
6080
  const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
6058
- return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grad_accs[igrad] : NULL;
6081
+ return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
6059
6082
  }
6060
6083
 
6061
6084
  void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph) {
@@ -6396,1288 +6419,6 @@ size_t lm_ggml_quantize_chunk(
6396
6419
 
6397
6420
  ////////////////////////////////////////////////////////////////////////////////
6398
6421
 
6399
- struct lm_gguf_str {
6400
- uint64_t n; // GGUFv2
6401
- char * data;
6402
- };
6403
-
6404
- static const size_t LM_GGUF_TYPE_SIZE[LM_GGUF_TYPE_COUNT] = {
6405
- [LM_GGUF_TYPE_UINT8] = sizeof(uint8_t),
6406
- [LM_GGUF_TYPE_INT8] = sizeof(int8_t),
6407
- [LM_GGUF_TYPE_UINT16] = sizeof(uint16_t),
6408
- [LM_GGUF_TYPE_INT16] = sizeof(int16_t),
6409
- [LM_GGUF_TYPE_UINT32] = sizeof(uint32_t),
6410
- [LM_GGUF_TYPE_INT32] = sizeof(int32_t),
6411
- [LM_GGUF_TYPE_FLOAT32] = sizeof(float),
6412
- [LM_GGUF_TYPE_BOOL] = sizeof(bool),
6413
- [LM_GGUF_TYPE_STRING] = sizeof(struct lm_gguf_str),
6414
- [LM_GGUF_TYPE_UINT64] = sizeof(uint64_t),
6415
- [LM_GGUF_TYPE_INT64] = sizeof(int64_t),
6416
- [LM_GGUF_TYPE_FLOAT64] = sizeof(double),
6417
- [LM_GGUF_TYPE_ARRAY] = 0, // undefined
6418
- };
6419
- static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13");
6420
-
6421
- static const char * LM_GGUF_TYPE_NAME[LM_GGUF_TYPE_COUNT] = {
6422
- [LM_GGUF_TYPE_UINT8] = "u8",
6423
- [LM_GGUF_TYPE_INT8] = "i8",
6424
- [LM_GGUF_TYPE_UINT16] = "u16",
6425
- [LM_GGUF_TYPE_INT16] = "i16",
6426
- [LM_GGUF_TYPE_UINT32] = "u32",
6427
- [LM_GGUF_TYPE_INT32] = "i32",
6428
- [LM_GGUF_TYPE_FLOAT32] = "f32",
6429
- [LM_GGUF_TYPE_BOOL] = "bool",
6430
- [LM_GGUF_TYPE_STRING] = "str",
6431
- [LM_GGUF_TYPE_ARRAY] = "arr",
6432
- [LM_GGUF_TYPE_UINT64] = "u64",
6433
- [LM_GGUF_TYPE_INT64] = "i64",
6434
- [LM_GGUF_TYPE_FLOAT64] = "f64",
6435
- };
6436
- static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13");
6437
-
6438
- union lm_gguf_value {
6439
- uint8_t uint8;
6440
- int8_t int8;
6441
- uint16_t uint16;
6442
- int16_t int16;
6443
- uint32_t uint32;
6444
- int32_t int32;
6445
- float float32;
6446
- uint64_t uint64;
6447
- int64_t int64;
6448
- double float64;
6449
- bool bool_;
6450
-
6451
- struct lm_gguf_str str;
6452
-
6453
- struct {
6454
- enum lm_gguf_type type;
6455
-
6456
- uint64_t n; // GGUFv2
6457
- void * data;
6458
- } arr;
6459
- };
6460
-
6461
- struct lm_gguf_kv {
6462
- struct lm_gguf_str key;
6463
-
6464
- enum lm_gguf_type type;
6465
- union lm_gguf_value value;
6466
- };
6467
-
6468
- struct lm_gguf_header {
6469
- char magic[4];
6470
-
6471
- uint32_t version;
6472
- uint64_t n_tensors; // GGUFv2
6473
- uint64_t n_kv; // GGUFv2
6474
- };
6475
-
6476
- struct lm_gguf_tensor_info {
6477
- struct lm_gguf_str name;
6478
-
6479
- uint32_t n_dims;
6480
- uint64_t ne[LM_GGML_MAX_DIMS];
6481
-
6482
- enum lm_ggml_type type;
6483
-
6484
- uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
6485
-
6486
- // for writing API
6487
- const void * data;
6488
- size_t size;
6489
- };
6490
-
6491
- struct lm_gguf_context {
6492
- struct lm_gguf_header header;
6493
-
6494
- struct lm_gguf_kv * kv;
6495
- struct lm_gguf_tensor_info * infos;
6496
-
6497
- size_t alignment;
6498
- size_t offset; // offset of `data` from beginning of file
6499
- size_t size; // size of `data` in bytes
6500
-
6501
- //uint8_t * padding;
6502
- void * data;
6503
- };
6504
-
6505
- static size_t lm_gguf_type_size(enum lm_gguf_type type) {
6506
- LM_GGML_ASSERT(0 <= type && type < LM_GGUF_TYPE_COUNT);
6507
- return LM_GGUF_TYPE_SIZE[type];
6508
- }
6509
-
6510
- static bool lm_gguf_tensor_info_sanitize(struct lm_gguf_tensor_info * info) {
6511
- if (info->n_dims > LM_GGML_MAX_DIMS) {
6512
- fprintf(stderr, "%s: invalid number of dimensions (%" PRIu32 ")\n", __func__, info->n_dims);
6513
- return false;
6514
- }
6515
-
6516
- if (info->type < 0 || info->type >= LM_GGML_TYPE_COUNT) {
6517
- fprintf(stderr, "%s: invalid type (%d)\n", __func__, info->type);
6518
- return false;
6519
- }
6520
-
6521
- if (strlen(info->name.data) >= LM_GGML_MAX_NAME) {
6522
- fprintf(stderr, "%s: tensor '%s' name is too long\n", __func__, info->name.data);
6523
- return false;
6524
- }
6525
-
6526
- for (uint32_t i = 0; i < info->n_dims; ++i) {
6527
- if (info->ne[i] <= 0) {
6528
- fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[i]);
6529
- return false;
6530
- }
6531
- }
6532
-
6533
- // prevent overflow for total number of elements
6534
- if (INT64_MAX/info->ne[1] <= info->ne[0]) {
6535
- fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[1]);
6536
- return false;
6537
- }
6538
-
6539
- if (INT64_MAX/info->ne[2] <= info->ne[0]*info->ne[1]) {
6540
- fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[2]);
6541
- return false;
6542
- }
6543
-
6544
- if (INT64_MAX/info->ne[3] <= info->ne[0]*info->ne[1]*info->ne[2]) {
6545
- fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[3]);
6546
- return false;
6547
- }
6548
-
6549
- return true;
6550
- }
6551
-
6552
- static bool lm_gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
6553
- const size_t n = fread(dst, 1, size, file);
6554
- *offset += n;
6555
- return n == size;
6556
- }
6557
-
6558
- static bool lm_gguf_fread_str(FILE * file, struct lm_gguf_str * p, size_t * offset) {
6559
- p->n = 0;
6560
- p->data = NULL;
6561
-
6562
- bool ok = true;
6563
-
6564
- ok = ok && lm_gguf_fread_el(file, &p->n, sizeof(p->n), offset);
6565
-
6566
- // early exit if string length is invalid, prevents from integer overflow
6567
- if (p->n == SIZE_MAX) {
6568
- fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n);
6569
- return false;
6570
- }
6571
-
6572
- p->data = calloc(p->n + 1, 1);
6573
- if (!p->data) {
6574
- fprintf(stderr, "%s: failed to allocate memory for string of length %" PRIu64 "\n", __func__, p->n);
6575
- return false;
6576
- }
6577
-
6578
- ok = ok && lm_gguf_fread_el(file, p->data, p->n, offset);
6579
-
6580
- return ok;
6581
- }
6582
-
6583
- static void lm_gguf_free_kv(struct lm_gguf_kv * kv) {
6584
- if (kv->key.data) {
6585
- LM_GGML_FREE(kv->key.data);
6586
- }
6587
-
6588
- if (kv->type == LM_GGUF_TYPE_STRING) {
6589
- if (kv->value.str.data) {
6590
- LM_GGML_FREE(kv->value.str.data);
6591
- }
6592
- }
6593
-
6594
- if (kv->type == LM_GGUF_TYPE_ARRAY) {
6595
- if (kv->value.arr.data) {
6596
- if (kv->value.arr.type == LM_GGUF_TYPE_STRING) {
6597
- for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
6598
- struct lm_gguf_str * str = &((struct lm_gguf_str *) kv->value.arr.data)[j];
6599
- if (str->data) {
6600
- LM_GGML_FREE(str->data);
6601
- }
6602
- }
6603
- }
6604
- LM_GGML_FREE(kv->value.arr.data);
6605
- }
6606
- }
6607
- }
6608
-
6609
- struct lm_gguf_context * lm_gguf_init_empty(void) {
6610
- struct lm_gguf_context * ctx = calloc(1, sizeof(struct lm_gguf_context));
6611
- if (!ctx) {
6612
- fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
6613
- return NULL;
6614
- }
6615
-
6616
- memcpy(ctx->header.magic, LM_GGUF_MAGIC, sizeof(ctx->header.magic));
6617
- ctx->header.version = LM_GGUF_VERSION;
6618
- ctx->header.n_tensors = 0;
6619
- ctx->header.n_kv = 0;
6620
-
6621
- ctx->kv = NULL;
6622
- ctx->infos = NULL;
6623
-
6624
- ctx->alignment = LM_GGUF_DEFAULT_ALIGNMENT;
6625
- ctx->offset = 0;
6626
- ctx->size = 0;
6627
-
6628
- ctx->data = NULL;
6629
-
6630
- return ctx;
6631
- }
6632
-
6633
- struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
6634
- FILE * file = lm_ggml_fopen(fname, "rb");
6635
- if (!file) {
6636
- fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
6637
- return NULL;
6638
- }
6639
-
6640
- // offset from start of file
6641
- size_t offset = 0;
6642
-
6643
- char magic[4];
6644
-
6645
- // check the magic before making allocations
6646
- {
6647
- lm_gguf_fread_el(file, &magic, sizeof(magic), &offset);
6648
-
6649
- for (uint32_t i = 0; i < sizeof(magic); i++) {
6650
- if (magic[i] != LM_GGUF_MAGIC[i]) {
6651
- fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
6652
- fclose(file);
6653
- return NULL;
6654
- }
6655
- }
6656
- }
6657
-
6658
- bool ok = true;
6659
-
6660
- struct lm_gguf_context * ctx = calloc(1, sizeof(struct lm_gguf_context));
6661
- if (!ctx) {
6662
- fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
6663
- fclose(file);
6664
- return NULL;
6665
- }
6666
-
6667
- // read the header
6668
- {
6669
- strncpy(ctx->header.magic, magic, 4);
6670
-
6671
- ctx->kv = NULL;
6672
- ctx->infos = NULL;
6673
- ctx->data = NULL;
6674
-
6675
- ok = ok && lm_gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
6676
- ok = ok && lm_gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
6677
- ok = ok && lm_gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
6678
-
6679
- if (ctx->header.version == 1) {
6680
- fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
6681
- fclose(file);
6682
- lm_gguf_free(ctx);
6683
- return NULL;
6684
- }
6685
-
6686
- // sanity-checks to prevent from integer/buffer overflows
6687
-
6688
- ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct lm_gguf_tensor_info));
6689
- ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/lm_ggml_tensor_overhead());
6690
- ok = ok && (ctx->header.n_kv < (SIZE_MAX/2)/sizeof(struct lm_gguf_kv));
6691
-
6692
- if (!ok) {
6693
- fprintf(stderr, "%s: failed to read header\n", __func__);
6694
- fclose(file);
6695
- lm_gguf_free(ctx);
6696
- return NULL;
6697
- }
6698
- }
6699
-
6700
- // read the kv pairs
6701
- {
6702
- const uint64_t n_kv = ctx->header.n_kv;
6703
-
6704
- ctx->kv = calloc(n_kv, sizeof(struct lm_gguf_kv));
6705
- if (!ctx->kv) {
6706
- fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
6707
- fclose(file);
6708
- lm_gguf_free(ctx);
6709
- return NULL;
6710
- }
6711
-
6712
- for (uint64_t i = 0; i < n_kv; ++i) {
6713
- struct lm_gguf_kv * kv = &ctx->kv[i];
6714
-
6715
- //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
6716
-
6717
- ok = ok && lm_gguf_fread_str(file, &kv->key, &offset);
6718
- ok = ok && lm_gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
6719
-
6720
- //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
6721
-
6722
- switch (kv->type) {
6723
- case LM_GGUF_TYPE_UINT8: ok = ok && lm_gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
6724
- case LM_GGUF_TYPE_INT8: ok = ok && lm_gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
6725
- case LM_GGUF_TYPE_UINT16: ok = ok && lm_gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
6726
- case LM_GGUF_TYPE_INT16: ok = ok && lm_gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
6727
- case LM_GGUF_TYPE_UINT32: ok = ok && lm_gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
6728
- case LM_GGUF_TYPE_INT32: ok = ok && lm_gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
6729
- case LM_GGUF_TYPE_FLOAT32: ok = ok && lm_gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
6730
- case LM_GGUF_TYPE_UINT64: ok = ok && lm_gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
6731
- case LM_GGUF_TYPE_INT64: ok = ok && lm_gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
6732
- case LM_GGUF_TYPE_FLOAT64: ok = ok && lm_gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
6733
- case LM_GGUF_TYPE_BOOL: ok = ok && lm_gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
6734
- case LM_GGUF_TYPE_STRING: ok = ok && lm_gguf_fread_str(file, &kv->value.str, &offset); break;
6735
- case LM_GGUF_TYPE_ARRAY:
6736
- {
6737
- ok = ok && lm_gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
6738
- ok = ok && lm_gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
6739
-
6740
- switch (kv->value.arr.type) {
6741
- case LM_GGUF_TYPE_UINT8:
6742
- case LM_GGUF_TYPE_INT8:
6743
- case LM_GGUF_TYPE_UINT16:
6744
- case LM_GGUF_TYPE_INT16:
6745
- case LM_GGUF_TYPE_UINT32:
6746
- case LM_GGUF_TYPE_INT32:
6747
- case LM_GGUF_TYPE_FLOAT32:
6748
- case LM_GGUF_TYPE_UINT64:
6749
- case LM_GGUF_TYPE_INT64:
6750
- case LM_GGUF_TYPE_FLOAT64:
6751
- case LM_GGUF_TYPE_BOOL:
6752
- {
6753
- // prevent from integer overflow in the malloc below
6754
- if (kv->value.arr.n >= SIZE_MAX/lm_gguf_type_size(kv->value.arr.type)) {
6755
- fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
6756
- fclose(file);
6757
- lm_gguf_free(ctx);
6758
- return NULL;
6759
- }
6760
-
6761
- kv->value.arr.data = calloc(kv->value.arr.n, lm_gguf_type_size(kv->value.arr.type));
6762
- if (!kv->value.arr.data) {
6763
- fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
6764
- fclose(file);
6765
- lm_gguf_free(ctx);
6766
- return NULL;
6767
- }
6768
-
6769
- ok = ok && lm_gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * lm_gguf_type_size(kv->value.arr.type), &offset);
6770
- } break;
6771
- case LM_GGUF_TYPE_STRING:
6772
- {
6773
- // prevent from integer overflow in the malloc below
6774
- if (kv->value.arr.n >= SIZE_MAX/sizeof(struct lm_gguf_str)) {
6775
- fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
6776
- fclose(file);
6777
- lm_gguf_free(ctx);
6778
- return NULL;
6779
- }
6780
-
6781
- kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct lm_gguf_str));
6782
- if (!kv->value.arr.data) {
6783
- fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
6784
- fclose(file);
6785
- lm_gguf_free(ctx);
6786
- return NULL;
6787
- }
6788
-
6789
- for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
6790
- ok = ok && lm_gguf_fread_str(file, &((struct lm_gguf_str *) kv->value.arr.data)[j], &offset);
6791
- }
6792
- } break;
6793
- case LM_GGUF_TYPE_ARRAY:
6794
- default:
6795
- {
6796
- fprintf(stderr, "%s: invalid array type %d\n", __func__, kv->value.arr.type);
6797
- ok = false;
6798
- } break;
6799
- }
6800
- } break;
6801
- default:
6802
- {
6803
- fprintf(stderr, "%s: invalid type %d\n", __func__, kv->type);
6804
- ok = false;
6805
- } break;
6806
- }
6807
-
6808
- if (!ok) {
6809
- break;
6810
- }
6811
- }
6812
-
6813
- if (!ok) {
6814
- fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
6815
- fclose(file);
6816
- lm_gguf_free(ctx);
6817
- return NULL;
6818
- }
6819
- }
6820
-
6821
- // read the tensor infos
6822
- if (ctx->header.n_tensors > 0) {
6823
- ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct lm_gguf_tensor_info));
6824
- if (!ctx->infos) {
6825
- fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
6826
- fclose(file);
6827
- lm_gguf_free(ctx);
6828
- return NULL;
6829
- }
6830
-
6831
- for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
6832
- struct lm_gguf_tensor_info * info = &ctx->infos[i];
6833
-
6834
- for (int j = 0; j < LM_GGML_MAX_DIMS; ++j) {
6835
- info->ne[j] = 1;
6836
- }
6837
-
6838
- ok = ok && lm_gguf_fread_str(file, &info->name, &offset);
6839
- ok = ok && lm_gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
6840
-
6841
- ok = ok && (info->n_dims <= LM_GGML_MAX_DIMS);
6842
-
6843
- for (uint32_t j = 0; j < info->n_dims; ++j) {
6844
- ok = ok && lm_gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
6845
- }
6846
-
6847
- ok = ok && lm_gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
6848
- ok = ok && lm_gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
6849
-
6850
- ok = ok && lm_gguf_tensor_info_sanitize(info);
6851
-
6852
- // make sure there is no duplicated tensor names
6853
- for (uint64_t j = 0; j < i && ok; ++j) {
6854
- if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
6855
- fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
6856
- ok = false;
6857
- }
6858
- }
6859
-
6860
- if (!ok) {
6861
- fprintf(stderr, "%s: failed to read tensor info\n", __func__);
6862
- fclose(file);
6863
- lm_gguf_free(ctx);
6864
- return NULL;
6865
- }
6866
- }
6867
- }
6868
-
6869
- ctx->alignment = LM_GGUF_DEFAULT_ALIGNMENT;
6870
-
6871
- int alignment_idx = lm_gguf_find_key(ctx, "general.alignment");
6872
- if (alignment_idx != -1) {
6873
- ctx->alignment = lm_gguf_get_val_u32(ctx, alignment_idx);
6874
- }
6875
-
6876
- // we require the data section to be aligned, so take into account any padding
6877
- {
6878
- const size_t offset_pad = offset % ctx->alignment;
6879
-
6880
- if (offset_pad != 0) {
6881
- offset += ctx->alignment - offset_pad;
6882
- fseek(file, offset, SEEK_SET);
6883
- }
6884
- }
6885
-
6886
- // store the current file offset - this is where the data section starts
6887
- ctx->offset = offset;
6888
-
6889
- // compute the total size of the data section, taking into account the alignment
6890
- {
6891
- ctx->size = 0;
6892
- for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
6893
- struct lm_gguf_tensor_info * info = &ctx->infos[i];
6894
-
6895
- const int64_t ne =
6896
- (int64_t) info->ne[0] *
6897
- (int64_t) info->ne[1] *
6898
- (int64_t) info->ne[2] *
6899
- (int64_t) info->ne[3];
6900
-
6901
- if (lm_ggml_blck_size(info->type) == 0 ) {
6902
- // this tensor type support have been removed:
6903
- fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
6904
- __func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type));
6905
- fclose(file);
6906
- lm_gguf_free(ctx);
6907
- return NULL;
6908
- }
6909
-
6910
- if (ne % lm_ggml_blck_size(info->type) != 0) {
6911
- fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
6912
- __func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type), ne, lm_ggml_blck_size(info->type));
6913
- fclose(file);
6914
- lm_gguf_free(ctx);
6915
- return NULL;
6916
- }
6917
-
6918
- const size_t size_cur = lm_ggml_row_size(info->type, ne);
6919
-
6920
- ctx->size += LM_GGML_PAD(size_cur, ctx->alignment);
6921
- }
6922
- }
6923
-
6924
- // load the tensor data only if requested
6925
- if (params.ctx != NULL) {
6926
- // if the provided lm_gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
6927
- // otherwise, we load the binary blob into the created lm_ggml_context as well, and point the "data" members of
6928
- // the lm_ggml_tensor structs to the appropriate locations in the binary blob
6929
-
6930
- // compute the exact size needed for the new lm_ggml_context
6931
- const size_t mem_size =
6932
- params.no_alloc ?
6933
- (ctx->header.n_tensors )*lm_ggml_tensor_overhead() :
6934
- (ctx->header.n_tensors + 1)*lm_ggml_tensor_overhead() + ctx->size;
6935
-
6936
- struct lm_ggml_init_params pdata = {
6937
- .mem_size = mem_size,
6938
- .mem_buffer = NULL,
6939
- .no_alloc = params.no_alloc,
6940
- };
6941
-
6942
- *params.ctx = lm_ggml_init(pdata);
6943
- if (*params.ctx == NULL) {
6944
- fprintf(stderr, "%s: failed to initialize context\n", __func__);
6945
- fclose(file);
6946
- lm_gguf_free(ctx);
6947
- return NULL;
6948
- }
6949
-
6950
- struct lm_ggml_context * ctx_data = *params.ctx;
6951
-
6952
- struct lm_ggml_tensor * data = NULL;
6953
-
6954
- if (!params.no_alloc) {
6955
- data = lm_ggml_new_tensor_1d(ctx_data, LM_GGML_TYPE_I8, ctx->size);
6956
-
6957
- ok = ok && data != NULL;
6958
-
6959
- // read the binary blob with the tensor data
6960
- ok = ok && lm_gguf_fread_el(file, data->data, ctx->size, &offset);
6961
-
6962
- if (!ok) {
6963
- fprintf(stderr, "%s: failed to read tensor data\n", __func__);
6964
- fclose(file);
6965
- lm_ggml_free(ctx_data);
6966
- lm_gguf_free(ctx);
6967
- return NULL;
6968
- }
6969
-
6970
- ctx->data = data->data;
6971
- }
6972
-
6973
- lm_ggml_set_no_alloc(ctx_data, true);
6974
-
6975
- // create the tensors
6976
- for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
6977
- const int64_t ne[LM_GGML_MAX_DIMS] = {
6978
- ctx->infos[i].ne[0],
6979
- ctx->infos[i].ne[1],
6980
- ctx->infos[i].ne[2],
6981
- ctx->infos[i].ne[3],
6982
- };
6983
-
6984
- struct lm_ggml_tensor * cur = lm_ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne);
6985
-
6986
- ok = ok && cur != NULL;
6987
-
6988
- if (!ok) {
6989
- break;
6990
- }
6991
-
6992
- lm_ggml_set_name(cur, ctx->infos[i].name.data);
6993
-
6994
- // point the data member to the appropriate location in the binary blob using the tensor infos
6995
- if (!params.no_alloc) {
6996
- //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
6997
- cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
6998
- }
6999
- }
7000
-
7001
- if (!ok) {
7002
- fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
7003
- fclose(file);
7004
- lm_ggml_free(ctx_data);
7005
- lm_gguf_free(ctx);
7006
- return NULL;
7007
- }
7008
-
7009
- lm_ggml_set_no_alloc(ctx_data, params.no_alloc);
7010
- }
7011
-
7012
- fclose(file);
7013
-
7014
- return ctx;
7015
- }
7016
-
7017
- void lm_gguf_free(struct lm_gguf_context * ctx) {
7018
- if (ctx == NULL) {
7019
- return;
7020
- }
7021
-
7022
- if (ctx->kv) {
7023
- // free string memory - not great..
7024
- for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
7025
- lm_gguf_free_kv(&ctx->kv[i]);
7026
- }
7027
-
7028
- LM_GGML_FREE(ctx->kv);
7029
- }
7030
-
7031
- if (ctx->infos) {
7032
- for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
7033
- struct lm_gguf_tensor_info * info = &ctx->infos[i];
7034
-
7035
- if (info->name.data) {
7036
- LM_GGML_FREE(info->name.data);
7037
- }
7038
- }
7039
-
7040
- LM_GGML_FREE(ctx->infos);
7041
- }
7042
-
7043
- LM_GGML_FREE(ctx);
7044
- }
7045
-
7046
- const char * lm_gguf_type_name(enum lm_gguf_type type) {
7047
- return LM_GGUF_TYPE_NAME[type];
7048
- }
7049
-
7050
- int lm_gguf_get_version(const struct lm_gguf_context * ctx) {
7051
- return ctx->header.version;
7052
- }
7053
-
7054
- size_t lm_gguf_get_alignment(const struct lm_gguf_context * ctx) {
7055
- return ctx->alignment;
7056
- }
7057
-
7058
- size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx) {
7059
- return ctx->offset;
7060
- }
7061
-
7062
- void * lm_gguf_get_data(const struct lm_gguf_context * ctx) {
7063
- return ctx->data;
7064
- }
7065
-
7066
- int lm_gguf_get_n_kv(const struct lm_gguf_context * ctx) {
7067
- return ctx->header.n_kv;
7068
- }
7069
-
7070
- int lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key) {
7071
- // return -1 if key not found
7072
- int keyfound = -1;
7073
-
7074
- const int n_kv = lm_gguf_get_n_kv(ctx);
7075
-
7076
- for (int i = 0; i < n_kv; ++i) {
7077
- if (strcmp(key, lm_gguf_get_key(ctx, i)) == 0) {
7078
- keyfound = i;
7079
- break;
7080
- }
7081
- }
7082
-
7083
- return keyfound;
7084
- }
7085
-
7086
- const char * lm_gguf_get_key(const struct lm_gguf_context * ctx, int key_id) {
7087
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7088
- return ctx->kv[key_id].key.data;
7089
- }
7090
-
7091
- enum lm_gguf_type lm_gguf_get_kv_type(const struct lm_gguf_context * ctx, int key_id) {
7092
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7093
- return ctx->kv[key_id].type;
7094
- }
7095
-
7096
- enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int key_id) {
7097
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7098
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY);
7099
- return ctx->kv[key_id].value.arr.type;
7100
- }
7101
-
7102
- const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int key_id) {
7103
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7104
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY);
7105
- return ctx->kv[key_id].value.arr.data;
7106
- }
7107
-
7108
- const char * lm_gguf_get_arr_str(const struct lm_gguf_context * ctx, int key_id, int i) {
7109
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7110
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY);
7111
- struct lm_gguf_kv * kv = &ctx->kv[key_id];
7112
- struct lm_gguf_str * str = &((struct lm_gguf_str *) kv->value.arr.data)[i];
7113
- return str->data;
7114
- }
7115
-
7116
- int lm_gguf_get_arr_n(const struct lm_gguf_context * ctx, int key_id) {
7117
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7118
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY);
7119
- return ctx->kv[key_id].value.arr.n;
7120
- }
7121
-
7122
- uint8_t lm_gguf_get_val_u8(const struct lm_gguf_context * ctx, int key_id) {
7123
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7124
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT8);
7125
- return ctx->kv[key_id].value.uint8;
7126
- }
7127
-
7128
- int8_t lm_gguf_get_val_i8(const struct lm_gguf_context * ctx, int key_id) {
7129
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7130
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT8);
7131
- return ctx->kv[key_id].value.int8;
7132
- }
7133
-
7134
- uint16_t lm_gguf_get_val_u16(const struct lm_gguf_context * ctx, int key_id) {
7135
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7136
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT16);
7137
- return ctx->kv[key_id].value.uint16;
7138
- }
7139
-
7140
- int16_t lm_gguf_get_val_i16(const struct lm_gguf_context * ctx, int key_id) {
7141
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7142
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT16);
7143
- return ctx->kv[key_id].value.int16;
7144
- }
7145
-
7146
- uint32_t lm_gguf_get_val_u32(const struct lm_gguf_context * ctx, int key_id) {
7147
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7148
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT32);
7149
- return ctx->kv[key_id].value.uint32;
7150
- }
7151
-
7152
- int32_t lm_gguf_get_val_i32(const struct lm_gguf_context * ctx, int key_id) {
7153
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7154
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT32);
7155
- return ctx->kv[key_id].value.int32;
7156
- }
7157
-
7158
- float lm_gguf_get_val_f32(const struct lm_gguf_context * ctx, int key_id) {
7159
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7160
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_FLOAT32);
7161
- return ctx->kv[key_id].value.float32;
7162
- }
7163
-
7164
- uint64_t lm_gguf_get_val_u64(const struct lm_gguf_context * ctx, int key_id) {
7165
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7166
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT64);
7167
- return ctx->kv[key_id].value.uint64;
7168
- }
7169
-
7170
- int64_t lm_gguf_get_val_i64(const struct lm_gguf_context * ctx, int key_id) {
7171
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7172
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT64);
7173
- return ctx->kv[key_id].value.int64;
7174
- }
7175
-
7176
- double lm_gguf_get_val_f64(const struct lm_gguf_context * ctx, int key_id) {
7177
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7178
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_FLOAT64);
7179
- return ctx->kv[key_id].value.float64;
7180
- }
7181
-
7182
- bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int key_id) {
7183
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7184
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_BOOL);
7185
- return ctx->kv[key_id].value.bool_;
7186
- }
7187
-
7188
- const char * lm_gguf_get_val_str(const struct lm_gguf_context * ctx, int key_id) {
7189
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7190
- LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_STRING);
7191
- return ctx->kv[key_id].value.str.data;
7192
- }
7193
-
7194
- const void * lm_gguf_get_val_data(const struct lm_gguf_context * ctx, int key_id) {
7195
- LM_GGML_ASSERT(key_id >= 0 && key_id < lm_gguf_get_n_kv(ctx));
7196
- LM_GGML_ASSERT(ctx->kv[key_id].type != LM_GGUF_TYPE_ARRAY);
7197
- LM_GGML_ASSERT(ctx->kv[key_id].type != LM_GGUF_TYPE_STRING);
7198
- return &ctx->kv[key_id].value;
7199
- }
7200
-
7201
- int lm_gguf_get_n_tensors(const struct lm_gguf_context * ctx) {
7202
- return ctx->header.n_tensors;
7203
- }
7204
-
7205
- int lm_gguf_find_tensor(const struct lm_gguf_context * ctx, const char * name) {
7206
- // return -1 if tensor not found
7207
- int tensorfound = -1;
7208
-
7209
- const int n_tensors = lm_gguf_get_n_tensors(ctx);
7210
-
7211
- for (int i = 0; i < n_tensors; ++i) {
7212
- if (strcmp(name, lm_gguf_get_tensor_name(ctx, i)) == 0) {
7213
- tensorfound = i;
7214
- break;
7215
- }
7216
- }
7217
-
7218
- return tensorfound;
7219
- }
7220
-
7221
- size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int i) {
7222
- return ctx->infos[i].offset;
7223
- }
7224
-
7225
- char * lm_gguf_get_tensor_name(const struct lm_gguf_context * ctx, int i) {
7226
- return ctx->infos[i].name.data;
7227
- }
7228
-
7229
- enum lm_ggml_type lm_gguf_get_tensor_type(const struct lm_gguf_context * ctx, int i) {
7230
- return ctx->infos[i].type;
7231
- }
7232
-
7233
- // returns the index
7234
- static int lm_gguf_get_or_add_key(struct lm_gguf_context * ctx, const char * key) {
7235
- const int idx = lm_gguf_find_key(ctx, key);
7236
- if (idx >= 0) {
7237
- return idx;
7238
- }
7239
-
7240
- const int n_kv = lm_gguf_get_n_kv(ctx);
7241
-
7242
- ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct lm_gguf_kv));
7243
- ctx->kv[n_kv].key.n = strlen(key);
7244
- ctx->kv[n_kv].key.data = strdup(key);
7245
- ctx->header.n_kv++;
7246
-
7247
- return n_kv;
7248
- }
7249
-
7250
- void lm_gguf_remove_key(struct lm_gguf_context * ctx, const char * key) {
7251
- const int idx = lm_gguf_find_key(ctx, key);
7252
- if (idx >= 0) {
7253
- const int n_kv = lm_gguf_get_n_kv(ctx);
7254
- lm_gguf_free_kv(&ctx->kv[idx]);
7255
- for (int i = idx; i < n_kv-1; ++i) {
7256
- ctx->kv[i] = ctx->kv[i+1];
7257
- }
7258
- ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct lm_gguf_kv));
7259
- ctx->header.n_kv--;
7260
- }
7261
- }
7262
-
7263
- void lm_gguf_set_val_u8(struct lm_gguf_context * ctx, const char * key, uint8_t val) {
7264
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7265
-
7266
- ctx->kv[idx].type = LM_GGUF_TYPE_UINT8;
7267
- ctx->kv[idx].value.uint8 = val;
7268
- }
7269
-
7270
- void lm_gguf_set_val_i8(struct lm_gguf_context * ctx, const char * key, int8_t val) {
7271
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7272
-
7273
- ctx->kv[idx].type = LM_GGUF_TYPE_INT8;
7274
- ctx->kv[idx].value.int8 = val;
7275
- }
7276
-
7277
- void lm_gguf_set_val_u16(struct lm_gguf_context * ctx, const char * key, uint16_t val) {
7278
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7279
-
7280
- ctx->kv[idx].type = LM_GGUF_TYPE_UINT16;
7281
- ctx->kv[idx].value.uint16 = val;
7282
- }
7283
-
7284
- void lm_gguf_set_val_i16(struct lm_gguf_context * ctx, const char * key, int16_t val) {
7285
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7286
-
7287
- ctx->kv[idx].type = LM_GGUF_TYPE_INT16;
7288
- ctx->kv[idx].value.int16 = val;
7289
- }
7290
-
7291
- void lm_gguf_set_val_u32(struct lm_gguf_context * ctx, const char * key, uint32_t val) {
7292
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7293
-
7294
- ctx->kv[idx].type = LM_GGUF_TYPE_UINT32;
7295
- ctx->kv[idx].value.uint32 = val;
7296
- }
7297
-
7298
- void lm_gguf_set_val_i32(struct lm_gguf_context * ctx, const char * key, int32_t val) {
7299
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7300
-
7301
- ctx->kv[idx].type = LM_GGUF_TYPE_INT32;
7302
- ctx->kv[idx].value.int32 = val;
7303
- }
7304
-
7305
- void lm_gguf_set_val_f32(struct lm_gguf_context * ctx, const char * key, float val) {
7306
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7307
-
7308
- ctx->kv[idx].type = LM_GGUF_TYPE_FLOAT32;
7309
- ctx->kv[idx].value.float32 = val;
7310
- }
7311
-
7312
- void lm_gguf_set_val_u64(struct lm_gguf_context * ctx, const char * key, uint64_t val) {
7313
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7314
-
7315
- ctx->kv[idx].type = LM_GGUF_TYPE_UINT64;
7316
- ctx->kv[idx].value.uint64 = val;
7317
- }
7318
-
7319
- void lm_gguf_set_val_i64(struct lm_gguf_context * ctx, const char * key, int64_t val) {
7320
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7321
-
7322
- ctx->kv[idx].type = LM_GGUF_TYPE_INT64;
7323
- ctx->kv[idx].value.int64 = val;
7324
- }
7325
-
7326
- void lm_gguf_set_val_f64(struct lm_gguf_context * ctx, const char * key, double val) {
7327
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7328
-
7329
- ctx->kv[idx].type = LM_GGUF_TYPE_FLOAT64;
7330
- ctx->kv[idx].value.float64 = val;
7331
- }
7332
-
7333
- void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val) {
7334
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7335
-
7336
- ctx->kv[idx].type = LM_GGUF_TYPE_BOOL;
7337
- ctx->kv[idx].value.bool_ = val;
7338
- }
7339
-
7340
- void lm_gguf_set_val_str(struct lm_gguf_context * ctx, const char * key, const char * val) {
7341
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7342
-
7343
- ctx->kv[idx].type = LM_GGUF_TYPE_STRING;
7344
- ctx->kv[idx].value.str.n = strlen(val);
7345
- ctx->kv[idx].value.str.data = strdup(val);
7346
- }
7347
-
7348
- void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, int n) {
7349
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7350
-
7351
- ctx->kv[idx].type = LM_GGUF_TYPE_ARRAY;
7352
- ctx->kv[idx].value.arr.type = type;
7353
- ctx->kv[idx].value.arr.n = n;
7354
- ctx->kv[idx].value.arr.data = LM_GGML_CALLOC(n, lm_gguf_type_size(type));
7355
- memcpy(ctx->kv[idx].value.arr.data, data, n*lm_gguf_type_size(type));
7356
- }
7357
-
7358
- void lm_gguf_set_arr_str(struct lm_gguf_context * ctx, const char * key, const char ** data, int n) {
7359
- const int idx = lm_gguf_get_or_add_key(ctx, key);
7360
-
7361
- ctx->kv[idx].type = LM_GGUF_TYPE_ARRAY;
7362
- ctx->kv[idx].value.arr.type = LM_GGUF_TYPE_STRING;
7363
- ctx->kv[idx].value.arr.n = n;
7364
- ctx->kv[idx].value.arr.data = LM_GGML_CALLOC(n, sizeof(struct lm_gguf_str));
7365
- for (int i = 0; i < n; i++) {
7366
- struct lm_gguf_str * str = &((struct lm_gguf_str *)ctx->kv[idx].value.arr.data)[i];
7367
- str->n = strlen(data[i]);
7368
- str->data = strdup(data[i]);
7369
- }
7370
- }
7371
-
7372
- // set or add KV pairs from another context
7373
- void lm_gguf_set_kv(struct lm_gguf_context * ctx, struct lm_gguf_context * src) {
7374
- for (uint32_t i = 0; i < src->header.n_kv; i++) {
7375
- switch (src->kv[i].type) {
7376
- case LM_GGUF_TYPE_UINT8: lm_gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break;
7377
- case LM_GGUF_TYPE_INT8: lm_gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break;
7378
- case LM_GGUF_TYPE_UINT16: lm_gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break;
7379
- case LM_GGUF_TYPE_INT16: lm_gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break;
7380
- case LM_GGUF_TYPE_UINT32: lm_gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
7381
- case LM_GGUF_TYPE_INT32: lm_gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
7382
- case LM_GGUF_TYPE_FLOAT32: lm_gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
7383
- case LM_GGUF_TYPE_UINT64: lm_gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
7384
- case LM_GGUF_TYPE_INT64: lm_gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
7385
- case LM_GGUF_TYPE_FLOAT64: lm_gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
7386
- case LM_GGUF_TYPE_BOOL: lm_gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
7387
- case LM_GGUF_TYPE_STRING: lm_gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
7388
- case LM_GGUF_TYPE_ARRAY:
7389
- {
7390
- if (src->kv[i].value.arr.type == LM_GGUF_TYPE_STRING) {
7391
- const char ** data = LM_GGML_CALLOC(src->kv[i].value.arr.n, sizeof(char *));
7392
- for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
7393
- data[j] = ((struct lm_gguf_str *)src->kv[i].value.arr.data)[j].data;
7394
- }
7395
- lm_gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
7396
- LM_GGML_FREE((void *)data);
7397
- } else if (src->kv[i].value.arr.type == LM_GGUF_TYPE_ARRAY) {
7398
- LM_GGML_ABORT("nested arrays not supported");
7399
- } else {
7400
- lm_gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
7401
- }
7402
- } break;
7403
- default: LM_GGML_ABORT("invalid type");
7404
- }
7405
- }
7406
- }
7407
-
7408
- void lm_gguf_add_tensor(
7409
- struct lm_gguf_context * ctx,
7410
- const struct lm_ggml_tensor * tensor) {
7411
- LM_GGML_ASSERT(tensor);
7412
- if (lm_gguf_find_tensor(ctx, tensor->name) != -1) {
7413
- LM_GGML_ABORT("duplicated tensor name");
7414
- }
7415
-
7416
- const int idx = ctx->header.n_tensors;
7417
- ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct lm_gguf_tensor_info));
7418
-
7419
- ctx->infos[idx].name.n = strlen(tensor->name);
7420
- ctx->infos[idx].name.data = strdup(tensor->name);
7421
-
7422
- for (int i = 0; i < LM_GGML_MAX_DIMS; ++i) {
7423
- ctx->infos[idx].ne[i] = 1;
7424
- }
7425
-
7426
- ctx->infos[idx].n_dims = lm_ggml_n_dims(tensor);
7427
- for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) {
7428
- ctx->infos[idx].ne[i] = tensor->ne[i];
7429
- }
7430
-
7431
- ctx->infos[idx].type = tensor->type;
7432
- ctx->infos[idx].offset = 0;
7433
- ctx->infos[idx].data = tensor->data;
7434
- ctx->infos[idx].size = lm_ggml_nbytes(tensor);
7435
-
7436
- if (ctx->header.n_tensors > 0) {
7437
- ctx->infos[idx].offset = ctx->infos[idx - 1].offset + LM_GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
7438
- }
7439
-
7440
- ctx->header.n_tensors++;
7441
- }
7442
-
7443
- void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type) {
7444
- const int idx = lm_gguf_find_tensor(ctx, name);
7445
- if (idx < 0) {
7446
- LM_GGML_ABORT("tensor not found");
7447
- }
7448
-
7449
- ctx->infos[idx].type = type;
7450
- }
7451
-
7452
- void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data, size_t size) {
7453
- const int idx = lm_gguf_find_tensor(ctx, name);
7454
- if (idx < 0) {
7455
- LM_GGML_ABORT("tensor not found");
7456
- }
7457
-
7458
- ctx->infos[idx].data = data;
7459
- ctx->infos[idx].size = size;
7460
-
7461
- // update offsets
7462
- for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) {
7463
- ctx->infos[i].offset = ctx->infos[i - 1].offset + LM_GGML_PAD(ctx->infos[i - 1].size, ctx->alignment);
7464
- }
7465
- }
7466
-
7467
- //static void lm_gguf_fwrite_str(FILE * file, const struct lm_gguf_str * val) {
7468
- // fwrite(&val->n, sizeof(val->n), 1, file);
7469
- // fwrite(val->data, sizeof(char), val->n, file);
7470
- //}
7471
- //
7472
- //static void lm_gguf_fwrite_el(FILE * file, const void * val, size_t size) {
7473
- // fwrite(val, sizeof(char), size, file);
7474
- //}
7475
-
7476
- struct lm_gguf_buf {
7477
- void * data;
7478
- size_t size;
7479
- size_t offset;
7480
- };
7481
-
7482
- static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
7483
- struct lm_gguf_buf buf = {
7484
- /*buf.data =*/ size == 0 ? NULL : LM_GGML_CALLOC(1, size),
7485
- /*buf.size =*/ size,
7486
- /*buf.offset =*/ 0,
7487
- };
7488
-
7489
- return buf;
7490
- }
7491
-
7492
- static void lm_gguf_buf_free(struct lm_gguf_buf buf) {
7493
- if (buf.data) {
7494
- LM_GGML_FREE(buf.data);
7495
- }
7496
- }
7497
-
7498
- static void lm_gguf_buf_grow(struct lm_gguf_buf * buf, size_t size) {
7499
- if (buf->offset + size > buf->size) {
7500
- buf->size = 1.5*(buf->offset + size);
7501
- if (buf->data) {
7502
- buf->data = realloc(buf->data, buf->size);
7503
- }
7504
- }
7505
- }
7506
-
7507
- static void lm_gguf_bwrite_str(struct lm_gguf_buf * buf, const struct lm_gguf_str * val) {
7508
- lm_gguf_buf_grow(buf, sizeof(val->n) + val->n);
7509
-
7510
- if (buf->data) {
7511
- memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
7512
- }
7513
- buf->offset += sizeof(val->n);
7514
-
7515
- if (buf->data) {
7516
- memcpy((char *) buf->data + buf->offset, val->data, val->n);
7517
- }
7518
- buf->offset += val->n;
7519
- }
7520
-
7521
- static void lm_gguf_bwrite_el(struct lm_gguf_buf * buf, const void * val, size_t el_size) {
7522
- lm_gguf_buf_grow(buf, el_size);
7523
-
7524
- if (buf->data) {
7525
- memcpy((char *) buf->data + buf->offset, val, el_size);
7526
- }
7527
- buf->offset += el_size;
7528
- }
7529
-
7530
- static void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) {
7531
- // write header
7532
- lm_gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
7533
- lm_gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
7534
- lm_gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
7535
- lm_gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
7536
-
7537
- // write key-value pairs
7538
- for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
7539
- struct lm_gguf_kv * kv = &ctx->kv[i];
7540
-
7541
- lm_gguf_bwrite_str(buf, &kv->key);
7542
- lm_gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
7543
-
7544
- switch (kv->type) {
7545
- case LM_GGUF_TYPE_UINT8: lm_gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
7546
- case LM_GGUF_TYPE_INT8: lm_gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break;
7547
- case LM_GGUF_TYPE_UINT16: lm_gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
7548
- case LM_GGUF_TYPE_INT16: lm_gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break;
7549
- case LM_GGUF_TYPE_UINT32: lm_gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
7550
- case LM_GGUF_TYPE_INT32: lm_gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
7551
- case LM_GGUF_TYPE_FLOAT32: lm_gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
7552
- case LM_GGUF_TYPE_UINT64: lm_gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
7553
- case LM_GGUF_TYPE_INT64: lm_gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
7554
- case LM_GGUF_TYPE_FLOAT64: lm_gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
7555
- case LM_GGUF_TYPE_BOOL: lm_gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
7556
- case LM_GGUF_TYPE_STRING: lm_gguf_bwrite_str(buf, &kv->value.str ); break;
7557
- case LM_GGUF_TYPE_ARRAY:
7558
- {
7559
- lm_gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
7560
- lm_gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) );
7561
-
7562
- switch (kv->value.arr.type) {
7563
- case LM_GGUF_TYPE_UINT8:
7564
- case LM_GGUF_TYPE_INT8:
7565
- case LM_GGUF_TYPE_UINT16:
7566
- case LM_GGUF_TYPE_INT16:
7567
- case LM_GGUF_TYPE_UINT32:
7568
- case LM_GGUF_TYPE_INT32:
7569
- case LM_GGUF_TYPE_FLOAT32:
7570
- case LM_GGUF_TYPE_UINT64:
7571
- case LM_GGUF_TYPE_INT64:
7572
- case LM_GGUF_TYPE_FLOAT64:
7573
- case LM_GGUF_TYPE_BOOL:
7574
- {
7575
- lm_gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * lm_gguf_type_size(kv->value.arr.type));
7576
- } break;
7577
- case LM_GGUF_TYPE_STRING:
7578
- {
7579
- for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
7580
- lm_gguf_bwrite_str(buf, &((struct lm_gguf_str *) kv->value.arr.data)[j]);
7581
- }
7582
- } break;
7583
- case LM_GGUF_TYPE_ARRAY:
7584
- default: LM_GGML_ABORT("invalid type");
7585
- }
7586
- } break;
7587
- default: LM_GGML_ABORT("invalid type");
7588
- }
7589
- }
7590
-
7591
- // write tensor infos
7592
- for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
7593
- struct lm_gguf_tensor_info * info = &ctx->infos[i];
7594
-
7595
- lm_gguf_bwrite_str(buf, &info->name);
7596
- lm_gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
7597
- for (uint32_t j = 0; j < info->n_dims; ++j) {
7598
- lm_gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
7599
- }
7600
- lm_gguf_bwrite_el(buf, &info->type, sizeof(info->type));
7601
- lm_gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
7602
- }
7603
-
7604
- // we require the data section to be aligned, so take into account any padding
7605
- {
7606
- const size_t offset = buf->offset;
7607
- const size_t offset_pad = LM_GGML_PAD(offset, ctx->alignment);
7608
-
7609
- if (offset_pad != offset) {
7610
- uint8_t pad = 0;
7611
- for (size_t i = 0; i < offset_pad - offset; ++i) {
7612
- lm_gguf_bwrite_el(buf, &pad, sizeof(pad));
7613
- }
7614
- }
7615
- }
7616
-
7617
- if (only_meta) {
7618
- return;
7619
- }
7620
-
7621
- size_t offset = 0;
7622
-
7623
- // write tensor data
7624
- for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
7625
- struct lm_gguf_tensor_info * info = &ctx->infos[i];
7626
-
7627
- const size_t size = info->size;
7628
- const size_t size_pad = LM_GGML_PAD(size, ctx->alignment);
7629
-
7630
- lm_gguf_bwrite_el(buf, info->data, size);
7631
-
7632
- if (size_pad != size) {
7633
- uint8_t pad = 0;
7634
- for (size_t j = 0; j < size_pad - size; ++j) {
7635
- lm_gguf_bwrite_el(buf, &pad, sizeof(pad));
7636
- }
7637
- }
7638
-
7639
- LM_GGML_ASSERT(offset == info->offset);
7640
-
7641
- offset += size_pad;
7642
- }
7643
- }
7644
-
7645
- void lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta) {
7646
- FILE * file = lm_ggml_fopen(fname, "wb");
7647
- if (!file) {
7648
- LM_GGML_ABORT("failed to open file for writing");
7649
- }
7650
-
7651
- struct lm_gguf_buf buf = lm_gguf_buf_init(16*1024);
7652
-
7653
- lm_gguf_write_to_buf(ctx, &buf, only_meta);
7654
-
7655
- fwrite(buf.data, 1, buf.offset, file);
7656
-
7657
- lm_gguf_buf_free(buf);
7658
-
7659
- fclose(file);
7660
- }
7661
-
7662
- size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx) {
7663
- // no allocs - only compute size
7664
- struct lm_gguf_buf buf = lm_gguf_buf_init(0);
7665
-
7666
- lm_gguf_write_to_buf(ctx, &buf, true);
7667
-
7668
- return buf.offset;
7669
- }
7670
-
7671
- void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data) {
7672
- struct lm_gguf_buf buf = lm_gguf_buf_init(16*1024);
7673
-
7674
- lm_gguf_write_to_buf(ctx, &buf, true);
7675
-
7676
- memcpy(data, buf.data, buf.offset);
7677
-
7678
- lm_gguf_buf_free(buf);
7679
- }
7680
-
7681
6422
  void lm_ggml_log_set(lm_ggml_log_callback log_callback, void * user_data) {
7682
6423
  g_logger_state.log_callback = log_callback ? log_callback : lm_ggml_log_callback_default;
7683
6424
  g_logger_state.log_callback_user_data = user_data;