cui-llama.rn 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/android/src/main/CMakeLists.txt +5 -7
  2. package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
  3. package/android/src/main/jni.cpp +9 -9
  4. package/cpp/common.cpp +28 -44
  5. package/cpp/common.h +35 -14
  6. package/cpp/ggml-alloc.c +0 -1
  7. package/cpp/ggml-backend-impl.h +38 -20
  8. package/cpp/ggml-backend-reg.cpp +246 -92
  9. package/cpp/ggml-backend.h +1 -0
  10. package/cpp/ggml-common.h +42 -48
  11. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +642 -223
  12. package/cpp/ggml-cpu-aarch64.h +2 -26
  13. package/cpp/ggml-cpu-traits.cpp +36 -0
  14. package/cpp/ggml-cpu-traits.h +38 -0
  15. package/cpp/ggml-cpu.c +14122 -13971
  16. package/cpp/ggml-cpu.cpp +627 -715
  17. package/cpp/ggml-cpu.h +0 -17
  18. package/cpp/ggml-impl.h +22 -6
  19. package/cpp/ggml-metal.m +482 -24
  20. package/cpp/ggml-quants.c +0 -9
  21. package/cpp/ggml-threading.h +4 -2
  22. package/cpp/ggml.c +284 -178
  23. package/cpp/ggml.h +73 -25
  24. package/cpp/llama-grammar.cpp +15 -15
  25. package/cpp/llama-grammar.h +2 -5
  26. package/cpp/llama-sampling.cpp +35 -90
  27. package/cpp/llama-vocab.cpp +7 -2
  28. package/cpp/llama-vocab.h +1 -1
  29. package/cpp/llama.cpp +1782 -586
  30. package/cpp/llama.h +20 -19
  31. package/cpp/sampling.cpp +11 -16
  32. package/cpp/sgemm.cpp +265 -258
  33. package/cpp/sgemm.h +2 -2
  34. package/cpp/speculative.cpp +4 -0
  35. package/cpp/unicode.cpp +51 -51
  36. package/cpp/unicode.h +9 -10
  37. package/lib/commonjs/index.js +38 -1
  38. package/lib/commonjs/index.js.map +1 -1
  39. package/lib/module/index.js +36 -0
  40. package/lib/module/index.js.map +1 -1
  41. package/lib/typescript/NativeRNLlama.d.ts +2 -3
  42. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  43. package/lib/typescript/index.d.ts +36 -2
  44. package/lib/typescript/index.d.ts.map +1 -1
  45. package/package.json +1 -1
  46. package/src/NativeRNLlama.ts +3 -3
  47. package/src/index.ts +46 -2
  48. package/cpp/amx/amx.cpp +0 -196
  49. package/cpp/amx/amx.h +0 -20
  50. package/cpp/amx/common.h +0 -101
  51. package/cpp/amx/mmq.cpp +0 -2524
  52. package/cpp/amx/mmq.h +0 -16
  53. package/cpp/ggml-aarch64.c +0 -129
  54. package/cpp/ggml-aarch64.h +0 -19
package/cpp/ggml.c CHANGED
@@ -8,7 +8,10 @@
8
8
 
9
9
  // FIXME: required here for quantization functions
10
10
  #include "ggml-quants.h"
11
- #include "ggml-aarch64.h"
11
+
12
+ #ifdef LM_GGML_USE_CPU_HBM
13
+ #include <hbwmalloc.h>
14
+ #endif
12
15
 
13
16
  #if defined(_MSC_VER) || defined(__MINGW32__)
14
17
  #include <malloc.h> // using malloc.h with MSC/MINGW
@@ -801,32 +804,23 @@ static const struct lm_ggml_type_traits type_traits[LM_GGML_TYPE_COUNT] = {
801
804
  .to_float = (lm_ggml_to_float_t) lm_ggml_bf16_to_fp32_row,
802
805
  .from_float_ref = (lm_ggml_from_float_t) lm_ggml_fp32_to_bf16_row_ref,
803
806
  },
804
- [LM_GGML_TYPE_Q4_0_4_4] = {
805
- .type_name = "q4_0_4x4",
806
- .blck_size = QK4_0,
807
- .blck_size_interleave = 4,
808
- .type_size = sizeof(block_q4_0),
809
- .is_quantized = true,
810
- .to_float = NULL,
811
- .from_float_ref = NULL,
807
+ [31] = { // LM_GGML_TYPE_Q4_0_4_4
808
+ .type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
809
+ .blck_size = 0,
810
+ .type_size = 0,
811
+ .is_quantized = false,
812
812
  },
813
- [LM_GGML_TYPE_Q4_0_4_8] = {
814
- .type_name = "q4_0_4x8",
815
- .blck_size = QK4_0,
816
- .blck_size_interleave = 8,
817
- .type_size = sizeof(block_q4_0),
818
- .is_quantized = true,
819
- .to_float = NULL,
820
- .from_float_ref = NULL,
813
+ [32] = { // LM_GGML_TYPE_Q4_0_4_8
814
+ .type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
815
+ .blck_size = 0,
816
+ .type_size = 0,
817
+ .is_quantized = false,
821
818
  },
822
- [LM_GGML_TYPE_Q4_0_8_8] = {
823
- .type_name = "q4_0_8x8",
824
- .blck_size = QK4_0,
825
- .blck_size_interleave = 8,
826
- .type_size = sizeof(block_q4_0),
827
- .is_quantized = true,
828
- .to_float = NULL,
829
- .from_float_ref = NULL,
819
+ [33] = { // LM_GGML_TYPE_Q4_0_8_8
820
+ .type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
821
+ .blck_size = 0,
822
+ .type_size = 0,
823
+ .is_quantized = false,
830
824
  },
831
825
  [LM_GGML_TYPE_TQ1_0] = {
832
826
  .type_name = "tq1_0",
@@ -844,14 +838,23 @@ static const struct lm_ggml_type_traits type_traits[LM_GGML_TYPE_COUNT] = {
844
838
  .to_float = (lm_ggml_to_float_t) dequantize_row_tq2_0,
845
839
  .from_float_ref = (lm_ggml_from_float_t) quantize_row_tq2_0_ref,
846
840
  },
847
- [LM_GGML_TYPE_IQ4_NL_4_4] = {
848
- .type_name = "iq4_nl_4x4",
849
- .blck_size = QK4_NL,
850
- .blck_size_interleave = 4,
851
- .type_size = sizeof(block_iq4_nl),
852
- .is_quantized = true,
853
- .to_float = NULL,
854
- .from_float_ref = NULL,
841
+ [36] = { // LM_GGML_TYPE_IQ4_NL_4_4
842
+ .type_name = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
843
+ .blck_size = 0,
844
+ .type_size = 0,
845
+ .is_quantized = false,
846
+ },
847
+ [37] = { // LM_GGML_TYPE_IQ4_NL_4_8
848
+ .type_name = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
849
+ .blck_size = 0,
850
+ .type_size = 0,
851
+ .is_quantized = false,
852
+ },
853
+ [38] = { // LM_GGML_TYPE_IQ4_NL_8_8
854
+ .type_name = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
855
+ .blck_size = 0,
856
+ .type_size = 0,
857
+ .is_quantized = false,
855
858
  },
856
859
  };
857
860
 
@@ -963,6 +966,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
963
966
  "POOL_2D_BACK",
964
967
  "UPSCALE",
965
968
  "PAD",
969
+ "PAD_REFLECT_1D",
966
970
  "ARANGE",
967
971
  "TIMESTEP_EMBEDDING",
968
972
  "ARGSORT",
@@ -996,7 +1000,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
996
1000
  "OPT_STEP_ADAMW",
997
1001
  };
998
1002
 
999
- static_assert(LM_GGML_OP_COUNT == 81, "LM_GGML_OP_COUNT != 81");
1003
+ static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
1000
1004
 
1001
1005
  static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
1002
1006
  "none",
@@ -1058,6 +1062,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
1058
1062
  "pool_2d_back(x)",
1059
1063
  "upscale(x)",
1060
1064
  "pad(x)",
1065
+ "pad_reflect_1d(x)",
1061
1066
  "arange(start, stop, step)",
1062
1067
  "timestep_embedding(timesteps, dim, max_period)",
1063
1068
  "argsort(x)",
@@ -1091,7 +1096,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
1091
1096
  "adamw(x)",
1092
1097
  };
1093
1098
 
1094
- static_assert(LM_GGML_OP_COUNT == 81, "LM_GGML_OP_COUNT != 81");
1099
+ static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
1095
1100
 
1096
1101
  static_assert(LM_GGML_OP_POOL_COUNT == 2, "LM_GGML_OP_POOL_COUNT != 2");
1097
1102
 
@@ -1281,9 +1286,6 @@ enum lm_ggml_type lm_ggml_ftype_to_lm_ggml_type(enum lm_ggml_ftype ftype) {
1281
1286
  case LM_GGML_FTYPE_MOSTLY_IQ4_XS: wtype = LM_GGML_TYPE_IQ4_XS; break;
1282
1287
  case LM_GGML_FTYPE_MOSTLY_IQ3_S: wtype = LM_GGML_TYPE_IQ3_S; break;
1283
1288
  case LM_GGML_FTYPE_MOSTLY_IQ2_S: wtype = LM_GGML_TYPE_IQ2_S; break;
1284
- case LM_GGML_FTYPE_MOSTLY_Q4_0_4_4: wtype = LM_GGML_TYPE_Q4_0_4_4; break;
1285
- case LM_GGML_FTYPE_MOSTLY_Q4_0_4_8: wtype = LM_GGML_TYPE_Q4_0_4_8; break;
1286
- case LM_GGML_FTYPE_MOSTLY_Q4_0_8_8: wtype = LM_GGML_TYPE_Q4_0_8_8; break;
1287
1289
  case LM_GGML_FTYPE_UNKNOWN: wtype = LM_GGML_TYPE_COUNT; break;
1288
1290
  case LM_GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = LM_GGML_TYPE_COUNT; break;
1289
1291
  }
@@ -3528,15 +3530,18 @@ static struct lm_ggml_tensor * lm_ggml_rope_impl(
3528
3530
  LM_GGML_ASSERT(c->ne[0] >= n_dims / 2);
3529
3531
  }
3530
3532
 
3533
+ int sections[4] = {0, 0, 0, 0};
3534
+
3531
3535
  struct lm_ggml_tensor * result = inplace ? lm_ggml_view_tensor(ctx, a) : lm_ggml_dup_tensor(ctx, a);
3532
3536
 
3533
- int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
3537
+ int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
3534
3538
  memcpy(params + 5, &freq_base, sizeof(float));
3535
3539
  memcpy(params + 6, &freq_scale, sizeof(float));
3536
3540
  memcpy(params + 7, &ext_factor, sizeof(float));
3537
3541
  memcpy(params + 8, &attn_factor, sizeof(float));
3538
3542
  memcpy(params + 9, &beta_fast, sizeof(float));
3539
3543
  memcpy(params + 10, &beta_slow, sizeof(float));
3544
+ memcpy(params + 11, &sections, sizeof(int)*4);
3540
3545
  lm_ggml_set_op_params(result, params, sizeof(params));
3541
3546
 
3542
3547
  result->op = LM_GGML_OP_ROPE;
@@ -3558,6 +3563,53 @@ struct lm_ggml_tensor * lm_ggml_rope(
3558
3563
  );
3559
3564
  }
3560
3565
 
3566
+ struct lm_ggml_tensor * lm_ggml_rope_multi(
3567
+ struct lm_ggml_context * ctx,
3568
+ struct lm_ggml_tensor * a,
3569
+ struct lm_ggml_tensor * b,
3570
+ struct lm_ggml_tensor * c,
3571
+ int n_dims,
3572
+ int sections[4],
3573
+ int mode,
3574
+ int n_ctx_orig,
3575
+ float freq_base,
3576
+ float freq_scale,
3577
+ float ext_factor,
3578
+ float attn_factor,
3579
+ float beta_fast,
3580
+ float beta_slow) {
3581
+ // Multimodal Rotary Position Embedding
3582
+ LM_GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
3583
+
3584
+ LM_GGML_ASSERT(lm_ggml_is_vector(b));
3585
+ LM_GGML_ASSERT(b->type == LM_GGML_TYPE_I32);
3586
+ LM_GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
3587
+
3588
+ if (c) {
3589
+ LM_GGML_ASSERT(c->type == LM_GGML_TYPE_F32);
3590
+ LM_GGML_ASSERT(c->ne[0] >= n_dims / 2);
3591
+ }
3592
+
3593
+ struct lm_ggml_tensor * result = lm_ggml_dup_tensor(ctx, a);
3594
+
3595
+ int32_t params[11 + 4] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
3596
+ memcpy(params + 5, &freq_base, sizeof(float));
3597
+ memcpy(params + 6, &freq_scale, sizeof(float));
3598
+ memcpy(params + 7, &ext_factor, sizeof(float));
3599
+ memcpy(params + 8, &attn_factor, sizeof(float));
3600
+ memcpy(params + 9, &beta_fast, sizeof(float));
3601
+ memcpy(params + 10, &beta_slow, sizeof(float));
3602
+ memcpy(&params[11], sections, sizeof(int)*4);
3603
+ lm_ggml_set_op_params(result, params, sizeof(params));
3604
+
3605
+ result->op = LM_GGML_OP_ROPE;
3606
+ result->src[0] = a;
3607
+ result->src[1] = b;
3608
+ result->src[2] = c;
3609
+
3610
+ return result;
3611
+ }
3612
+
3561
3613
  struct lm_ggml_tensor * lm_ggml_rope_inplace(
3562
3614
  struct lm_ggml_context * ctx,
3563
3615
  struct lm_ggml_tensor * a,
@@ -3721,13 +3773,84 @@ struct lm_ggml_tensor * lm_ggml_clamp(
3721
3773
  return result;
3722
3774
  }
3723
3775
 
3724
- // lm_ggml_conv_1d
3725
-
3726
3776
  static int64_t lm_ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
3727
3777
  return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
3728
3778
  }
3729
3779
 
3730
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
3780
+ // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
3781
+ // a: [OC,IC, KH, KW]
3782
+ // b: [N, IC, IH, IW]
3783
+ // result: [N, OH, OW, IC*KH*KW]
3784
+ struct lm_ggml_tensor * lm_ggml_im2col(
3785
+ struct lm_ggml_context * ctx,
3786
+ struct lm_ggml_tensor * a,
3787
+ struct lm_ggml_tensor * b,
3788
+ int s0,
3789
+ int s1,
3790
+ int p0,
3791
+ int p1,
3792
+ int d0,
3793
+ int d1,
3794
+ bool is_2D,
3795
+ enum lm_ggml_type dst_type) {
3796
+ if (is_2D) {
3797
+ LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
3798
+ } else {
3799
+ //LM_GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
3800
+ LM_GGML_ASSERT(b->ne[1] == a->ne[1]);
3801
+ LM_GGML_ASSERT(b->ne[3] == 1);
3802
+ }
3803
+
3804
+ const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
3805
+ const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
3806
+
3807
+ LM_GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
3808
+ LM_GGML_ASSERT((OW > 0) && "b too small compared to a");
3809
+
3810
+ const int64_t ne[4] = {
3811
+ is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
3812
+ OW,
3813
+ is_2D ? OH : b->ne[2],
3814
+ is_2D ? b->ne[3] : 1,
3815
+ };
3816
+
3817
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, dst_type, 4, ne);
3818
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
3819
+ lm_ggml_set_op_params(result, params, sizeof(params));
3820
+
3821
+ result->op = LM_GGML_OP_IM2COL;
3822
+ result->src[0] = a;
3823
+ result->src[1] = b;
3824
+
3825
+ return result;
3826
+ }
3827
+
3828
+ struct lm_ggml_tensor * lm_ggml_im2col_back(
3829
+ struct lm_ggml_context * ctx,
3830
+ struct lm_ggml_tensor * a,
3831
+ struct lm_ggml_tensor * b,
3832
+ int64_t * ne,
3833
+ int s0,
3834
+ int s1,
3835
+ int p0,
3836
+ int p1,
3837
+ int d0,
3838
+ int d1,
3839
+ bool is_2D) {
3840
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
3841
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
3842
+ lm_ggml_set_op_params(result, params, sizeof(params));
3843
+
3844
+ result->op = LM_GGML_OP_IM2COL_BACK;
3845
+ result->src[0] = a;
3846
+ result->src[1] = b;
3847
+
3848
+ return result;
3849
+ }
3850
+
3851
+ // lm_ggml_conv_1d
3852
+
3853
+ struct lm_ggml_tensor * lm_ggml_conv_1d(
3731
3854
  struct lm_ggml_context * ctx,
3732
3855
  struct lm_ggml_tensor * a,
3733
3856
  struct lm_ggml_tensor * b,
@@ -3757,137 +3880,75 @@ struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
3757
3880
  return lm_ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
3758
3881
  }
3759
3882
 
3760
- // lm_ggml_conv_transpose_1d
3761
-
3762
- static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
3763
- return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
3764
- }
3883
+ // lm_ggml_conv_1d_dw
3765
3884
 
3766
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
3885
+ struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
3767
3886
  struct lm_ggml_context * ctx,
3768
3887
  struct lm_ggml_tensor * a,
3769
3888
  struct lm_ggml_tensor * b,
3770
3889
  int s0,
3771
3890
  int p0,
3772
3891
  int d0) {
3773
- LM_GGML_ASSERT(lm_ggml_is_matrix(b));
3774
- LM_GGML_ASSERT(a->ne[2] == b->ne[1]);
3775
- LM_GGML_ASSERT(a->ne[3] == 1);
3892
+ struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
3893
+ struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
3776
3894
 
3777
- LM_GGML_ASSERT(p0 == 0);
3778
- LM_GGML_ASSERT(d0 == 1);
3779
-
3780
- const int64_t ne[4] = {
3781
- lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
3782
- a->ne[1], b->ne[2], 1,
3783
- };
3784
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
3895
+ struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, LM_GGML_TYPE_F16);
3785
3896
 
3786
- int32_t params[] = { s0, p0, d0 };
3787
- lm_ggml_set_op_params(result, params, sizeof(params));
3897
+ struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, im2col, a);
3788
3898
 
3789
- result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
3790
- result->src[0] = a;
3791
- result->src[1] = b;
3899
+ result = lm_ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
3792
3900
 
3793
3901
  return result;
3794
3902
  }
3795
3903
 
3796
- // lm_ggml_conv_depthwise
3904
+ // lm_ggml_conv_1d_dw_ph
3797
3905
 
3798
- struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
3906
+ struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
3799
3907
  struct lm_ggml_context * ctx,
3800
3908
  struct lm_ggml_tensor * a,
3801
3909
  struct lm_ggml_tensor * b,
3802
3910
  int s0,
3803
- int s1,
3804
- int p0,
3805
- int p1,
3806
- int d0,
3807
- int d1) {
3808
- struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
3809
- struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
3810
- lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
3811
- s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
3812
- struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
3911
+ int d0) {
3912
+ return lm_ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
3913
+ }
3813
3914
 
3814
- new_a = lm_ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
3815
- struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
3816
- result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
3915
+ // lm_ggml_conv_transpose_1d
3817
3916
 
3818
- return result;
3917
+ static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
3918
+ return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
3819
3919
  }
3820
- // lm_ggml_conv_2d
3821
3920
 
3822
- // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
3823
- // a: [OC,IC, KH, KW]
3824
- // b: [N, IC, IH, IW]
3825
- // result: [N, OH, OW, IC*KH*KW]
3826
- struct lm_ggml_tensor * lm_ggml_im2col(
3921
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
3827
3922
  struct lm_ggml_context * ctx,
3828
3923
  struct lm_ggml_tensor * a,
3829
3924
  struct lm_ggml_tensor * b,
3830
3925
  int s0,
3831
- int s1,
3832
3926
  int p0,
3833
- int p1,
3834
- int d0,
3835
- int d1,
3836
- bool is_2D,
3837
- enum lm_ggml_type dst_type) {
3838
- if(is_2D) {
3839
- LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
3840
- } else {
3841
- LM_GGML_ASSERT(a->ne[1] == b->ne[1]);
3842
- LM_GGML_ASSERT(b->ne[3] == 1);
3843
- }
3844
-
3845
- const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
3846
- const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
3927
+ int d0) {
3928
+ LM_GGML_ASSERT(lm_ggml_is_matrix(b));
3929
+ LM_GGML_ASSERT(a->ne[2] == b->ne[1]);
3930
+ LM_GGML_ASSERT(a->ne[3] == 1);
3847
3931
 
3848
- LM_GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
3849
- LM_GGML_ASSERT((OW > 0) && "b too small compared to a");
3932
+ LM_GGML_ASSERT(p0 == 0);
3933
+ LM_GGML_ASSERT(d0 == 1);
3850
3934
 
3851
3935
  const int64_t ne[4] = {
3852
- is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
3853
- OW,
3854
- is_2D ? OH : b->ne[2],
3855
- is_2D ? b->ne[3] : 1,
3936
+ lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
3937
+ a->ne[1], b->ne[2], 1,
3856
3938
  };
3939
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
3857
3940
 
3858
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, dst_type, 4, ne);
3859
- int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
3941
+ int32_t params[] = { s0, p0, d0 };
3860
3942
  lm_ggml_set_op_params(result, params, sizeof(params));
3861
3943
 
3862
- result->op = LM_GGML_OP_IM2COL;
3944
+ result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
3863
3945
  result->src[0] = a;
3864
3946
  result->src[1] = b;
3865
3947
 
3866
3948
  return result;
3867
3949
  }
3868
3950
 
3869
- struct lm_ggml_tensor * lm_ggml_im2col_back(
3870
- struct lm_ggml_context * ctx,
3871
- struct lm_ggml_tensor * a,
3872
- struct lm_ggml_tensor * b,
3873
- int64_t * ne,
3874
- int s0,
3875
- int s1,
3876
- int p0,
3877
- int p1,
3878
- int d0,
3879
- int d1,
3880
- bool is_2D) {
3881
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
3882
- int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
3883
- lm_ggml_set_op_params(result, params, sizeof(params));
3884
-
3885
- result->op = LM_GGML_OP_IM2COL_BACK;
3886
- result->src[0] = a;
3887
- result->src[1] = b;
3888
-
3889
- return result;
3890
- }
3951
+ // lm_ggml_conv_2d
3891
3952
 
3892
3953
  // a: [OC,IC, KH, KW]
3893
3954
  // b: [N, IC, IH, IW]
@@ -3934,6 +3995,31 @@ struct lm_ggml_tensor * lm_ggml_conv_2d_s1_ph(
3934
3995
  return lm_ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
3935
3996
  }
3936
3997
 
3998
+ // lm_ggml_conv_2d_dw
3999
+
4000
+ struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
4001
+ struct lm_ggml_context * ctx,
4002
+ struct lm_ggml_tensor * a,
4003
+ struct lm_ggml_tensor * b,
4004
+ int s0,
4005
+ int s1,
4006
+ int p0,
4007
+ int p1,
4008
+ int d0,
4009
+ int d1) {
4010
+ struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4011
+ struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
4012
+ lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4013
+ s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4014
+ struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4015
+
4016
+ new_a = lm_ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4017
+ struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
4018
+ result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4019
+
4020
+ return result;
4021
+ }
4022
+
3937
4023
  // lm_ggml_conv_transpose_2d_p0
3938
4024
 
3939
4025
  static int64_t lm_ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
@@ -4110,6 +4196,37 @@ struct lm_ggml_tensor * lm_ggml_pad(
4110
4196
  return result;
4111
4197
  }
4112
4198
 
4199
+ // lm_ggml_pad_reflect_1d
4200
+
4201
+ struct lm_ggml_tensor * lm_ggml_pad_reflect_1d(
4202
+ struct lm_ggml_context * ctx,
4203
+ struct lm_ggml_tensor * a,
4204
+ int p0,
4205
+ int p1) {
4206
+ LM_GGML_ASSERT(p0 >= 0);
4207
+ LM_GGML_ASSERT(p1 >= 0);
4208
+
4209
+ LM_GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the
4210
+ LM_GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
4211
+
4212
+ LM_GGML_ASSERT(lm_ggml_is_contiguous(a));
4213
+ LM_GGML_ASSERT(a->type == LM_GGML_TYPE_F32);
4214
+
4215
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor_4d(ctx, a->type,
4216
+ a->ne[0] + p0 + p1,
4217
+ a->ne[1],
4218
+ a->ne[2],
4219
+ a->ne[3]);
4220
+
4221
+ int32_t params[] = { p0, p1 };
4222
+ lm_ggml_set_op_params(result, params, sizeof(params));
4223
+
4224
+ result->op = LM_GGML_OP_PAD_REFLECT_1D;
4225
+ result->src[0] = a;
4226
+
4227
+ return result;
4228
+ }
4229
+
4113
4230
  // lm_ggml_arange
4114
4231
 
4115
4232
  struct lm_ggml_tensor * lm_ggml_arange(
@@ -5967,12 +6084,12 @@ struct lm_ggml_tensor * lm_ggml_graph_get_tensor(const struct lm_ggml_cgraph * c
5967
6084
 
5968
6085
  struct lm_ggml_tensor * lm_ggml_graph_get_grad(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
5969
6086
  const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
5970
- return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grads[igrad] : NULL;
6087
+ return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
5971
6088
  }
5972
6089
 
5973
6090
  struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
5974
6091
  const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
5975
- return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grad_accs[igrad] : NULL;
6092
+ return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
5976
6093
  }
5977
6094
 
5978
6095
  void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph) {
@@ -6284,9 +6401,6 @@ size_t lm_ggml_quantize_chunk(
6284
6401
  case LM_GGML_TYPE_IQ1_M: result = quantize_iq1_m (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
6285
6402
  case LM_GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
6286
6403
  case LM_GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
6287
- case LM_GGML_TYPE_Q4_0_4_4: result = quantize_q4_0_4x4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
6288
- case LM_GGML_TYPE_Q4_0_4_8: result = quantize_q4_0_4x8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
6289
- case LM_GGML_TYPE_Q4_0_8_8: result = quantize_q4_0_8x8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
6290
6404
  case LM_GGML_TYPE_F16:
6291
6405
  {
6292
6406
  size_t elemsize = sizeof(lm_ggml_fp16_t);
@@ -6422,7 +6536,7 @@ struct lm_gguf_context {
6422
6536
  void * data;
6423
6537
  };
6424
6538
 
6425
- static size_t lm_gguf_type_size(enum lm_gguf_type type) {
6539
+ size_t lm_gguf_type_size(enum lm_gguf_type type) {
6426
6540
  LM_GGML_ASSERT(0 <= type && type < LM_GGUF_TYPE_COUNT);
6427
6541
  return LM_GGUF_TYPE_SIZE[type];
6428
6542
  }
@@ -6550,13 +6664,7 @@ struct lm_gguf_context * lm_gguf_init_empty(void) {
6550
6664
  return ctx;
6551
6665
  }
6552
6666
 
6553
- struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
6554
- FILE * file = lm_ggml_fopen(fname, "rb");
6555
- if (!file) {
6556
- fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
6557
- return NULL;
6558
- }
6559
-
6667
+ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf_init_params params) {
6560
6668
  // offset from start of file
6561
6669
  size_t offset = 0;
6562
6670
 
@@ -6569,7 +6677,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6569
6677
  for (uint32_t i = 0; i < sizeof(magic); i++) {
6570
6678
  if (magic[i] != LM_GGUF_MAGIC[i]) {
6571
6679
  fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
6572
- fclose(file);
6573
6680
  return NULL;
6574
6681
  }
6575
6682
  }
@@ -6580,7 +6687,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6580
6687
  struct lm_gguf_context * ctx = calloc(1, sizeof(struct lm_gguf_context));
6581
6688
  if (!ctx) {
6582
6689
  fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
6583
- fclose(file);
6584
6690
  return NULL;
6585
6691
  }
6586
6692
 
@@ -6598,7 +6704,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6598
6704
 
6599
6705
  if (ctx->header.version == 1) {
6600
6706
  fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
6601
- fclose(file);
6602
6707
  lm_gguf_free(ctx);
6603
6708
  return NULL;
6604
6709
  }
@@ -6611,7 +6716,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6611
6716
 
6612
6717
  if (!ok) {
6613
6718
  fprintf(stderr, "%s: failed to read header\n", __func__);
6614
- fclose(file);
6615
6719
  lm_gguf_free(ctx);
6616
6720
  return NULL;
6617
6721
  }
@@ -6621,12 +6725,13 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6621
6725
  {
6622
6726
  const uint64_t n_kv = ctx->header.n_kv;
6623
6727
 
6624
- ctx->kv = calloc(n_kv, sizeof(struct lm_gguf_kv));
6625
- if (!ctx->kv) {
6626
- fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
6627
- fclose(file);
6628
- lm_gguf_free(ctx);
6629
- return NULL;
6728
+ if (n_kv > 0) {
6729
+ ctx->kv = calloc(n_kv, sizeof(struct lm_gguf_kv));
6730
+ if (!ctx->kv) {
6731
+ fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
6732
+ lm_gguf_free(ctx);
6733
+ return NULL;
6734
+ }
6630
6735
  }
6631
6736
 
6632
6737
  for (uint64_t i = 0; i < n_kv; ++i) {
@@ -6673,7 +6778,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6673
6778
  // prevent from integer overflow in the malloc below
6674
6779
  if (kv->value.arr.n >= SIZE_MAX/lm_gguf_type_size(kv->value.arr.type)) {
6675
6780
  fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
6676
- fclose(file);
6677
6781
  lm_gguf_free(ctx);
6678
6782
  return NULL;
6679
6783
  }
@@ -6681,7 +6785,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6681
6785
  kv->value.arr.data = calloc(kv->value.arr.n, lm_gguf_type_size(kv->value.arr.type));
6682
6786
  if (!kv->value.arr.data) {
6683
6787
  fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
6684
- fclose(file);
6685
6788
  lm_gguf_free(ctx);
6686
6789
  return NULL;
6687
6790
  }
@@ -6693,7 +6796,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6693
6796
  // prevent from integer overflow in the malloc below
6694
6797
  if (kv->value.arr.n >= SIZE_MAX/sizeof(struct lm_gguf_str)) {
6695
6798
  fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
6696
- fclose(file);
6697
6799
  lm_gguf_free(ctx);
6698
6800
  return NULL;
6699
6801
  }
@@ -6701,7 +6803,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6701
6803
  kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct lm_gguf_str));
6702
6804
  if (!kv->value.arr.data) {
6703
6805
  fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
6704
- fclose(file);
6705
6806
  lm_gguf_free(ctx);
6706
6807
  return NULL;
6707
6808
  }
@@ -6732,7 +6833,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6732
6833
 
6733
6834
  if (!ok) {
6734
6835
  fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
6735
- fclose(file);
6736
6836
  lm_gguf_free(ctx);
6737
6837
  return NULL;
6738
6838
  }
@@ -6743,7 +6843,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6743
6843
  ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct lm_gguf_tensor_info));
6744
6844
  if (!ctx->infos) {
6745
6845
  fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
6746
- fclose(file);
6747
6846
  lm_gguf_free(ctx);
6748
6847
  return NULL;
6749
6848
  }
@@ -6779,7 +6878,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6779
6878
 
6780
6879
  if (!ok) {
6781
6880
  fprintf(stderr, "%s: failed to read tensor info\n", __func__);
6782
- fclose(file);
6783
6881
  lm_gguf_free(ctx);
6784
6882
  return NULL;
6785
6883
  }
@@ -6818,10 +6916,17 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6818
6916
  (int64_t) info->ne[2] *
6819
6917
  (int64_t) info->ne[3];
6820
6918
 
6821
- if (lm_ggml_blck_size(info->type) == 0 || ne % lm_ggml_blck_size(info->type) != 0) {
6919
+ if (lm_ggml_blck_size(info->type) == 0 ) {
6920
+ // this tensor type support have been removed:
6921
+ fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
6922
+ __func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type));
6923
+ lm_gguf_free(ctx);
6924
+ return NULL;
6925
+ }
6926
+
6927
+ if (ne % lm_ggml_blck_size(info->type) != 0) {
6822
6928
  fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
6823
6929
  __func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type), ne, lm_ggml_blck_size(info->type));
6824
- fclose(file);
6825
6930
  lm_gguf_free(ctx);
6826
6931
  return NULL;
6827
6932
  }
@@ -6853,7 +6958,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6853
6958
  *params.ctx = lm_ggml_init(pdata);
6854
6959
  if (*params.ctx == NULL) {
6855
6960
  fprintf(stderr, "%s: failed to initialize context\n", __func__);
6856
- fclose(file);
6857
6961
  lm_gguf_free(ctx);
6858
6962
  return NULL;
6859
6963
  }
@@ -6872,7 +6976,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6872
6976
 
6873
6977
  if (!ok) {
6874
6978
  fprintf(stderr, "%s: failed to read tensor data\n", __func__);
6875
- fclose(file);
6876
6979
  lm_ggml_free(ctx_data);
6877
6980
  lm_gguf_free(ctx);
6878
6981
  return NULL;
@@ -6911,7 +7014,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6911
7014
 
6912
7015
  if (!ok) {
6913
7016
  fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
6914
- fclose(file);
6915
7017
  lm_ggml_free(ctx_data);
6916
7018
  lm_gguf_free(ctx);
6917
7019
  return NULL;
@@ -6920,11 +7022,21 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
6920
7022
  lm_ggml_set_no_alloc(ctx_data, params.no_alloc);
6921
7023
  }
6922
7024
 
6923
- fclose(file);
6924
-
6925
7025
  return ctx;
6926
7026
  }
6927
7027
 
7028
+ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
7029
+ FILE * file = lm_ggml_fopen(fname, "rb");
7030
+ if (!file) {
7031
+ fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
7032
+ return NULL;
7033
+ }
7034
+
7035
+ struct lm_gguf_context * result = lm_gguf_init_from_file_impl(file, params);
7036
+ fclose(file);
7037
+ return result;
7038
+ }
7039
+
6928
7040
  void lm_gguf_free(struct lm_gguf_context * ctx) {
6929
7041
  if (ctx == NULL) {
6930
7042
  return;
@@ -7384,13 +7496,7 @@ void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, co
7384
7496
  // fwrite(val, sizeof(char), size, file);
7385
7497
  //}
7386
7498
 
7387
- struct lm_gguf_buf {
7388
- void * data;
7389
- size_t size;
7390
- size_t offset;
7391
- };
7392
-
7393
- static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
7499
+ struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
7394
7500
  struct lm_gguf_buf buf = {
7395
7501
  /*buf.data =*/ size == 0 ? NULL : LM_GGML_CALLOC(1, size),
7396
7502
  /*buf.size =*/ size,
@@ -7400,7 +7506,7 @@ static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
7400
7506
  return buf;
7401
7507
  }
7402
7508
 
7403
- static void lm_gguf_buf_free(struct lm_gguf_buf buf) {
7509
+ void lm_gguf_buf_free(struct lm_gguf_buf buf) {
7404
7510
  if (buf.data) {
7405
7511
  LM_GGML_FREE(buf.data);
7406
7512
  }
@@ -7438,7 +7544,7 @@ static void lm_gguf_bwrite_el(struct lm_gguf_buf * buf, const void * val, size_t
7438
7544
  buf->offset += el_size;
7439
7545
  }
7440
7546
 
7441
- static void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) {
7547
+ void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) {
7442
7548
  // write header
7443
7549
  lm_gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
7444
7550
  lm_gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));