cui-llama.rn 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/ggml.c CHANGED
@@ -3773,13 +3773,84 @@ struct lm_ggml_tensor * lm_ggml_clamp(
  return result;
  }
 
- // lm_ggml_conv_1d
-
  static int64_t lm_ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
  return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
  }
 
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
+ // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+ // a: [OC,IC, KH, KW]
+ // b: [N, IC, IH, IW]
+ // result: [N, OH, OW, IC*KH*KW]
+ struct lm_ggml_tensor * lm_ggml_im2col(
+ struct lm_ggml_context * ctx,
+ struct lm_ggml_tensor * a,
+ struct lm_ggml_tensor * b,
+ int s0,
+ int s1,
+ int p0,
+ int p1,
+ int d0,
+ int d1,
+ bool is_2D,
+ enum lm_ggml_type dst_type) {
+ if (is_2D) {
+ LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
+ } else {
+ //LM_GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
+ LM_GGML_ASSERT(b->ne[1] == a->ne[1]);
+ LM_GGML_ASSERT(b->ne[3] == 1);
+ }
+
+ const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
+ const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+
+ LM_GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
+ LM_GGML_ASSERT((OW > 0) && "b too small compared to a");
+
+ const int64_t ne[4] = {
+ is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
+ OW,
+ is_2D ? OH : b->ne[2],
+ is_2D ? b->ne[3] : 1,
+ };
+
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, dst_type, 4, ne);
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+ lm_ggml_set_op_params(result, params, sizeof(params));
+
+ result->op = LM_GGML_OP_IM2COL;
+ result->src[0] = a;
+ result->src[1] = b;
+
+ return result;
+ }
+
+ struct lm_ggml_tensor * lm_ggml_im2col_back(
+ struct lm_ggml_context * ctx,
+ struct lm_ggml_tensor * a,
+ struct lm_ggml_tensor * b,
+ int64_t * ne,
+ int s0,
+ int s1,
+ int p0,
+ int p1,
+ int d0,
+ int d1,
+ bool is_2D) {
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+ lm_ggml_set_op_params(result, params, sizeof(params));
+
+ result->op = LM_GGML_OP_IM2COL_BACK;
+ result->src[0] = a;
+ result->src[1] = b;
+
+ return result;
+ }
+
+ // lm_ggml_conv_1d
+
+ struct lm_ggml_tensor * lm_ggml_conv_1d(
  struct lm_ggml_context * ctx,
  struct lm_ggml_tensor * a,
  struct lm_ggml_tensor * b,
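The output-size helper retained above is the standard convolution arithmetic, OW = (IW + 2*p - d*(KW - 1) - 1) / s + 1. Two worked values, purely for illustration (the numbers are chosen for this note, not taken from the diff):

    // IW = 10,  KW = 3, s = 1, p = 1, d = 1  ->  (10  + 2 - 2 - 1) / 1 + 1 = 10   ("same"-length output)
    // IW = 224, KW = 3, s = 2, p = 1, d = 1  ->  (224 + 2 - 2 - 1) / 2 + 1 = 112  (strided downsampling)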
@@ -3809,137 +3880,75 @@ struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
  return lm_ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
  }
 
- // lm_ggml_conv_transpose_1d
+ // lm_ggml_conv_1d_dw
 
- static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
- return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
- }
-
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
+ struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
  struct lm_ggml_context * ctx,
  struct lm_ggml_tensor * a,
  struct lm_ggml_tensor * b,
  int s0,
  int p0,
  int d0) {
- LM_GGML_ASSERT(lm_ggml_is_matrix(b));
- LM_GGML_ASSERT(a->ne[2] == b->ne[1]);
- LM_GGML_ASSERT(a->ne[3] == 1);
+ struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
+ struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
 
- LM_GGML_ASSERT(p0 == 0);
- LM_GGML_ASSERT(d0 == 1);
+ struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, LM_GGML_TYPE_F16);
 
- const int64_t ne[4] = {
- lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
- a->ne[1], b->ne[2], 1,
- };
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
+ struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, im2col, a);
 
- int32_t params[] = { s0, p0, d0 };
- lm_ggml_set_op_params(result, params, sizeof(params));
-
- result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
- result->src[0] = a;
- result->src[1] = b;
+ result = lm_ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
 
  return result;
  }
 
- // lm_ggml_conv_depthwise
+ // lm_ggml_conv_1d_dw_ph
 
- struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
+ struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
  struct lm_ggml_context * ctx,
  struct lm_ggml_tensor * a,
  struct lm_ggml_tensor * b,
  int s0,
- int s1,
- int p0,
- int p1,
- int d0,
- int d1) {
- struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
- struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
- lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
- s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
- struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
+ int d0) {
+ return lm_ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
+ }
 
- new_a = lm_ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
- struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
- result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
+ // lm_ggml_conv_transpose_1d
 
- return result;
+ static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
+ return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
  }
- // lm_ggml_conv_2d
 
- // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
- // a: [OC,IC, KH, KW]
- // b: [N, IC, IH, IW]
- // result: [N, OH, OW, IC*KH*KW]
- struct lm_ggml_tensor * lm_ggml_im2col(
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
  struct lm_ggml_context * ctx,
  struct lm_ggml_tensor * a,
  struct lm_ggml_tensor * b,
  int s0,
- int s1,
  int p0,
- int p1,
- int d0,
- int d1,
- bool is_2D,
- enum lm_ggml_type dst_type) {
- if(is_2D) {
- LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
- } else {
- LM_GGML_ASSERT(a->ne[1] == b->ne[1]);
- LM_GGML_ASSERT(b->ne[3] == 1);
- }
-
- const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
- const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+ int d0) {
+ LM_GGML_ASSERT(lm_ggml_is_matrix(b));
+ LM_GGML_ASSERT(a->ne[2] == b->ne[1]);
+ LM_GGML_ASSERT(a->ne[3] == 1);
 
- LM_GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
- LM_GGML_ASSERT((OW > 0) && "b too small compared to a");
+ LM_GGML_ASSERT(p0 == 0);
+ LM_GGML_ASSERT(d0 == 1);
 
  const int64_t ne[4] = {
- is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
- OW,
- is_2D ? OH : b->ne[2],
- is_2D ? b->ne[3] : 1,
+ lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
+ a->ne[1], b->ne[2], 1,
  };
+ struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
 
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, dst_type, 4, ne);
- int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+ int32_t params[] = { s0, p0, d0 };
  lm_ggml_set_op_params(result, params, sizeof(params));
 
- result->op = LM_GGML_OP_IM2COL;
+ result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
  result->src[0] = a;
  result->src[1] = b;
 
  return result;
  }
 
- struct lm_ggml_tensor * lm_ggml_im2col_back(
- struct lm_ggml_context * ctx,
- struct lm_ggml_tensor * a,
- struct lm_ggml_tensor * b,
- int64_t * ne,
- int s0,
- int s1,
- int p0,
- int p1,
- int d0,
- int d1,
- bool is_2D) {
- struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
- int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
- lm_ggml_set_op_params(result, params, sizeof(params));
-
- result->op = LM_GGML_OP_IM2COL_BACK;
- result->src[0] = a;
- result->src[1] = b;
-
- return result;
- }
+ // lm_ggml_conv_2d
 
  // a: [OC,IC, KH, KW]
  // b: [N, IC, IH, IW]
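The new lm_ggml_conv_1d_dw above builds a depthwise 1D convolution from im2col plus a mat-mul rather than a dedicated op. A minimal calling sketch follows; the tensor shapes are assumptions inferred from the reshapes in that function (kernel laid out as [KW, 1, C], input as [IW, C], single batch), and the header comment for this function itself warns it still needs more testing:

    // hedged sketch, not part of the package: "same"-style depthwise 1D conv over 16 channels
    struct lm_ggml_tensor * a = lm_ggml_new_tensor_3d(ctx, LM_GGML_TYPE_F32, 3, 1, 16);  // assumed kernel layout [KW, 1, C]
    struct lm_ggml_tensor * b = lm_ggml_new_tensor_2d(ctx, LM_GGML_TYPE_F32, 64, 16);    // assumed input layout [IW, C]
    struct lm_ggml_tensor * y = lm_ggml_conv_1d_dw(ctx, a, b, /*s0*/ 1, /*p0*/ 1, /*d0*/ 1);
    // the final reshape sizes the result from b, so only configurations where OW == IW clearly round-trip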
@@ -3986,6 +3995,31 @@ struct lm_ggml_tensor * lm_ggml_conv_2d_s1_ph(
  return lm_ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
  }
 
+ // lm_ggml_conv_2d_dw
+
+ struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
+ struct lm_ggml_context * ctx,
+ struct lm_ggml_tensor * a,
+ struct lm_ggml_tensor * b,
+ int s0,
+ int s1,
+ int p0,
+ int p1,
+ int d0,
+ int d1) {
+ struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
+ struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
+ lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
+ s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
+ struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
+
+ new_a = lm_ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
+ struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
+ result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
+
+ return result;
+ }
+
  // lm_ggml_conv_transpose_2d_p0
 
  static int64_t lm_ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
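The body added above is the former lm_ggml_conv_depthwise_2d relocated under its new lm_ggml_conv_2d_dw name. A hedged calling sketch, with shapes inferred from the reshapes and comments in that function; the concrete sizes below are illustrative only:

    // hedged sketch, not part of the package: 3x3 depthwise conv over 16 channels, "same" padding
    struct lm_ggml_tensor * a = lm_ggml_new_tensor_4d(ctx, LM_GGML_TYPE_F16, 3, 3, 1, 16);   // assumed kernel layout [KW, KH, 1, C]
    struct lm_ggml_tensor * b = lm_ggml_new_tensor_4d(ctx, LM_GGML_TYPE_F32, 64, 64, 16, 1); // assumed input layout [IW, IH, C, N]
    struct lm_ggml_tensor * y = lm_ggml_conv_2d_dw(ctx, a, b, /*s0*/ 1, /*s1*/ 1, /*p0*/ 1, /*p1*/ 1, /*d0*/ 1, /*d1*/ 1);
    // expected y: [OW, OH, C, N] = [64, 64, 16, 1] for these parameters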
@@ -6050,12 +6084,12 @@ struct lm_ggml_tensor * lm_ggml_graph_get_tensor(const struct lm_ggml_cgraph * c
 
  struct lm_ggml_tensor * lm_ggml_graph_get_grad(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
  const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
- return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grads[igrad] : NULL;
+ return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
  }
 
  struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
  const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
- return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grad_accs[igrad] : NULL;
+ return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
  }
 
  void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph) {
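The added `&& cgraph->grads` / `&& cgraph->grad_accs` conditions guard graphs that were never given gradient buffers (forward-only graphs), where those arrays are NULL. On the caller side nothing changes except that the lookup now degrades to NULL instead of dereferencing a NULL array; a hedged sketch of the expected usage, with `gf` and `x` standing in for a graph and a tensor:

    // assumed forward-only graph: with this fix the lookup returns NULL rather than crashing
    struct lm_ggml_tensor * g = lm_ggml_graph_get_grad(gf, x);
    if (g == NULL) {
        // no gradient is tracked for x in this graph, or the graph has no gradient buffers at all
    }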
@@ -6502,7 +6536,7 @@ struct lm_gguf_context {
  void * data;
  };
 
- static size_t lm_gguf_type_size(enum lm_gguf_type type) {
+ size_t lm_gguf_type_size(enum lm_gguf_type type) {
  LM_GGML_ASSERT(0 <= type && type < LM_GGUF_TYPE_COUNT);
  return LM_GGUF_TYPE_SIZE[type];
  }
@@ -6630,13 +6664,7 @@ struct lm_gguf_context * lm_gguf_init_empty(void) {
  return ctx;
  }
 
- struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
- FILE * file = lm_ggml_fopen(fname, "rb");
- if (!file) {
- fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
- return NULL;
- }
-
+ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf_init_params params) {
  // offset from start of file
  size_t offset = 0;
 
@@ -6649,7 +6677,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  for (uint32_t i = 0; i < sizeof(magic); i++) {
  if (magic[i] != LM_GGUF_MAGIC[i]) {
  fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
- fclose(file);
  return NULL;
  }
  }
@@ -6660,7 +6687,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  struct lm_gguf_context * ctx = calloc(1, sizeof(struct lm_gguf_context));
  if (!ctx) {
  fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
- fclose(file);
  return NULL;
  }
 
@@ -6678,7 +6704,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
 
  if (ctx->header.version == 1) {
  fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6691,7 +6716,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
 
  if (!ok) {
  fprintf(stderr, "%s: failed to read header\n", __func__);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6701,12 +6725,13 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  {
  const uint64_t n_kv = ctx->header.n_kv;
 
- ctx->kv = calloc(n_kv, sizeof(struct lm_gguf_kv));
- if (!ctx->kv) {
- fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
- fclose(file);
- lm_gguf_free(ctx);
- return NULL;
+ if (n_kv > 0) {
+ ctx->kv = calloc(n_kv, sizeof(struct lm_gguf_kv));
+ if (!ctx->kv) {
+ fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
+ lm_gguf_free(ctx);
+ return NULL;
+ }
  }
 
  for (uint64_t i = 0; i < n_kv; ++i) {
@@ -6753,7 +6778,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  // prevent from integer overflow in the malloc below
  if (kv->value.arr.n >= SIZE_MAX/lm_gguf_type_size(kv->value.arr.type)) {
  fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6761,7 +6785,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  kv->value.arr.data = calloc(kv->value.arr.n, lm_gguf_type_size(kv->value.arr.type));
  if (!kv->value.arr.data) {
  fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6773,7 +6796,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  // prevent from integer overflow in the malloc below
  if (kv->value.arr.n >= SIZE_MAX/sizeof(struct lm_gguf_str)) {
  fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6781,7 +6803,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct lm_gguf_str));
  if (!kv->value.arr.data) {
  fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6812,7 +6833,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
 
  if (!ok) {
  fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6823,7 +6843,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct lm_gguf_tensor_info));
  if (!ctx->infos) {
  fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6859,7 +6878,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
 
  if (!ok) {
  fprintf(stderr, "%s: failed to read tensor info\n", __func__);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6902,7 +6920,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  // this tensor type support have been removed:
  fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
  __func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type));
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6910,7 +6927,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  if (ne % lm_ggml_blck_size(info->type) != 0) {
  fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
  __func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type), ne, lm_ggml_blck_size(info->type));
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6942,7 +6958,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  *params.ctx = lm_ggml_init(pdata);
  if (*params.ctx == NULL) {
  fprintf(stderr, "%s: failed to initialize context\n", __func__);
- fclose(file);
  lm_gguf_free(ctx);
  return NULL;
  }
@@ -6961,7 +6976,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
 
  if (!ok) {
  fprintf(stderr, "%s: failed to read tensor data\n", __func__);
- fclose(file);
  lm_ggml_free(ctx_data);
  lm_gguf_free(ctx);
  return NULL;
@@ -7000,7 +7014,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
 
  if (!ok) {
  fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
- fclose(file);
  lm_ggml_free(ctx_data);
  lm_gguf_free(ctx);
  return NULL;
@@ -7009,11 +7022,21 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
  lm_ggml_set_no_alloc(ctx_data, params.no_alloc);
  }
 
- fclose(file);
-
  return ctx;
  }
 
+ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
+ FILE * file = lm_ggml_fopen(fname, "rb");
+ if (!file) {
+ fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
+ return NULL;
+ }
+
+ struct lm_gguf_context * result = lm_gguf_init_from_file_impl(file, params);
+ fclose(file);
+ return result;
+ }
+
  void lm_gguf_free(struct lm_gguf_context * ctx) {
  if (ctx == NULL) {
  return;
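Taken together with the removed fclose calls above, lm_gguf_init_from_file is now a thin wrapper over lm_gguf_init_from_file_impl, which parses an already-open FILE * and leaves closing it to the caller. A hedged sketch of calling the split entry point directly; the fmemopen-based buffer wrapping is only an illustration of what the split enables, not something the package itself does:

    struct lm_gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
    FILE * f = fmemopen(gguf_bytes, gguf_size, "rb");   // POSIX-only; hypothetical in-memory source
    struct lm_gguf_context * gctx = f ? lm_gguf_init_from_file_impl(f, params) : NULL;
    if (f) {
        fclose(f);   // the impl no longer owns or closes the stream
    }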
@@ -7473,13 +7496,7 @@ void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, co
  // fwrite(val, sizeof(char), size, file);
  //}
 
- struct lm_gguf_buf {
- void * data;
- size_t size;
- size_t offset;
- };
-
- static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
+ struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
  struct lm_gguf_buf buf = {
  /*buf.data =*/ size == 0 ? NULL : LM_GGML_CALLOC(1, size),
  /*buf.size =*/ size,
@@ -7489,7 +7506,7 @@ static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
  return buf;
  }
 
- static void lm_gguf_buf_free(struct lm_gguf_buf buf) {
+ void lm_gguf_buf_free(struct lm_gguf_buf buf) {
  if (buf.data) {
  LM_GGML_FREE(buf.data);
  }
@@ -7527,7 +7544,7 @@ static void lm_gguf_bwrite_el(struct lm_gguf_buf * buf, const void * val, size_t
  buf->offset += el_size;
  }
 
- static void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) {
+ void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) {
  // write header
  lm_gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
  lm_gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
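With the static qualifiers removed, lm_gguf_buf_init, lm_gguf_buf_free and lm_gguf_write_to_buf (and the lm_gguf_buf struct, whose definition now lives elsewhere) become visible to other translation units in the package, so GGUF metadata can be serialized to memory without going through a file. A hedged sketch of the implied call sequence; the initial capacity and the `gctx` context are placeholders:

    struct lm_gguf_buf buf = lm_gguf_buf_init(16 * 1024);
    lm_gguf_write_to_buf(gctx, &buf, /*only_meta =*/ true);  // header + KV pairs + tensor infos only
    // buf.data .. buf.data + buf.offset now holds the serialized metadata
    lm_gguf_buf_free(buf);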
package/cpp/ggml.h CHANGED
@@ -1565,17 +1565,6 @@ extern "C" {
  int d1, // dilation dimension 1
  bool is_2D);
 
- LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
- struct lm_ggml_context * ctx,
- struct lm_ggml_tensor * a, // convolution kernel
- struct lm_ggml_tensor * b, // data
- int s0, // stride dimension 0
- int s1, // stride dimension 1
- int p0, // padding dimension 0
- int p1, // padding dimension 1
- int d0, // dilation dimension 0
- int d1); // dilation dimension 1
-
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
  struct lm_ggml_context * ctx,
  struct lm_ggml_tensor * a, // convolution kernel
@@ -1593,6 +1582,23 @@ extern "C" {
  int s, // stride
  int d); // dilation
 
+ // depthwise
+ // TODO: this is very likely wrong for some cases! - needs more testing
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
+ struct lm_ggml_context * ctx,
+ struct lm_ggml_tensor * a, // convolution kernel
+ struct lm_ggml_tensor * b, // data
+ int s0, // stride
+ int p0, // padding
+ int d0); // dilation
+
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
+ struct lm_ggml_context * ctx,
+ struct lm_ggml_tensor * a, // convolution kernel
+ struct lm_ggml_tensor * b, // data
+ int s0, // stride
+ int d0); // dilation
+
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
  struct lm_ggml_context * ctx,
  struct lm_ggml_tensor * a, // convolution kernel
@@ -1612,7 +1618,6 @@ extern "C" {
  int d0, // dilation dimension 0
  int d1); // dilation dimension 1
 
-
  // kernel size is a->ne[0] x a->ne[1]
  // stride is equal to kernel size
  // padding is zero
@@ -1639,6 +1644,18 @@ extern "C" {
  struct lm_ggml_tensor * a,
  struct lm_ggml_tensor * b);
 
+ // depthwise
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
+ struct lm_ggml_context * ctx,
+ struct lm_ggml_tensor * a, // convolution kernel
+ struct lm_ggml_tensor * b, // data
+ int s0, // stride dimension 0
+ int s1, // stride dimension 1
+ int p0, // padding dimension 0
+ int p1, // padding dimension 1
+ int d0, // dilation dimension 0
+ int d1); // dilation dimension 1
+
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_2d_p0(
  struct lm_ggml_context * ctx,
  struct lm_ggml_tensor * a,
package/cpp/llama-grammar.cpp CHANGED
@@ -822,15 +822,11 @@ llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar)
  return grammar->stacks;
  }
 
- void llama_grammar_accept(
- const llama_grammar_rules & rules,
- const llama_grammar_stacks & stacks,
- const uint32_t chr,
- llama_grammar_stacks & stacks_new) {
- stacks_new.clear();
- stacks_new.reserve(stacks.size());
+ void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
+ llama_grammar_stacks stacks_new;
+ stacks_new.reserve(grammar->stacks.size());
 
- for (const auto & stack : stacks) {
+ for (const auto & stack : grammar->stacks) {
  if (stack.empty()) {
  continue;
  }
@@ -844,9 +840,11 @@ void llama_grammar_accept(
  if (!llama_grammar_is_end_of_sequence(pos)) {
  new_stack.push_back(pos);
  }
- llama_grammar_advance_stack(rules, new_stack, stacks_new);
+ llama_grammar_advance_stack(grammar->rules, new_stack, stacks_new);
  }
  }
+
+ grammar->stacks = std::move(stacks_new);
  }
 
  llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
@@ -1051,7 +1049,12 @@ void llama_grammar_free_impl(struct llama_grammar * grammar) {
  }
 
  struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
- llama_grammar * result = new llama_grammar { grammar.vocab, grammar.rules, grammar.stacks, grammar.partial_utf8, };
+ llama_grammar * result = new llama_grammar {
+ grammar.vocab,
+ grammar.rules,
+ grammar.stacks,
+ grammar.partial_utf8,
+ };
 
  // redirect elements in stacks to point to new rules
  for (size_t is = 0; is < result->stacks.size(); is++) {
@@ -1059,7 +1062,7 @@ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & gra
  for (size_t ir0 = 0; ir0 < grammar.rules.size(); ir0++) {
  for (size_t ir1 = 0; ir1 < grammar.rules[ir0].size(); ir1++) {
  if (grammar.stacks[is][ie] == &grammar.rules[ir0][ir1]) {
- result->stacks[is][ie] = &result->rules[ir0][ir1];
+ result->stacks[is][ie] = &result->rules[ir0][ir1];
  }
  }
  }
@@ -1126,11 +1129,8 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
  const auto decoded = decode_utf8(piece, grammar.partial_utf8);
  const auto & code_points = decoded.first;
 
- llama_grammar_stacks stacks_new;
-
  for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
- llama_grammar_accept(grammar.rules, grammar.stacks, *it, stacks_new);
- grammar.stacks = std::move(stacks_new);
+ llama_grammar_accept(&grammar, *it);
  }
 
  grammar.partial_utf8 = decoded.second;
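After this refactor the caller no longer threads rules, stacks and a scratch stacks_new through llama_grammar_accept; the grammar advances its own stacks in place. A minimal hedged sketch of the new call pattern for code outside llama_grammar_accept_impl, using only functions shown in this diff (`grammar` and `code_points` are assumed to exist in the caller):

    // feed decoded code points one at a time; grammar->stacks is updated in place
    for (const uint32_t cpt : code_points) {
        llama_grammar_accept(grammar, cpt);
        if (llama_grammar_get_stacks(grammar).empty()) {
            break;   // no parse stack can accept this character
        }
    }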