llama_cpp 0.9.5 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -233,24 +233,6 @@ inline static void * ggml_aligned_malloc(size_t size) {
  #define UNUSED GGML_UNUSED
  #define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)

- //
- // tensor access macros
- //
-
- #define GGML_TENSOR_UNARY_OP_LOCALS \
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
-
- #define GGML_TENSOR_BINARY_OP_LOCALS \
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
- GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
- GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
-
  #if defined(GGML_USE_ACCELERATE)
  #include <Accelerate/Accelerate.h>
  #if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
@@ -1613,6 +1595,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  "GROUP_NORM",

  "MUL_MAT",
+ "MUL_MAT_ID",
  "OUT_PROD",

  "SCALE",
@@ -1640,6 +1623,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  "POOL_1D",
  "POOL_2D",
  "UPSCALE",
+ "ARGSORT",

  "FLASH_ATTN",
  "FLASH_FF",
@@ -1666,7 +1650,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  "CROSS_ENTROPY_LOSS_BACK",
  };

- static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+ static_assert(GGML_OP_COUNT == 70, "GGML_OP_COUNT != 70");

  static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  "none",
@@ -1695,6 +1679,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  "group_norm(x)",

  "X*Y",
+ "X[i]*Y",
  "X*Y",

  "x*v",
@@ -1722,6 +1707,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  "pool_1d(x)",
  "pool_2d(x)",
  "upscale(x)",
+ "argsort(x)",

  "flash_attn(x)",
  "flash_ff(x)",
@@ -1748,10 +1734,28 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  "cross_entropy_loss_back(x,y)",
  };

- static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+ static_assert(GGML_OP_COUNT == 70, "GGML_OP_COUNT != 70");

  static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

+
+ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
+ "ABS",
+ "SGN",
+ "NEG",
+ "STEP",
+ "TANH",
+ "ELU",
+ "RELU",
+ "GELU",
+ "GELU_QUICK",
+ "SILU",
+ "LEAKY",
+ };
+
+ static_assert(GGML_UNARY_OP_COUNT == 11, "GGML_UNARY_OP_COUNT != 11");
+
+
  static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
  static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");

@@ -1771,6 +1775,7 @@ static void ggml_setup_op_has_task_pass(void) {

  p[GGML_OP_ACC ] = true;
  p[GGML_OP_MUL_MAT ] = true;
+ p[GGML_OP_MUL_MAT_ID ] = true;
  p[GGML_OP_OUT_PROD ] = true;
  p[GGML_OP_SET ] = true;
  p[GGML_OP_GET_ROWS_BACK ] = true;
@@ -2023,6 +2028,20 @@ const char * ggml_op_symbol(enum ggml_op op) {
  return GGML_OP_SYMBOL[op];
  }

+ const char * ggml_unary_op_name(enum ggml_unary_op op) {
+ return GGML_UNARY_OP_NAME[op];
+ }
+
+ const char * ggml_op_desc(const struct ggml_tensor * t) {
+ if (t->op == GGML_OP_UNARY) {
+ enum ggml_unary_op uop = ggml_get_unary_op(t);
+ return ggml_unary_op_name(uop);
+ }
+ else {
+ return ggml_op_name(t->op);
+ }
+ }
+
  size_t ggml_element_size(const struct ggml_tensor * tensor) {
  return ggml_type_size(tensor->type);
  }
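
The hunks above add GGML_OP_MUL_MAT_ID and GGML_OP_ARGSORT to the op tables (bumping GGML_OP_COUNT from 68 to 70), introduce a name table for the unary ops, and add two small helpers: ggml_unary_op_name() and ggml_op_desc(). Because all unary operators share the GGML_OP_UNARY op code, ggml_op_desc() resolves the specific unary name for such nodes. A minimal debugging sketch of how the new helpers might be used (the function and graph variable below are illustrative, not part of this diff):

    #include <stdio.h>
    #include "ggml.h"

    // Print a one-line description of every node in a computation graph.
    static void print_graph_ops(const struct ggml_cgraph * gf) {
        for (int i = 0; i < gf->n_nodes; ++i) {
            const struct ggml_tensor * node = gf->nodes[i];
            // ggml_op_desc() returns e.g. "MUL_MAT" for regular ops and the specific
            // unary name ("SILU", "RELU", ...) for GGML_OP_UNARY nodes.
            fprintf(stderr, "node %3d: %-16s %s\n", i, ggml_op_desc(node), node->name);
        }
    }
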
@@ -3154,9 +3173,7 @@ static struct ggml_tensor * ggml_add_impl(
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  bool inplace) {
- // TODO: support less-strict constraint
- // GGML_ASSERT(ggml_can_repeat(b, a));
- GGML_ASSERT(ggml_can_repeat_rows(b, a));
+ GGML_ASSERT(ggml_can_repeat(b, a));

  bool is_node = false;

@@ -3371,9 +3388,7 @@ static struct ggml_tensor * ggml_mul_impl(
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  bool inplace) {
- // TODO: support less-strict constraint
- // GGML_ASSERT(ggml_can_repeat(b, a));
- GGML_ASSERT(ggml_can_repeat_rows(b, a));
+ GGML_ASSERT(ggml_can_repeat(b, a));

  bool is_node = false;

@@ -3418,7 +3433,7 @@ static struct ggml_tensor * ggml_div_impl(
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  bool inplace) {
- GGML_ASSERT(ggml_are_same_shape(a, b));
+ GGML_ASSERT(ggml_can_repeat(b, a));

  bool is_node = false;

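These three hunks relax the shape checks: ggml_add, ggml_mul and ggml_div now only require that b be broadcastable over a (ggml_can_repeat) rather than row-repeatable or shape-equal, which also allows repetition inside dimension 0. A small sketch of what this enables, assuming ctx is a valid ggml_context and the shapes are placeholders:

    // x: [n_cols, n_rows], s: [1, n_rows] -- s holds one value per row and
    // broadcasts inside dim 0 under the new ggml_can_repeat check.
    static struct ggml_tensor * scale_rows(struct ggml_context * ctx,
                                           struct ggml_tensor * x,
                                           struct ggml_tensor * s) {
        struct ggml_tensor * scaled = ggml_mul(ctx, x, s); // per-row scaling
        return ggml_div(ctx, scaled, s);                   // ggml_div previously required equal shapes
    }
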
@@ -4056,6 +4071,49 @@ struct ggml_tensor * ggml_mul_mat(
  return result;
  }

+ // ggml_mul_mat_id
+
+ struct ggml_tensor * ggml_mul_mat_id(
+ struct ggml_context * ctx,
+ struct ggml_tensor * as[],
+ struct ggml_tensor * ids,
+ int id,
+ struct ggml_tensor * b) {
+
+ int64_t n_as = ids->ne[0];
+
+ GGML_ASSERT(ids->type == GGML_TYPE_I32);
+ GGML_ASSERT(ggml_is_vector(ids));
+ GGML_ASSERT(n_as > 0 && n_as <= GGML_MAX_SRC - 2);
+ GGML_ASSERT(id >= 0 && id < n_as);
+
+ bool is_node = false;
+
+ if (as[0]->grad || b->grad) {
+ is_node = true;
+ }
+
+ const int64_t ne[4] = { as[0]->ne[1], b->ne[1], b->ne[2], b->ne[3] };
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(as[0]->n_dims, b->n_dims), ne);
+
+ ggml_set_op_params_i32(result, 0, id);
+
+ result->op = GGML_OP_MUL_MAT_ID;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = ids;
+ result->src[1] = b;
+
+ for (int64_t i = 0; i < n_as; i++) {
+ struct ggml_tensor * a = as[i];
+ GGML_ASSERT(ggml_are_same_shape(as[0], a));
+ GGML_ASSERT(ggml_can_mul_mat(a, b));
+ GGML_ASSERT(!ggml_is_transposed(a));
+ result->src[i + 2] = a;
+ }
+
+ return result;
+ }
+
  // ggml_out_prod

  struct ggml_tensor * ggml_out_prod(
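
ggml_mul_mat_id, added above, multiplies b by one matrix selected from the as[] array; the selection is made at compute time from the value stored at position id of the I32 ids tensor, with the candidate matrices attached as src[2..]. This is the building block used for mixture-of-experts style layers. A usage sketch with placeholder shapes, assuming a valid context and that ids is filled before the graph is computed:

    // experts[i]: [n_embd, n_ff] (all the same shape), cur: [n_embd, n_tokens].
    // The returned node has shape [n_ff, n_tokens].
    static struct ggml_tensor * expert_mul_mat(struct ggml_context * ctx,
                                               struct ggml_tensor * experts[], // n_as matrices
                                               struct ggml_tensor * ids,       // I32 vector, length n_as
                                               struct ggml_tensor * cur) {
        // slot 0 of ids holds the index of the expert to use for this node
        return ggml_mul_mat_id(ctx, experts, ids, /*id=*/0, cur);
    }
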
@@ -4209,7 +4267,7 @@ struct ggml_tensor * ggml_set_2d_inplace(
  struct ggml_tensor * b,
  size_t nb1,
  size_t offset) {
- return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
+ return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
  }

  // ggml_cpy
@@ -5468,6 +5526,43 @@ struct ggml_tensor * ggml_upscale(
  return ggml_upscale_impl(ctx, a, scale_factor);
  }

+ // ggml_argsort
+
+ struct ggml_tensor * ggml_argsort(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_sort_order order) {
+ bool is_node = false;
+
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, a->ne);
+
+ ggml_set_op_params_i32(result, 0, (int32_t) order);
+
+ result->op = GGML_OP_ARGSORT;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+
+ return result;
+ }
+
+ // ggml_top_k
+
+ struct ggml_tensor * ggml_top_k(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int k) {
+ GGML_ASSERT(a->ne[0] >= k);
+
+ struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_DESC);
+
+ result = ggml_view_4d(ctx, result,
+ k, result->ne[1], result->ne[2], result->ne[3],
+ result->nb[1], result->nb[2], result->nb[3],
+ 0);
+
+ return result;
+ }
+
  // ggml_flash_attn

  struct ggml_tensor * ggml_flash_attn(
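
ggml_argsort returns, for each row, the permutation of indices that sorts that row (an I32 tensor of the same shape), and ggml_top_k builds on it: a descending argsort followed by a view of the first k columns. A small sketch, assuming a valid context and placeholder dimensions:

    // Indices of the k largest values in each row of logits.
    // logits: [n_vocab, n_tokens]; the result is I32 with shape [k, n_tokens].
    static struct ggml_tensor * top_k_indices(struct ggml_context * ctx,
                                              struct ggml_tensor * logits, int k) {
        // equivalent to a ggml_view over the first k columns of
        // ggml_argsort(ctx, logits, GGML_SORT_DESC)
        return ggml_top_k(ctx, logits, k);
    }
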
@@ -6827,7 +6922,7 @@ static void ggml_compute_forward_add_f32(
  const struct ggml_tensor * src0,
  const struct ggml_tensor * src1,
  struct ggml_tensor * dst) {
- GGML_ASSERT(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst));
+ GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

  if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
  return;
@@ -6860,16 +6955,19 @@ static void ggml_compute_forward_add_f32(
  const int64_t i13 = i03 % ne13;
  const int64_t i12 = i02 % ne12;
  const int64_t i11 = i01 % ne11;
+ const int64_t nr0 = ne00 / ne10;

  float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
  float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
  float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);

+ for (int64_t r = 0; r < nr0; ++r) {
  #ifdef GGML_USE_ACCELERATE
- vDSP_vadd(src0_ptr, 1, src1_ptr, 1, dst_ptr, 1, ne00);
+ vDSP_vadd(src0_ptr + r*ne10, 1, src1_ptr, 1, dst_ptr + r*ne10, 1, ne10);
  #else
- ggml_vec_add_f32(ne00, dst_ptr, src0_ptr, src1_ptr);
+ ggml_vec_add_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
  #endif
+ }
  }
  } else {
  // src1 is not contiguous
@@ -6886,8 +6984,9 @@ static void ggml_compute_forward_add_f32(
  float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
  float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);

- for (int i0 = 0; i0 < ne0; i0++) {
- float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i0*nb10);
+ for (int64_t i0 = 0; i0 < ne0; ++i0) {
+ const int64_t i10 = i0 % ne10;
+ float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10);

  dst_ptr[i0] = src0_ptr[i0] + *src1_ptr;
  }
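
In the contiguous path above, the kernel now repeats the length-ne10 src1 row nr0 = ne00/ne10 times across each destination row (ggml_can_repeat guarantees the division is exact). The same pattern in isolation, as a plain C sketch with illustrative names:

    #include <stdint.h>

    // Add a length-n1 vector b to a length-n0 row a, repeating b across the row.
    // n0 must be a multiple of n1, mirroring the ggml_can_repeat check.
    static void add_row_broadcast(float * dst, const float * a, const float * b,
                                  int64_t n0, int64_t n1) {
        const int64_t nr0 = n0 / n1; // number of repeats of b within the row
        for (int64_t r = 0; r < nr0; ++r) {
            for (int64_t i = 0; i < n1; ++i) {
                dst[r*n1 + i] = a[r*n1 + i] + b[i];
            }
        }
    }
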
@@ -7607,7 +7706,7 @@ static void ggml_compute_forward_mul_f32(
  const struct ggml_tensor * src0,
  const struct ggml_tensor * src1,
  struct ggml_tensor * dst) {
- GGML_ASSERT(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst));
+ GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

  if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
  return;
@@ -7630,7 +7729,6 @@ static void ggml_compute_forward_mul_f32(

  GGML_ASSERT( nb0 == sizeof(float));
  GGML_ASSERT(nb00 == sizeof(float));
- GGML_ASSERT(ne00 == ne10);

  if (nb10 == sizeof(float)) {
  for (int64_t ir = ith; ir < nr; ir += nth) {
@@ -7642,20 +7740,21 @@ static void ggml_compute_forward_mul_f32(
  const int64_t i13 = i03 % ne13;
  const int64_t i12 = i02 % ne12;
  const int64_t i11 = i01 % ne11;
+ const int64_t nr0 = ne00 / ne10;

  float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
  float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
  float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);

+ for (int64_t r = 0 ; r < nr0; ++r) {
  #ifdef GGML_USE_ACCELERATE
- UNUSED(ggml_vec_mul_f32);
+ UNUSED(ggml_vec_mul_f32);

- vDSP_vmul( src0_ptr, 1, src1_ptr, 1, dst_ptr, 1, ne00);
+ vDSP_vmul(src0_ptr + r*ne10, 1, src1_ptr, 1, dst_ptr + r*ne10, 1, ne10);
  #else
- ggml_vec_mul_f32(ne00, dst_ptr, src0_ptr, src1_ptr);
+ ggml_vec_mul_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
  #endif
- // }
- // }
+ }
  }
  } else {
  // src1 is not contiguous
@@ -7673,8 +7772,9 @@ static void ggml_compute_forward_mul_f32(
  float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
  float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);

- for (int64_t i0 = 0; i0 < ne00; i0++) {
- float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i0*nb10);
+ for (int64_t i0 = 0; i0 < ne00; ++i0) {
+ const int64_t i10 = i0 % ne10;
+ float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10);

  dst_ptr[i0] = src0_ptr[i0] * (*src1_ptr);
  }
@@ -7708,14 +7808,16 @@ static void ggml_compute_forward_div_f32(
  const struct ggml_tensor * src0,
  const struct ggml_tensor * src1,
  struct ggml_tensor * dst) {
- assert(params->ith == 0);
- assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
+ GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

  if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
  return;
  }

- const int nr = ggml_nrows(src0);
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ const int64_t nr = ggml_nrows(src0);

  GGML_TENSOR_BINARY_OP_LOCALS

@@ -7723,41 +7825,50 @@ static void ggml_compute_forward_div_f32(
  GGML_ASSERT(nb00 == sizeof(float));

  if (nb10 == sizeof(float)) {
- for (int ir = 0; ir < nr; ++ir) {
- // src0, src1 and dst are same shape => same indices
- const int i3 = ir/(ne2*ne1);
- const int i2 = (ir - i3*ne2*ne1)/ne1;
- const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
+ for (int64_t ir = ith; ir < nr; ir += nth) {
+ // src0 and dst are same shape => same indices
+ const int64_t i03 = ir/(ne02*ne01);
+ const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+ const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+ const int64_t i13 = i03 % ne13;
+ const int64_t i12 = i02 % ne12;
+ const int64_t i11 = i01 % ne11;
+ const int64_t nr0 = ne00 / ne10;
+
+ float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
+ float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+ float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);

+ for (int64_t r = 0; r < nr0; ++r) {
  #ifdef GGML_USE_ACCELERATE
- UNUSED(ggml_vec_div_f32);
+ UNUSED(ggml_vec_div_f32);

- vDSP_vdiv(
- (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
- (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
- (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
- ne0);
+ vDSP_vdiv(src1_ptr, 1, src0_ptr + r*ne10, 1, dst_ptr + r*ne10, 1, ne10);
  #else
- ggml_vec_div_f32(ne0,
- (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ),
- (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01),
- (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11));
+ ggml_vec_div_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
  #endif
- // }
- // }
+ }
  }
  } else {
  // src1 is not contiguous
- for (int ir = 0; ir < nr; ++ir) {
- // src0, src1 and dst are same shape => same indices
- const int i3 = ir/(ne2*ne1);
- const int i2 = (ir - i3*ne2*ne1)/ne1;
- const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
+ for (int64_t ir = ith; ir < nr; ir += nth) {
+ // src0 and dst are same shape => same indices
+ // src1 is broadcastable across src0 and dst in i1, i2, i3
+ const int64_t i03 = ir/(ne02*ne01);
+ const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+ const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);

- float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 );
- float * src0_ptr = (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
- for (int i0 = 0; i0 < ne0; i0++) {
- float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11 + i0*nb10);
+ const int64_t i13 = i03 % ne13;
+ const int64_t i12 = i02 % ne12;
+ const int64_t i11 = i01 % ne11;
+
+ float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
+ float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+
+ for (int64_t i0 = 0; i0 < ne00; ++i0) {
+ const int64_t i10 = i0 % ne10;
+ float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10);

  dst_ptr[i0] = src0_ptr[i0] / (*src1_ptr);
  }
@@ -8203,7 +8314,7 @@ static void ggml_compute_forward_repeat_f16(
  return;
  }

- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS

  // guaranteed to be an integer due to the check in ggml_can_repeat
  const int nr0 = (int)(ne0/ne00);
@@ -8348,6 +8459,7 @@ static void ggml_compute_forward_concat_f32(
  GGML_ASSERT(src0->nb[0] == sizeof(float));

  const int ith = params->ith;
+ const int nth = params->nth;

  GGML_TENSOR_BINARY_OP_LOCALS

@@ -8357,7 +8469,7 @@ static void ggml_compute_forward_concat_f32(
  GGML_ASSERT(nb10 == sizeof(float));

  for (int i3 = 0; i3 < ne3; i3++) {
- for (int i2 = ith; i2 < ne2; i2++) {
+ for (int i2 = ith; i2 < ne2; i2 += nth) {
  if (i2 < ne02) { // src0
  for (int i1 = 0; i1 < ne1; i1++) {
  for (int i0 = 0; i0 < ne0; i0++) {
@@ -9517,6 +9629,8 @@ static void ggml_compute_forward_mul_mat(
  char * wdata = params->wdata;
  const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);

+ assert(params->wsize >= ne11*ne12*ne13*row_size);
+
  for (int64_t i13 = 0; i13 < ne13; ++i13) {
  for (int64_t i12 = 0; i12 < ne12; ++i12) {
  for (int64_t i11 = 0; i11 < ne11; ++i11) {
@@ -9618,6 +9732,26 @@ static void ggml_compute_forward_mul_mat(
  }
  }

+ // ggml_compute_forward_mul_mat_id
+
+ static void ggml_compute_forward_mul_mat_id(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * ids = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
+ const int id = ggml_get_op_params_i32(dst, 0);
+
+ const int a_id = ((int32_t *)ids->data)[id];
+
+ GGML_ASSERT(a_id >= 0 && a_id < ids->ne[0]);
+
+ const struct ggml_tensor * src0 = dst->src[a_id + 2];
+
+ ggml_compute_forward_mul_mat(params, src0, src1, dst);
+ }
+
  // ggml_compute_forward_out_prod

  static void ggml_compute_forward_out_prod_f32(
@@ -12021,6 +12155,67 @@ static void ggml_compute_forward_upscale(
  }
  }

+ // ggml_compute_forward_argsort
+
+ static void ggml_compute_forward_argsort_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ GGML_TENSOR_UNARY_OP_LOCALS
+
+ GGML_ASSERT(nb0 == sizeof(float));
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ const int64_t nr = ggml_nrows(src0);
+
+ enum ggml_sort_order order = (enum ggml_sort_order) ggml_get_op_params_i32(dst, 0);
+
+ for (int64_t i = ith; i < nr; i += nth) {
+ int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
+ const float * src_data = (float *)((char *) src0->data + i*nb01);
+
+ for (int64_t j = 0; j < ne0; j++) {
+ dst_data[j] = j;
+ }
+
+ // C doesn't have a functional sort, so we do a bubble sort instead
+ for (int64_t j = 0; j < ne0; j++) {
+ for (int64_t k = j + 1; k < ne0; k++) {
+ if ((order == GGML_SORT_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
+ (order == GGML_SORT_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
+ int32_t tmp = dst_data[j];
+ dst_data[j] = dst_data[k];
+ dst_data[k] = tmp;
+ }
+ }
+ }
+ }
+ }
+
+ static void ggml_compute_forward_argsort(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_argsort_f32(params, src0, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+ }
+
  // ggml_compute_forward_flash_attn

  static void ggml_compute_forward_flash_attn_f32(
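
The argsort forward pass above fills a per-row index array and orders it with an O(n^2) exchange sort, comparing the source values through the indices. The same result can be obtained with the C standard library by sorting (value, index) pairs; an isolated reference sketch, not part of the diff:

    #include <stdint.h>
    #include <stdlib.h>

    typedef struct { float value; int32_t index; } sort_pair;

    static int cmp_desc(const void * pa, const void * pb) {
        const sort_pair * a = (const sort_pair *) pa;
        const sort_pair * b = (const sort_pair *) pb;
        return (a->value < b->value) - (a->value > b->value); // descending order
    }

    // Fill dst[0..n) with the indices that sort src in descending order.
    // tmp must point to n sort_pair elements of scratch space.
    static void argsort_desc(int32_t * dst, const float * src, int64_t n, sort_pair * tmp) {
        for (int64_t i = 0; i < n; ++i) { tmp[i].value = src[i]; tmp[i].index = (int32_t) i; }
        qsort(tmp, (size_t) n, sizeof(sort_pair), cmp_desc);
        for (int64_t i = 0; i < n; ++i) { dst[i] = tmp[i].index; }
    }
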
@@ -13844,6 +14039,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
  {
  ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor);
  } break;
+ case GGML_OP_MUL_MAT_ID:
+ {
+ ggml_compute_forward_mul_mat_id(params, tensor);
+ } break;
  case GGML_OP_OUT_PROD:
  {
  ggml_compute_forward_out_prod(params, tensor->src[0], tensor->src[1], tensor);
@@ -13948,6 +14147,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
  {
  ggml_compute_forward_upscale(params, tensor->src[0], tensor);
  } break;
+ case GGML_OP_ARGSORT:
+ {
+ ggml_compute_forward_argsort(params, tensor->src[0], tensor);
+ } break;
  case GGML_OP_FLASH_ATTN:
  {
  const int32_t t = ggml_get_op_params_i32(tensor, 0);
@@ -14598,6 +14801,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
  zero_table);
  }
  } break;
+ case GGML_OP_MUL_MAT_ID:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
  case GGML_OP_OUT_PROD:
  {
  GGML_ASSERT(false); // TODO: not implemented
@@ -14936,6 +15143,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
  {
  GGML_ASSERT(false); // TODO: not implemented
  } break;
+ case GGML_OP_ARGSORT:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
  case GGML_OP_FLASH_ATTN:
  {
  struct ggml_tensor * flash_grad = NULL;
@@ -15296,12 +15507,8 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
  return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
  }

- struct ggml_cgraph * ggml_graph_view(struct ggml_context * ctx, struct ggml_cgraph * cgraph0, int i0, int i1) {
- const size_t obj_size = sizeof(struct ggml_cgraph);
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
- struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
-
- *cgraph = (struct ggml_cgraph) {
+ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
+ struct ggml_cgraph cgraph = {
  /*.size =*/ 0,
  /*.n_nodes =*/ i1 - i0,
  /*.n_leafs =*/ 0,
@@ -15536,7 +15743,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  n_tasks = n_threads;
  } break;
  case GGML_OP_SUB:
- case GGML_OP_DIV:
  case GGML_OP_SQR:
  case GGML_OP_SQRT:
  case GGML_OP_LOG:
@@ -15569,10 +15775,13 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  {
  n_tasks = n_threads;
  } break;
+ default:
+ GGML_ASSERT(false);
  }
  break;
  case GGML_OP_SILU_BACK:
  case GGML_OP_MUL:
+ case GGML_OP_DIV:
  case GGML_OP_NORM:
  case GGML_OP_RMS_NORM:
  case GGML_OP_RMS_NORM_BACK:
@@ -15610,6 +15819,11 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  }
  #endif
  } break;
+ case GGML_OP_MUL_MAT_ID:
+ {
+ // FIXME: blas
+ n_tasks = n_threads;
+ } break;
  case GGML_OP_OUT_PROD:
  {
  n_tasks = n_threads;
@@ -15629,7 +15843,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  } break;
  case GGML_OP_DIAG_MASK_ZERO:
  case GGML_OP_DIAG_MASK_INF:
- case GGML_OP_SOFT_MAX:
  case GGML_OP_SOFT_MAX_BACK:
  case GGML_OP_ROPE:
  case GGML_OP_ROPE_BACK:
@@ -15645,6 +15858,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  {
  n_tasks = 1; //TODO
  } break;
+ case GGML_OP_SOFT_MAX:
+ {
+ n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
+ } break;
  case GGML_OP_CONV_TRANSPOSE_1D:
  {
  n_tasks = n_threads;
@@ -15666,6 +15883,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  {
  n_tasks = n_threads;
  } break;
+ case GGML_OP_ARGSORT:
+ {
+ n_tasks = n_threads;
+ } break;
  case GGML_OP_FLASH_ATTN:
  {
  n_tasks = n_threads;
@@ -15728,6 +15949,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  {
  n_tasks = 1;
  } break;
+ case GGML_OP_COUNT:
+ {
+ GGML_ASSERT(false);
+ } break;
  default:
  {
  fprintf(stderr, "%s: op not implemented: ", __func__);
@@ -15876,18 +16101,16 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

  // thread scheduling for the different operations + work buffer size estimation
  for (int i = 0; i < cgraph->n_nodes; i++) {
- int n_tasks = 1;
-
  struct ggml_tensor * node = cgraph->nodes[i];

+ const int n_tasks = ggml_get_n_tasks(node, n_threads);
+
  size_t cur = 0;

  switch (node->op) {
  case GGML_OP_CPY:
  case GGML_OP_DUP:
  {
- n_tasks = n_threads;
-
  if (ggml_is_quantized(node->type)) {
  cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
  }
@@ -15895,16 +16118,12 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  case GGML_OP_ADD:
  case GGML_OP_ADD1:
  {
- n_tasks = n_threads;
-
  if (ggml_is_quantized(node->src[0]->type)) {
  cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
  }
  } break;
  case GGML_OP_ACC:
  {
- n_tasks = n_threads;
-
  if (ggml_is_quantized(node->src[0]->type)) {
  cur = ggml_type_size(GGML_TYPE_F32) * node->src[1]->ne[0] * n_tasks;
  }
@@ -15930,18 +16149,31 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
  }
  } break;
+ case GGML_OP_MUL_MAT_ID:
+ {
+ const struct ggml_tensor * a = node->src[2];
+ const struct ggml_tensor * b = node->src[1];
+ const enum ggml_type vec_dot_type = type_traits[a->type].vec_dot_type;
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+ if (ggml_compute_forward_mul_mat_use_blas(a, b, node)) {
+ if (a->type != GGML_TYPE_F32) {
+ // here we need memory just for single 2D matrix from src0
+ cur = ggml_type_size(GGML_TYPE_F32)*(a->ne[0]*a->ne[1]);
+ }
+ } else
+ #endif
+ if (b->type != vec_dot_type) {
+ cur = ggml_type_size(vec_dot_type)*ggml_nelements(b)/ggml_blck_size(vec_dot_type);
+ }
+ } break;
  case GGML_OP_OUT_PROD:
  {
- n_tasks = n_threads;
-
  if (ggml_is_quantized(node->src[0]->type)) {
  cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
  }
  } break;
  case GGML_OP_SOFT_MAX:
  {
- n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
-
  cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
  } break;
  case GGML_OP_CONV_TRANSPOSE_1D:
@@ -15969,10 +16201,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  GGML_ASSERT(false);
  }
  } break;
- case GGML_OP_IM2COL:
- {
- n_tasks = n_threads;
- } break;
  case GGML_OP_CONV_TRANSPOSE_2D:
  {
  const int64_t ne00 = node->src[0]->ne[0]; // W
@@ -15989,8 +16217,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  } break;
  case GGML_OP_FLASH_ATTN:
  {
- n_tasks = n_threads;
-
  const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);

  if (node->src[1]->type == GGML_TYPE_F32) {
@@ -16003,8 +16229,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  } break;
  case GGML_OP_FLASH_FF:
  {
- n_tasks = n_threads;
-
  if (node->src[1]->type == GGML_TYPE_F32) {
  cur = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
  cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
@@ -16015,8 +16239,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  } break;
  case GGML_OP_FLASH_ATTN_BACK:
  {
- n_tasks = n_threads;
-
  const int64_t D = node->src[0]->ne[0];
  const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
  const int64_t mxDn = MAX(D, ne11) * 2; // *2 because of S and SM in ggml_compute_forward_flash_attn_back
@@ -16031,8 +16253,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

  case GGML_OP_CROSS_ENTROPY_LOSS:
  {
- n_tasks = n_threads;
-
  cur = ggml_type_size(node->type)*(n_tasks + node->src[0]->ne[0]*n_tasks);
  } break;
  case GGML_OP_COUNT:
@@ -17819,8 +18039,8 @@ size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t *
  memcpy(&qh, &y[i].qh, sizeof(qh));

  for (int j = 0; j < QK5_0; j += 2) {
- const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
- const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12));
+ const uint8_t vh0 = ((qh & (1u << (j/2 + 0 ))) >> (j/2 + 0 )) << 4;
+ const uint8_t vh1 = ((qh & (1u << (j/2 + 16))) >> (j/2 + 12));

  // cast to 16 bins
  const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
@@ -17849,8 +18069,8 @@ size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t *
  memcpy(&qh, &y[i].qh, sizeof(qh));

  for (int j = 0; j < QK5_1; j += 2) {
- const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
- const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12));
+ const uint8_t vh0 = ((qh & (1u << (j/2 + 0 ))) >> (j/2 + 0 )) << 4;
+ const uint8_t vh1 = ((qh & (1u << (j/2 + 16))) >> (j/2 + 12));

  // cast to 16 bins
  const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
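
The two quantization hunks fix the high-bit index used when histogramming Q5_0/Q5_1 values: each packed byte qs[j/2] holds the low nibbles of two quantized values, and their fifth bits sit at positions j/2 and j/2 + 16 of the 32-bit qh mask, so indexing qh with j overshot by a factor of two. A small sketch of the corrected bit extraction, mirroring the fixed loop (the helper name is illustrative):

    #include <stdint.h>

    // Reconstruct the two 5-bit values packed in byte idx = j/2 of a Q5 block:
    // the low nibble pairs with qh bit idx, the high nibble with qh bit idx + 16.
    static void q5_unpack_pair(const uint8_t * qs, uint32_t qh, int idx,
                               uint8_t * v0, uint8_t * v1) {
        const uint8_t vh0 = (uint8_t)(((qh >> (idx + 0 )) & 1) << 4);
        const uint8_t vh1 = (uint8_t)(((qh >> (idx + 16)) & 1) << 4);
        *v0 = (uint8_t)((qs[idx] & 0x0F) | vh0);
        *v1 = (uint8_t)((qs[idx] >>   4) | vh1);
    }
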
@@ -18040,6 +18260,7 @@ struct gguf_kv {

  struct gguf_header {
  char magic[4];
+
  uint32_t version;
  uint64_t n_tensors; // GGUFv2
  uint64_t n_kv; // GGUFv2
@@ -18129,7 +18350,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

  for (uint32_t i = 0; i < sizeof(magic); i++) {
  if (magic[i] != GGUF_MAGIC[i]) {
- fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic);
+ fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
  fclose(file);
  return NULL;
  }
@@ -18144,7 +18365,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  {
  strncpy(ctx->header.magic, magic, 4);

-
  ctx->kv = NULL;
  ctx->infos = NULL;
  ctx->data = NULL;