llama_cpp 0.9.5 → 0.10.0

@@ -233,24 +233,6 @@ inline static void * ggml_aligned_malloc(size_t size) {
  #define UNUSED GGML_UNUSED
  #define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)

- //
- // tensor access macros
- //
-
- #define GGML_TENSOR_UNARY_OP_LOCALS \
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
-
- #define GGML_TENSOR_BINARY_OP_LOCALS \
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
- GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
- GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
-
  #if defined(GGML_USE_ACCELERATE)
  #include <Accelerate/Accelerate.h>
  #if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
@@ -1613,6 +1595,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  "GROUP_NORM",

  "MUL_MAT",
+ "MUL_MAT_ID",
  "OUT_PROD",

  "SCALE",
@@ -1640,6 +1623,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  "POOL_1D",
  "POOL_2D",
  "UPSCALE",
+ "ARGSORT",

  "FLASH_ATTN",
  "FLASH_FF",
@@ -1666,7 +1650,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  "CROSS_ENTROPY_LOSS_BACK",
  };

- static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+ static_assert(GGML_OP_COUNT == 70, "GGML_OP_COUNT != 70");

  static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  "none",
@@ -1695,6 +1679,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  "group_norm(x)",

  "X*Y",
+ "X[i]*Y",
  "X*Y",

  "x*v",
@@ -1722,6 +1707,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  "pool_1d(x)",
  "pool_2d(x)",
  "upscale(x)",
+ "argsort(x)",

  "flash_attn(x)",
  "flash_ff(x)",
@@ -1748,10 +1734,28 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  "cross_entropy_loss_back(x,y)",
  };

- static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+ static_assert(GGML_OP_COUNT == 70, "GGML_OP_COUNT != 70");

  static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

+
+ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
+ "ABS",
+ "SGN",
+ "NEG",
+ "STEP",
+ "TANH",
+ "ELU",
+ "RELU",
+ "GELU",
+ "GELU_QUICK",
+ "SILU",
+ "LEAKY",
+ };
+
+ static_assert(GGML_UNARY_OP_COUNT == 11, "GGML_UNARY_OP_COUNT != 11");
+
+
  static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
  static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");

@@ -1771,6 +1775,7 @@ static void ggml_setup_op_has_task_pass(void) {

  p[GGML_OP_ACC ] = true;
  p[GGML_OP_MUL_MAT ] = true;
+ p[GGML_OP_MUL_MAT_ID ] = true;
  p[GGML_OP_OUT_PROD ] = true;
  p[GGML_OP_SET ] = true;
  p[GGML_OP_GET_ROWS_BACK ] = true;
@@ -2023,6 +2028,20 @@ const char * ggml_op_symbol(enum ggml_op op) {
  return GGML_OP_SYMBOL[op];
  }

+ const char * ggml_unary_op_name(enum ggml_unary_op op) {
+ return GGML_UNARY_OP_NAME[op];
+ }
+
+ const char * ggml_op_desc(const struct ggml_tensor * t) {
+ if (t->op == GGML_OP_UNARY) {
+ enum ggml_unary_op uop = ggml_get_unary_op(t);
+ return ggml_unary_op_name(uop);
+ }
+ else {
+ return ggml_op_name(t->op);
+ }
+ }
+
  size_t ggml_element_size(const struct ggml_tensor * tensor) {
  return ggml_type_size(tensor->type);
  }
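
The new ggml_unary_op_name()/ggml_op_desc() pair makes graph dumps more readable: a GGML_OP_UNARY node reports its concrete unary op ("SILU", "RELU", ...) instead of the generic op name. A minimal sketch of how a caller might use it, assuming an already-built ggml_cgraph * gf and that both functions are exported in ggml.h; the helper itself is hypothetical, not part of the diff:

#include <stdio.h>
#include "ggml.h"

// Hypothetical debug helper: print a one-line description of every node in a
// graph, resolving GGML_OP_UNARY nodes to their concrete unary op name.
static void print_graph_ops(const struct ggml_cgraph * gf) {
    for (int i = 0; i < gf->n_nodes; i++) {
        const struct ggml_tensor * node = gf->nodes[i];
        printf("node %3d: %-12s %s\n", i, ggml_op_desc(node), node->name);
    }
}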
@@ -3154,9 +3173,7 @@ static struct ggml_tensor * ggml_add_impl(
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  bool inplace) {
- // TODO: support less-strict constraint
- // GGML_ASSERT(ggml_can_repeat(b, a));
- GGML_ASSERT(ggml_can_repeat_rows(b, a));
+ GGML_ASSERT(ggml_can_repeat(b, a));

  bool is_node = false;

@@ -3371,9 +3388,7 @@ static struct ggml_tensor * ggml_mul_impl(
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  bool inplace) {
- // TODO: support less-strict constraint
- // GGML_ASSERT(ggml_can_repeat(b, a));
- GGML_ASSERT(ggml_can_repeat_rows(b, a));
+ GGML_ASSERT(ggml_can_repeat(b, a));

  bool is_node = false;

@@ -3418,7 +3433,7 @@ static struct ggml_tensor * ggml_div_impl(
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  bool inplace) {
- GGML_ASSERT(ggml_are_same_shape(a, b));
+ GGML_ASSERT(ggml_can_repeat(b, a));

  bool is_node = false;

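With ggml_can_repeat replacing ggml_can_repeat_rows (and, for division, ggml_are_same_shape), the second operand of ggml_add/ggml_mul/ggml_div only needs dimensions that evenly divide the first operand's, including along dim 0. A minimal sketch of shapes that are now accepted, assuming an initialized ggml_context * ctx; the shapes are illustrative, not taken from the diff:

// a: 4096 x 32 matrix, s: a single scalar, r: one value per row of a
struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32);
struct ggml_tensor * s = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
struct ggml_tensor * r = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1, 32);

struct ggml_tensor * y = ggml_div(ctx, a, s); // every element of a divided by s[0]
struct ggml_tensor * z = ggml_mul(ctx, a, r); // row i of a scaled by r[i]

Before this change ggml_div required identical shapes and ggml_add/ggml_mul required the second operand to match along dim 0.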

@@ -4056,6 +4071,49 @@ struct ggml_tensor * ggml_mul_mat(
  return result;
  }

+ // ggml_mul_mat_id
+
+ struct ggml_tensor * ggml_mul_mat_id(
+ struct ggml_context * ctx,
+ struct ggml_tensor * as[],
+ struct ggml_tensor * ids,
+ int id,
+ struct ggml_tensor * b) {
+
+ int64_t n_as = ids->ne[0];
+
+ GGML_ASSERT(ids->type == GGML_TYPE_I32);
+ GGML_ASSERT(ggml_is_vector(ids));
+ GGML_ASSERT(n_as > 0 && n_as <= GGML_MAX_SRC - 2);
+ GGML_ASSERT(id >= 0 && id < n_as);
+
+ bool is_node = false;
+
+ if (as[0]->grad || b->grad) {
+ is_node = true;
+ }
+
+ const int64_t ne[4] = { as[0]->ne[1], b->ne[1], b->ne[2], b->ne[3] };
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(as[0]->n_dims, b->n_dims), ne);
+
+ ggml_set_op_params_i32(result, 0, id);
+
+ result->op = GGML_OP_MUL_MAT_ID;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = ids;
+ result->src[1] = b;
+
+ for (int64_t i = 0; i < n_as; i++) {
+ struct ggml_tensor * a = as[i];
+ GGML_ASSERT(ggml_are_same_shape(as[0], a));
+ GGML_ASSERT(ggml_can_mul_mat(a, b));
+ GGML_ASSERT(!ggml_is_transposed(a));
+ result->src[i + 2] = a;
+ }
+
+ return result;
+ }
+
  // ggml_out_prod

  struct ggml_tensor * ggml_out_prod(
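
ggml_mul_mat_id picks one matrix out of a small array at graph-evaluation time: ids is an I32 vector of matrix indices, id selects which entry of ids to read, and the chosen as[ids[id]] is then multiplied with b exactly like ggml_mul_mat. A rough usage sketch in the style of mixture-of-experts routing; the expert count, the shapes, and ctx are assumptions made purely for illustration:

enum { N_EXPERT = 4 }; // must stay within GGML_MAX_SRC - 2 source slots

struct ggml_tensor * experts[N_EXPERT];
for (int i = 0; i < N_EXPERT; i++) {
    // all candidate matrices must have the same shape
    experts[i] = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 11008);
}

struct ggml_tensor * b   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32);
struct ggml_tensor * ids = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N_EXPERT);

// ids must hold valid indices (0 .. N_EXPERT-1) before the graph is evaluated;
// id = 0 means "use the expert whose index is stored in ids[0]"
struct ggml_tensor * out = ggml_mul_mat_id(ctx, experts, ids, /*id =*/ 0, b);
// out has the shape ggml_mul_mat(ctx, experts[ids[0]], b) would have: {11008, 32}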
@@ -4209,7 +4267,7 @@ struct ggml_tensor * ggml_set_2d_inplace(
  struct ggml_tensor * b,
  size_t nb1,
  size_t offset) {
- return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
+ return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
  }

  // ggml_cpy
@@ -5468,6 +5526,43 @@ struct ggml_tensor * ggml_upscale(
  return ggml_upscale_impl(ctx, a, scale_factor);
  }

+ // ggml_argsort
+
+ struct ggml_tensor * ggml_argsort(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_sort_order order) {
+ bool is_node = false;
+
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, a->ne);
+
+ ggml_set_op_params_i32(result, 0, (int32_t) order);
+
+ result->op = GGML_OP_ARGSORT;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+
+ return result;
+ }
+
+ // ggml_top_k
+
+ struct ggml_tensor * ggml_top_k(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int k) {
+ GGML_ASSERT(a->ne[0] >= k);
+
+ struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_DESC);
+
+ result = ggml_view_4d(ctx, result,
+ k, result->ne[1], result->ne[2], result->ne[3],
+ result->nb[1], result->nb[2], result->nb[3],
+ 0);
+
+ return result;
+ }
+
  // ggml_flash_attn

  struct ggml_tensor * ggml_flash_attn(
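
ggml_argsort returns, for every row of a, the I32 permutation that sorts that row, and ggml_top_k is simply a descending argsort followed by a k-wide view of the index tensor. A small usage sketch, assuming an initialized ggml_context * ctx and illustrative shapes:

// logits: 32000 vocabulary scores for each of 8 sequences
struct ggml_tensor * logits = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 32000, 8);

// per-row ascending sort order; idx has the same ne as logits but type I32
struct ggml_tensor * idx = ggml_argsort(ctx, logits, GGML_SORT_ASC);

// indices of the 10 largest entries of each row (a non-contiguous view into a
// full descending argsort, per the implementation above)
struct ggml_tensor * top10 = ggml_top_k(ctx, logits, 10);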
@@ -6827,7 +6922,7 @@ static void ggml_compute_forward_add_f32(
  const struct ggml_tensor * src0,
  const struct ggml_tensor * src1,
  struct ggml_tensor * dst) {
- GGML_ASSERT(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst));
+ GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

  if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
  return;
@@ -6860,16 +6955,19 @@ static void ggml_compute_forward_add_f32(
  const int64_t i13 = i03 % ne13;
  const int64_t i12 = i02 % ne12;
  const int64_t i11 = i01 % ne11;
+ const int64_t nr0 = ne00 / ne10;

  float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
  float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
  float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);

+ for (int64_t r = 0; r < nr0; ++r) {
  #ifdef GGML_USE_ACCELERATE
- vDSP_vadd(src0_ptr, 1, src1_ptr, 1, dst_ptr, 1, ne00);
+ vDSP_vadd(src0_ptr + r*ne10, 1, src1_ptr, 1, dst_ptr + r*ne10, 1, ne10);
  #else
- ggml_vec_add_f32(ne00, dst_ptr, src0_ptr, src1_ptr);
+ ggml_vec_add_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
  #endif
+ }
  }
  } else {
  // src1 is not contiguous
@@ -6886,8 +6984,9 @@ static void ggml_compute_forward_add_f32(
  float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
  float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);

- for (int i0 = 0; i0 < ne0; i0++) {
- float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i0*nb10);
+ for (int64_t i0 = 0; i0 < ne0; ++i0) {
+ const int64_t i10 = i0 % ne10;
+ float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10);

  dst_ptr[i0] = src0_ptr[i0] + *src1_ptr;
  }
@@ -7607,7 +7706,7 @@ static void ggml_compute_forward_mul_f32(
  const struct ggml_tensor * src0,
  const struct ggml_tensor * src1,
  struct ggml_tensor * dst) {
- GGML_ASSERT(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst));
+ GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

  if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
  return;
@@ -7630,7 +7729,6 @@ static void ggml_compute_forward_mul_f32(

  GGML_ASSERT( nb0 == sizeof(float));
  GGML_ASSERT(nb00 == sizeof(float));
- GGML_ASSERT(ne00 == ne10);

  if (nb10 == sizeof(float)) {
  for (int64_t ir = ith; ir < nr; ir += nth) {
@@ -7642,20 +7740,21 @@ static void ggml_compute_forward_mul_f32(
  const int64_t i13 = i03 % ne13;
  const int64_t i12 = i02 % ne12;
  const int64_t i11 = i01 % ne11;
+ const int64_t nr0 = ne00 / ne10;

  float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
  float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
  float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);

+ for (int64_t r = 0 ; r < nr0; ++r) {
  #ifdef GGML_USE_ACCELERATE
- UNUSED(ggml_vec_mul_f32);
+ UNUSED(ggml_vec_mul_f32);

- vDSP_vmul( src0_ptr, 1, src1_ptr, 1, dst_ptr, 1, ne00);
+ vDSP_vmul(src0_ptr + r*ne10, 1, src1_ptr, 1, dst_ptr + r*ne10, 1, ne10);
  #else
- ggml_vec_mul_f32(ne00, dst_ptr, src0_ptr, src1_ptr);
+ ggml_vec_mul_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
  #endif
- // }
- // }
+ }
  }
  } else {
  // src1 is not contiguous
@@ -7673,8 +7772,9 @@ static void ggml_compute_forward_mul_f32(
  float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
  float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);

- for (int64_t i0 = 0; i0 < ne00; i0++) {
- float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i0*nb10);
+ for (int64_t i0 = 0; i0 < ne00; ++i0) {
+ const int64_t i10 = i0 % ne10;
+ float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10);

  dst_ptr[i0] = src0_ptr[i0] * (*src1_ptr);
  }
@@ -7708,14 +7808,16 @@ static void ggml_compute_forward_div_f32(
  const struct ggml_tensor * src0,
  const struct ggml_tensor * src1,
  struct ggml_tensor * dst) {
- assert(params->ith == 0);
- assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
+ GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

  if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
  return;
  }

- const int nr = ggml_nrows(src0);
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ const int64_t nr = ggml_nrows(src0);

  GGML_TENSOR_BINARY_OP_LOCALS

@@ -7723,41 +7825,50 @@ static void ggml_compute_forward_div_f32(
  GGML_ASSERT(nb00 == sizeof(float));

  if (nb10 == sizeof(float)) {
- for (int ir = 0; ir < nr; ++ir) {
- // src0, src1 and dst are same shape => same indices
- const int i3 = ir/(ne2*ne1);
- const int i2 = (ir - i3*ne2*ne1)/ne1;
- const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
+ for (int64_t ir = ith; ir < nr; ir += nth) {
+ // src0 and dst are same shape => same indices
+ const int64_t i03 = ir/(ne02*ne01);
+ const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+ const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+ const int64_t i13 = i03 % ne13;
+ const int64_t i12 = i02 % ne12;
+ const int64_t i11 = i01 % ne11;
+ const int64_t nr0 = ne00 / ne10;
+
+ float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
+ float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+ float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);

+ for (int64_t r = 0; r < nr0; ++r) {
  #ifdef GGML_USE_ACCELERATE
- UNUSED(ggml_vec_div_f32);
+ UNUSED(ggml_vec_div_f32);

- vDSP_vdiv(
- (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
- (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
- (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
- ne0);
+ vDSP_vdiv(src1_ptr, 1, src0_ptr + r*ne10, 1, dst_ptr + r*ne10, 1, ne10);
  #else
- ggml_vec_div_f32(ne0,
- (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ),
- (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01),
- (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11));
+ ggml_vec_div_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
  #endif
- // }
- // }
+ }
  }
  } else {
  // src1 is not contiguous
- for (int ir = 0; ir < nr; ++ir) {
- // src0, src1 and dst are same shape => same indices
- const int i3 = ir/(ne2*ne1);
- const int i2 = (ir - i3*ne2*ne1)/ne1;
- const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
+ for (int64_t ir = ith; ir < nr; ir += nth) {
+ // src0 and dst are same shape => same indices
+ // src1 is broadcastable across src0 and dst in i1, i2, i3
+ const int64_t i03 = ir/(ne02*ne01);
+ const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+ const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);

- float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 );
- float * src0_ptr = (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
- for (int i0 = 0; i0 < ne0; i0++) {
- float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11 + i0*nb10);
+ const int64_t i13 = i03 % ne13;
+ const int64_t i12 = i02 % ne12;
+ const int64_t i11 = i01 % ne11;
+
+ float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
+ float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+
+ for (int64_t i0 = 0; i0 < ne00; ++i0) {
+ const int64_t i10 = i0 % ne10;
+ float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10);

  dst_ptr[i0] = src0_ptr[i0] / (*src1_ptr);
  }
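
The add, mul and div kernels above all share the same inner-repeat pattern for the relaxed broadcast: nr0 = ne00 / ne10 is an integer (guaranteed by ggml_can_repeat), and the ne10-wide src1 row is re-applied to each ne10-wide chunk of the src0 row. A standalone illustration of that indexing for ne00 = 8 and ne10 = 4 (plain C, not ggml code):

#include <stdio.h>

int main(void) {
    const int ne00 = 8, ne10 = 4;
    const int nr0 = ne00 / ne10;                 // 2 repeats per row
    const float src0[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    const float src1[4] = {10, 20, 30, 40};
    float dst[8];

    // same structure as the r-loop in the kernels above, for the "add" case
    for (int r = 0; r < nr0; ++r) {
        for (int i = 0; i < ne10; ++i) {
            dst[r*ne10 + i] = src0[r*ne10 + i] + src1[i];
        }
    }

    for (int i = 0; i < ne00; ++i) {
        printf("%g ", dst[i]);                   // prints: 11 22 33 44 15 26 37 48
    }
    printf("\n");
    return 0;
}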
@@ -8203,7 +8314,7 @@ static void ggml_compute_forward_repeat_f16(
  return;
  }

- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS

  // guaranteed to be an integer due to the check in ggml_can_repeat
  const int nr0 = (int)(ne0/ne00);
@@ -8348,6 +8459,7 @@ static void ggml_compute_forward_concat_f32(
  GGML_ASSERT(src0->nb[0] == sizeof(float));

  const int ith = params->ith;
+ const int nth = params->nth;

  GGML_TENSOR_BINARY_OP_LOCALS

@@ -8357,7 +8469,7 @@ static void ggml_compute_forward_concat_f32(
  GGML_ASSERT(nb10 == sizeof(float));

  for (int i3 = 0; i3 < ne3; i3++) {
- for (int i2 = ith; i2 < ne2; i2++) {
+ for (int i2 = ith; i2 < ne2; i2 += nth) {
  if (i2 < ne02) { // src0
  for (int i1 = 0; i1 < ne1; i1++) {
  for (int i0 = 0; i0 < ne0; i0++) {
@@ -9517,6 +9629,8 @@ static void ggml_compute_forward_mul_mat(
  char * wdata = params->wdata;
  const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);

+ assert(params->wsize >= ne11*ne12*ne13*row_size);
+
  for (int64_t i13 = 0; i13 < ne13; ++i13) {
  for (int64_t i12 = 0; i12 < ne12; ++i12) {
  for (int64_t i11 = 0; i11 < ne11; ++i11) {
@@ -9618,6 +9732,26 @@ static void ggml_compute_forward_mul_mat(
  }
  }

+ // ggml_compute_forward_mul_mat_id
+
+ static void ggml_compute_forward_mul_mat_id(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * ids = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
+ const int id = ggml_get_op_params_i32(dst, 0);
+
+ const int a_id = ((int32_t *)ids->data)[id];
+
+ GGML_ASSERT(a_id >= 0 && a_id < ids->ne[0]);
+
+ const struct ggml_tensor * src0 = dst->src[a_id + 2];
+
+ ggml_compute_forward_mul_mat(params, src0, src1, dst);
+ }
+
  // ggml_compute_forward_out_prod

  static void ggml_compute_forward_out_prod_f32(
@@ -12021,6 +12155,67 @@ static void ggml_compute_forward_upscale(
  }
  }

+ // ggml_compute_forward_argsort
+
+ static void ggml_compute_forward_argsort_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ GGML_TENSOR_UNARY_OP_LOCALS
+
+ GGML_ASSERT(nb0 == sizeof(float));
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ const int64_t nr = ggml_nrows(src0);
+
+ enum ggml_sort_order order = (enum ggml_sort_order) ggml_get_op_params_i32(dst, 0);
+
+ for (int64_t i = ith; i < nr; i += nth) {
+ int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
+ const float * src_data = (float *)((char *) src0->data + i*nb01);
+
+ for (int64_t j = 0; j < ne0; j++) {
+ dst_data[j] = j;
+ }
+
+ // C doesn't have a functional sort, so we do a bubble sort instead
+ for (int64_t j = 0; j < ne0; j++) {
+ for (int64_t k = j + 1; k < ne0; k++) {
+ if ((order == GGML_SORT_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
+ (order == GGML_SORT_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
+ int32_t tmp = dst_data[j];
+ dst_data[j] = dst_data[k];
+ dst_data[k] = tmp;
+ }
+ }
+ }
+ }
+ }
+
+ static void ggml_compute_forward_argsort(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_argsort_f32(params, src0, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+ }
+
  // ggml_compute_forward_flash_attn

  static void ggml_compute_forward_flash_attn_f32(
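
The argsort kernel sorts indices rather than values, so row i of dst ends up holding the positions of row i's elements in ascending (GGML_SORT_ASC) or descending (GGML_SORT_DESC) order; the nested pairwise-swap loop is O(ne0²) per row. A standalone illustration of the expected result for one short row (plain C, not ggml code):

#include <stdio.h>
#include <stdint.h>

int main(void) {
    const float src[4] = {0.2f, 1.5f, -0.3f, 0.9f};
    int32_t     idx[4] = {0, 1, 2, 3};
    const int   n      = 4;

    // same pairwise swap as the kernel above, ascending order
    for (int j = 0; j < n; j++) {
        for (int k = j + 1; k < n; k++) {
            if (src[idx[j]] > src[idx[k]]) {
                int32_t tmp = idx[j]; idx[j] = idx[k]; idx[k] = tmp;
            }
        }
    }

    for (int j = 0; j < n; j++) {
        printf("%d ", (int) idx[j]);             // prints: 2 0 3 1
    }
    printf("\n");
    return 0;
}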
@@ -13844,6 +14039,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
  {
  ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor);
  } break;
+ case GGML_OP_MUL_MAT_ID:
+ {
+ ggml_compute_forward_mul_mat_id(params, tensor);
+ } break;
  case GGML_OP_OUT_PROD:
  {
  ggml_compute_forward_out_prod(params, tensor->src[0], tensor->src[1], tensor);
@@ -13948,6 +14147,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
  {
  ggml_compute_forward_upscale(params, tensor->src[0], tensor);
  } break;
+ case GGML_OP_ARGSORT:
+ {
+ ggml_compute_forward_argsort(params, tensor->src[0], tensor);
+ } break;
  case GGML_OP_FLASH_ATTN:
  {
  const int32_t t = ggml_get_op_params_i32(tensor, 0);
@@ -14598,6 +14801,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
  zero_table);
  }
  } break;
+ case GGML_OP_MUL_MAT_ID:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
  case GGML_OP_OUT_PROD:
  {
  GGML_ASSERT(false); // TODO: not implemented
@@ -14936,6 +15143,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
  {
  GGML_ASSERT(false); // TODO: not implemented
  } break;
+ case GGML_OP_ARGSORT:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
  case GGML_OP_FLASH_ATTN:
  {
  struct ggml_tensor * flash_grad = NULL;
@@ -15296,12 +15507,8 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
  return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
  }

- struct ggml_cgraph * ggml_graph_view(struct ggml_context * ctx, struct ggml_cgraph * cgraph0, int i0, int i1) {
- const size_t obj_size = sizeof(struct ggml_cgraph);
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
- struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
-
- *cgraph = (struct ggml_cgraph) {
+ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
+ struct ggml_cgraph cgraph = {
  /*.size =*/ 0,
  /*.n_nodes =*/ i1 - i0,
  /*.n_leafs =*/ 0,
@@ -15536,7 +15743,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  n_tasks = n_threads;
  } break;
  case GGML_OP_SUB:
- case GGML_OP_DIV:
  case GGML_OP_SQR:
  case GGML_OP_SQRT:
  case GGML_OP_LOG:
@@ -15569,10 +15775,13 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  {
  n_tasks = n_threads;
  } break;
+ default:
+ GGML_ASSERT(false);
  }
  break;
  case GGML_OP_SILU_BACK:
  case GGML_OP_MUL:
+ case GGML_OP_DIV:
  case GGML_OP_NORM:
  case GGML_OP_RMS_NORM:
  case GGML_OP_RMS_NORM_BACK:
@@ -15610,6 +15819,11 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  }
  #endif
  } break;
+ case GGML_OP_MUL_MAT_ID:
+ {
+ // FIXME: blas
+ n_tasks = n_threads;
+ } break;
  case GGML_OP_OUT_PROD:
  {
  n_tasks = n_threads;
@@ -15629,7 +15843,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  } break;
  case GGML_OP_DIAG_MASK_ZERO:
  case GGML_OP_DIAG_MASK_INF:
- case GGML_OP_SOFT_MAX:
  case GGML_OP_SOFT_MAX_BACK:
  case GGML_OP_ROPE:
  case GGML_OP_ROPE_BACK:
@@ -15645,6 +15858,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  {
  n_tasks = 1; //TODO
  } break;
+ case GGML_OP_SOFT_MAX:
+ {
+ n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
+ } break;
  case GGML_OP_CONV_TRANSPOSE_1D:
  {
  n_tasks = n_threads;
@@ -15666,6 +15883,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  {
  n_tasks = n_threads;
  } break;
+ case GGML_OP_ARGSORT:
+ {
+ n_tasks = n_threads;
+ } break;
  case GGML_OP_FLASH_ATTN:
  {
  n_tasks = n_threads;
@@ -15728,6 +15949,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  {
  n_tasks = 1;
  } break;
+ case GGML_OP_COUNT:
+ {
+ GGML_ASSERT(false);
+ } break;
  default:
  {
  fprintf(stderr, "%s: op not implemented: ", __func__);
@@ -15876,18 +16101,16 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

  // thread scheduling for the different operations + work buffer size estimation
  for (int i = 0; i < cgraph->n_nodes; i++) {
- int n_tasks = 1;
-
  struct ggml_tensor * node = cgraph->nodes[i];

+ const int n_tasks = ggml_get_n_tasks(node, n_threads);
+
  size_t cur = 0;

  switch (node->op) {
  case GGML_OP_CPY:
  case GGML_OP_DUP:
  {
- n_tasks = n_threads;
-
  if (ggml_is_quantized(node->type)) {
  cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
  }
@@ -15895,16 +16118,12 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  case GGML_OP_ADD:
  case GGML_OP_ADD1:
  {
- n_tasks = n_threads;
-
  if (ggml_is_quantized(node->src[0]->type)) {
  cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
  }
  } break;
  case GGML_OP_ACC:
  {
- n_tasks = n_threads;
-
  if (ggml_is_quantized(node->src[0]->type)) {
  cur = ggml_type_size(GGML_TYPE_F32) * node->src[1]->ne[0] * n_tasks;
  }
@@ -15930,18 +16149,31 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
  }
  } break;
+ case GGML_OP_MUL_MAT_ID:
+ {
+ const struct ggml_tensor * a = node->src[2];
+ const struct ggml_tensor * b = node->src[1];
+ const enum ggml_type vec_dot_type = type_traits[a->type].vec_dot_type;
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+ if (ggml_compute_forward_mul_mat_use_blas(a, b, node)) {
+ if (a->type != GGML_TYPE_F32) {
+ // here we need memory just for single 2D matrix from src0
+ cur = ggml_type_size(GGML_TYPE_F32)*(a->ne[0]*a->ne[1]);
+ }
+ } else
+ #endif
+ if (b->type != vec_dot_type) {
+ cur = ggml_type_size(vec_dot_type)*ggml_nelements(b)/ggml_blck_size(vec_dot_type);
+ }
+ } break;
  case GGML_OP_OUT_PROD:
  {
- n_tasks = n_threads;
-
  if (ggml_is_quantized(node->src[0]->type)) {
  cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
  }
  } break;
  case GGML_OP_SOFT_MAX:
  {
- n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
-
  cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
  } break;
  case GGML_OP_CONV_TRANSPOSE_1D:
@@ -15969,10 +16201,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  GGML_ASSERT(false);
  }
  } break;
- case GGML_OP_IM2COL:
- {
- n_tasks = n_threads;
- } break;
  case GGML_OP_CONV_TRANSPOSE_2D:
  {
  const int64_t ne00 = node->src[0]->ne[0]; // W
@@ -15989,8 +16217,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  } break;
  case GGML_OP_FLASH_ATTN:
  {
- n_tasks = n_threads;
-
  const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);

  if (node->src[1]->type == GGML_TYPE_F32) {
@@ -16003,8 +16229,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  } break;
  case GGML_OP_FLASH_FF:
  {
- n_tasks = n_threads;
-
  if (node->src[1]->type == GGML_TYPE_F32) {
  cur = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
  cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
@@ -16015,8 +16239,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
  } break;
  case GGML_OP_FLASH_ATTN_BACK:
  {
- n_tasks = n_threads;
-
  const int64_t D = node->src[0]->ne[0];
  const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
  const int64_t mxDn = MAX(D, ne11) * 2; // *2 because of S and SM in ggml_compute_forward_flash_attn_back
@@ -16031,8 +16253,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

  case GGML_OP_CROSS_ENTROPY_LOSS:
  {
- n_tasks = n_threads;
-
  cur = ggml_type_size(node->type)*(n_tasks + node->src[0]->ne[0]*n_tasks);
  } break;
  case GGML_OP_COUNT:
@@ -17819,8 +18039,8 @@ size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t *
  memcpy(&qh, &y[i].qh, sizeof(qh));

  for (int j = 0; j < QK5_0; j += 2) {
- const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
- const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12));
+ const uint8_t vh0 = ((qh & (1u << (j/2 + 0 ))) >> (j/2 + 0 )) << 4;
+ const uint8_t vh1 = ((qh & (1u << (j/2 + 16))) >> (j/2 + 12));

  // cast to 16 bins
  const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
@@ -17849,8 +18069,8 @@ size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t *
  memcpy(&qh, &y[i].qh, sizeof(qh));

  for (int j = 0; j < QK5_1; j += 2) {
- const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
- const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12));
+ const uint8_t vh0 = ((qh & (1u << (j/2 + 0 ))) >> (j/2 + 0 )) << 4;
+ const uint8_t vh1 = ((qh & (1u << (j/2 + 16))) >> (j/2 + 12));

  // cast to 16 bins
  const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
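
Both q5 histogram loops step j by two and unpack quants j/2 and j/2 + 16 from byte qs[j/2], so their fifth (high) bits live at qh bits j/2 and j/2 + 16. The old j and j + 16 indices skipped every odd bit and, for the upper half of the block, shifted past bit 31 of the 32-bit qh. A small check of the corrected mapping (standalone illustration only):

#include <stdio.h>

int main(void) {
    // For QK5_0 == 32 the loop visits j = 0, 2, ..., 30; the two printed bit
    // indices together enumerate 0..31 exactly once, with no out-of-range shift.
    for (int j = 0; j < 32; j += 2) {
        printf("j = %2d -> quants %2d and %2d, qh bits %2d and %2d\n",
               j, j/2, j/2 + 16, j/2, j/2 + 16);
    }
    return 0;
}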
@@ -18040,6 +18260,7 @@ struct gguf_kv {

  struct gguf_header {
  char magic[4];
+
  uint32_t version;
  uint64_t n_tensors; // GGUFv2
  uint64_t n_kv; // GGUFv2
@@ -18129,7 +18350,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

  for (uint32_t i = 0; i < sizeof(magic); i++) {
  if (magic[i] != GGUF_MAGIC[i]) {
- fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic);
+ fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
  fclose(file);
  return NULL;
  }
@@ -18144,7 +18365,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  {
  strncpy(ctx->header.magic, magic, 4);

-
  ctx->kv = NULL;
  ctx->infos = NULL;
  ctx->data = NULL;