llama_cpp 0.3.5 → 0.3.7

@@ -195,8 +195,8 @@ typedef void * thread_ret_t;
195
195
  #define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
196
196
  #define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
197
197
  #else
198
- inline static void* ggml_aligned_malloc(size_t size) {
199
- void* aligned_memory = NULL;
198
+ inline static void * ggml_aligned_malloc(size_t size) {
199
+ void * aligned_memory = NULL;
200
200
  #ifdef GGML_USE_METAL
201
201
  int result = posix_memalign(&aligned_memory, getpagesize(), size);
202
202
  #else
@@ -3811,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
3811
3811
  "CROSS_ENTROPY_LOSS_BACK",
3812
3812
  };
3813
3813
 
3814
- static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
3814
+ static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
3815
3815
 
3816
3816
  static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
3817
3817
  "none",
@@ -3883,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
3883
3883
  "cross_entropy_loss_back(x,y)",
3884
3884
  };
3885
3885
 
3886
- static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
3886
+ static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
3887
3887
 
3888
3888
  static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
3889
3889
 
@@ -4110,7 +4110,7 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
4110
4110
  //
4111
4111
  // is enough, but just in case, adding the second part
4112
4112
 
4113
- return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
4113
+ return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
4114
4114
  }
4115
4115
 
4116
4116
  size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
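The ggml_nbytes change above rounds the computed byte count up to a multiple of GGML_MEM_ALIGN via GGML_PAD. A minimal standalone sketch of that rounding, assuming GGML_PAD is the usual round-up-to-a-power-of-two macro (MEM_ALIGN and PAD below are illustrative stand-ins, not the real ggml macros):

    #include <stdio.h>
    #include <stddef.h>

    // illustrative stand-ins for GGML_MEM_ALIGN / GGML_PAD (assumed power-of-two rounding)
    #define MEM_ALIGN 16
    #define PAD(x, n) (((x) + (n) - 1) & ~((size_t)(n) - 1))

    int main(void) {
        size_t raw = 70;                                   // unpadded byte count of a tensor
        printf("%zu -> %zu\n", raw, PAD(raw, MEM_ALIGN));  // prints: 70 -> 80
        return 0;
    }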
@@ -4253,7 +4253,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
4253
4253
  tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
4254
4254
  }
4255
4255
 
4256
- static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
4256
+ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
4257
4257
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
4258
4258
 
4259
4259
  return
@@ -4557,10 +4557,12 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
4557
4557
 
4558
4558
  static struct ggml_tensor * ggml_new_tensor_impl(
4559
4559
  struct ggml_context * ctx,
4560
- enum ggml_type type,
4561
- int n_dims,
4562
- const int64_t* ne,
4563
- void* data) {
4560
+ enum ggml_type type,
4561
+ int n_dims,
4562
+ const int64_t * ne,
4563
+ void * data) {
4564
+
4565
+ assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
4564
4566
 
4565
4567
  size_t data_size = 0;
4566
4568
 
@@ -4600,7 +4602,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
4600
4602
  /*.ne =*/ { 1, 1, 1, 1 },
4601
4603
  /*.nb =*/ { 0, 0, 0, 0 },
4602
4604
  /*.op =*/ GGML_OP_NONE,
4603
- /*.op_params =*/ {0},
4605
+ /*.op_params =*/ { 0 },
4604
4606
  /*.is_param =*/ false,
4605
4607
  /*.grad =*/ NULL,
4606
4608
  /*.src =*/ { NULL },
@@ -4632,6 +4634,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
4632
4634
  }
4633
4635
 
4634
4636
  static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
4637
+ GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
4635
4638
  assert(params_size <= GGML_MAX_OP_PARAMS);
4636
4639
  memcpy(tensor->op_params, params, params_size);
4637
4640
  }
@@ -4648,22 +4651,22 @@ static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int3
4648
4651
 
4649
4652
  struct ggml_tensor * ggml_new_tensor(
4650
4653
  struct ggml_context * ctx,
4651
- enum ggml_type type,
4652
- int n_dims,
4653
- const int64_t * ne) {
4654
+ enum ggml_type type,
4655
+ int n_dims,
4656
+ const int64_t * ne) {
4654
4657
  return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
4655
4658
  }
4656
4659
 
4657
4660
  struct ggml_tensor * ggml_new_tensor_1d(
4658
4661
  struct ggml_context * ctx,
4659
- enum ggml_type type,
4662
+ enum ggml_type type,
4660
4663
  int64_t ne0) {
4661
4664
  return ggml_new_tensor(ctx, type, 1, &ne0);
4662
4665
  }
4663
4666
 
4664
4667
  struct ggml_tensor * ggml_new_tensor_2d(
4665
4668
  struct ggml_context * ctx,
4666
- enum ggml_type type,
4669
+ enum ggml_type type,
4667
4670
  int64_t ne0,
4668
4671
  int64_t ne1) {
4669
4672
  const int64_t ne[2] = { ne0, ne1 };
@@ -4672,7 +4675,7 @@ struct ggml_tensor * ggml_new_tensor_2d(
4672
4675
 
4673
4676
  struct ggml_tensor * ggml_new_tensor_3d(
4674
4677
  struct ggml_context * ctx,
4675
- enum ggml_type type,
4678
+ enum ggml_type type,
4676
4679
  int64_t ne0,
4677
4680
  int64_t ne1,
4678
4681
  int64_t ne2) {
@@ -6238,6 +6241,27 @@ struct ggml_tensor * ggml_reshape_4d(
6238
6241
 
6239
6242
  // ggml_view_1d
6240
6243
 
6244
+ static struct ggml_tensor * ggml_view_tensor_offset(
6245
+ struct ggml_context * ctx,
6246
+ struct ggml_tensor * a,
6247
+ int n_dims,
6248
+ const int64_t * ne,
6249
+ size_t offset) {
6250
+ // don't calculate an offset from an unallocated tensor
6251
+ void * data = NULL;
6252
+ if (a->data != NULL) {
6253
+ data = (char *) a->data + offset;
6254
+ }
6255
+
6256
+ struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data);
6257
+
6258
+ ggml_format_name(result, "%s (view)", a->name);
6259
+
6260
+ ggml_set_op_params(result, &offset, sizeof(offset));
6261
+
6262
+ return result;
6263
+ }
6264
+
6241
6265
  struct ggml_tensor * ggml_view_1d(
6242
6266
  struct ggml_context * ctx,
6243
6267
  struct ggml_tensor * a,
@@ -6250,10 +6274,7 @@ struct ggml_tensor * ggml_view_1d(
6250
6274
  is_node = true;
6251
6275
  }
6252
6276
 
6253
- struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
6254
- ggml_format_name(result, "%s (view)", a->name);
6255
-
6256
- ggml_set_op_params(result, &offset, sizeof(offset));
6277
+ struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);
6257
6278
 
6258
6279
  result->op = GGML_OP_VIEW;
6259
6280
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
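The new static helper ggml_view_tensor_offset above centralizes what ggml_view_1d/2d/3d/4d previously each did by hand, and skips the pointer arithmetic when a->data is NULL (a tensor created in a no_alloc context). A hedged usage sketch of the view API being refactored here; the sizes are illustrative and the calls are the standard ggml ones:

    // sketch (assumes ggml.h is available): carve two views out of one 1-D tensor
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * x  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * lo = ggml_view_1d(ctx, x, 4, 0);                       // elements [0, 4)
        struct ggml_tensor * hi = ggml_view_1d(ctx, x, 4, 4*ggml_element_size(x));  // elements [4, 8), byte offset

        (void) lo; (void) hi;
        ggml_free(ctx);
        return 0;
    }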
@@ -6280,10 +6301,7 @@ struct ggml_tensor * ggml_view_2d(
6280
6301
 
6281
6302
  const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
6282
6303
 
6283
- struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
6284
- ggml_format_name(result, "%s (view)", a->name);
6285
-
6286
- ggml_set_op_params(result, &offset, sizeof(offset));
6304
+ struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);
6287
6305
 
6288
6306
  result->nb[1] = nb1;
6289
6307
  result->nb[2] = result->nb[1]*ne1;
@@ -6316,10 +6334,7 @@ struct ggml_tensor * ggml_view_3d(
6316
6334
 
6317
6335
  const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
6318
6336
 
6319
- struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
6320
- ggml_format_name(result, "%s (view)", a->name);
6321
-
6322
- ggml_set_op_params(result, &offset, sizeof(offset));
6337
+ struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);
6323
6338
 
6324
6339
  result->nb[1] = nb1;
6325
6340
  result->nb[2] = nb2;
@@ -6354,10 +6369,7 @@ struct ggml_tensor * ggml_view_4d(
6354
6369
 
6355
6370
  const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
6356
6371
 
6357
- struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
6358
- ggml_format_name(result, "%s (view)", a->name);
6359
-
6360
- ggml_set_op_params(result, &offset, sizeof(offset));
6372
+ struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);
6361
6373
 
6362
6374
  result->nb[1] = nb1;
6363
6375
  result->nb[2] = nb2;
@@ -6428,7 +6440,7 @@ struct ggml_tensor * ggml_permute(
6428
6440
  result->src[0] = a;
6429
6441
 
6430
6442
  int32_t params[] = { axis0, axis1, axis2, axis3 };
6431
- ggml_set_op_params(result, &params, sizeof(params));
6443
+ ggml_set_op_params(result, params, sizeof(params));
6432
6444
 
6433
6445
  return result;
6434
6446
  }
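This hunk, like many below, changes ggml_set_op_params(result, &params, ...) to ggml_set_op_params(result, params, ...). For a local array both expressions yield the same address, but `params` decays to a plain element pointer while `&params` is a pointer-to-array; a small standalone illustration (set_params is a hypothetical stand-in):

    #include <stdio.h>
    #include <stdint.h>

    static void set_params(const void * p, size_t n) { (void) p; (void) n; }  // stand-in for ggml_set_op_params

    int main(void) {
        int32_t params[4] = { 0, 1, 2, 3 };
        // `params` decays to int32_t *      -- matches the `const void *` parameter directly
        // `&params` has type int32_t (*)[4] -- same address, different pointer type
        printf("%p %p\n", (void *) params, (void *) &params);
        set_params(params, sizeof(params));  // the form this diff standardizes on
        return 0;
    }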
@@ -6554,7 +6566,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
6554
6566
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6555
6567
 
6556
6568
  int32_t params[] = { n_past, inplace ? 1 : 0 };
6557
- ggml_set_op_params(result, &params, sizeof(params));
6569
+ ggml_set_op_params(result, params, sizeof(params));
6558
6570
 
6559
6571
  result->op = GGML_OP_DIAG_MASK_INF;
6560
6572
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6594,7 +6606,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
6594
6606
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6595
6607
 
6596
6608
  int32_t params[] = { n_past, inplace ? 1 : 0 };
6597
- ggml_set_op_params(result, &params, sizeof(params));
6609
+ ggml_set_op_params(result, params, sizeof(params));
6598
6610
 
6599
6611
  result->op = GGML_OP_DIAG_MASK_ZERO;
6600
6612
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6710,9 +6722,9 @@ static struct ggml_tensor * ggml_rope_impl(
6710
6722
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6711
6723
 
6712
6724
  int32_t params[6] = { n_past, n_dims, mode, n_ctx };
6713
- memcpy(params + 4, &freq_base, sizeof(float));
6725
+ memcpy(params + 4, &freq_base, sizeof(float));
6714
6726
  memcpy(params + 5, &freq_scale, sizeof(float));
6715
- ggml_set_op_params(result, &params, sizeof(params));
6727
+ ggml_set_op_params(result, params, sizeof(params));
6716
6728
 
6717
6729
  result->op = GGML_OP_ROPE;
6718
6730
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
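ggml_rope_impl above packs freq_base and freq_scale into the last two slots of an int32_t op-params array with memcpy, and the compute kernels unpack them the same way. A minimal standalone illustration of that packing (the values are placeholders):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void) {
        int32_t params[6] = { /*n_past=*/0, /*n_dims=*/128, /*mode=*/0, /*n_ctx=*/4096 };
        const float freq_base = 10000.0f, freq_scale = 1.0f;
        memcpy(params + 4, &freq_base,  sizeof(float));   // slot 4 carries the raw bits of a float
        memcpy(params + 5, &freq_scale, sizeof(float));

        float fb, fs;                                     // unpacking, as the kernel side would
        memcpy(&fb, params + 4, sizeof(float));
        memcpy(&fs, params + 5, sizeof(float));
        printf("%.1f %.1f\n", fb, fs);                    // 10000.0 1.0
        return 0;
    }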
@@ -6741,6 +6753,18 @@ struct ggml_tensor * ggml_rope_inplace(
6741
6753
  return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
6742
6754
  }
6743
6755
 
6756
+ struct ggml_tensor * ggml_rope_custom(
6757
+ struct ggml_context * ctx,
6758
+ struct ggml_tensor * a,
6759
+ int n_past,
6760
+ int n_dims,
6761
+ int mode,
6762
+ int n_ctx,
6763
+ float freq_base,
6764
+ float freq_scale) {
6765
+ return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, false);
6766
+ }
6767
+
6744
6768
  struct ggml_tensor * ggml_rope_custom_inplace(
6745
6769
  struct ggml_context * ctx,
6746
6770
  struct ggml_tensor * a,
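ggml_rope_custom, added above, is the non-inplace counterpart of ggml_rope_custom_inplace and exposes freq_base and freq_scale directly. A hedged call sketch, assuming `ctx` is an existing ggml context and `cur` an existing activation tensor; the numeric arguments are placeholders:

    // sketch: RoPE with a custom base and a linear frequency scale of 1/2
    struct ggml_tensor * rotated = ggml_rope_custom(
        ctx, cur,
        /*n_past     =*/ 0,
        /*n_dims     =*/ 128,
        /*mode       =*/ 0,
        /*n_ctx      =*/ 4096,
        /*freq_base  =*/ 10000.0f,
        /*freq_scale =*/ 0.5f);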
@@ -6774,7 +6798,7 @@ struct ggml_tensor * ggml_rope_back(
6774
6798
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
6775
6799
 
6776
6800
  int32_t params[] = { n_past, n_dims, mode, n_ctx };
6777
- ggml_set_op_params(result, &params, sizeof(params));
6801
+ ggml_set_op_params(result, params, sizeof(params));
6778
6802
 
6779
6803
  result->op = GGML_OP_ROPE_BACK;
6780
6804
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6805,7 +6829,7 @@ struct ggml_tensor * ggml_alibi(
6805
6829
 
6806
6830
  int32_t op_params[3] = { n_past, n_head };
6807
6831
  memcpy(op_params + 2, &bias_max, sizeof(float));
6808
- ggml_set_op_params(result, &op_params, sizeof(op_params));
6832
+ ggml_set_op_params(result, op_params, sizeof(op_params));
6809
6833
 
6810
6834
  result->op = GGML_OP_ALIBI;
6811
6835
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6832,7 +6856,7 @@ struct ggml_tensor * ggml_clamp(
6832
6856
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6833
6857
 
6834
6858
  float params[] = { min, max };
6835
- ggml_set_op_params(result, &params, sizeof(params));
6859
+ ggml_set_op_params(result, params, sizeof(params));
6836
6860
 
6837
6861
  result->op = GGML_OP_CLAMP;
6838
6862
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6867,10 +6891,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
6867
6891
  ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
6868
6892
  a->ne[2], 1, 1,
6869
6893
  };
6870
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
6894
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
6871
6895
 
6872
6896
  int32_t params[] = { s0, p0, d0 };
6873
- ggml_set_op_params(result, &params, sizeof(params));
6897
+ ggml_set_op_params(result, params, sizeof(params));
6874
6898
 
6875
6899
  result->op = GGML_OP_CONV_1D;
6876
6900
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
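The output shape above comes from ggml_calc_conv_output_size(ins, ks, s, p, d). Assuming it implements the usual dilated-convolution size formula (a reasonable reading of the call sites, not something this hunk shows), the arithmetic is:

    // assumed formula for the helper used above:
    //   out = (ins + 2*p - d*(ks - 1) - 1) / s + 1
    static int64_t calc_conv_output_size(int64_t ins, int ks, int s, int p, int d) {
        return (ins + 2*p - d*(ks - 1) - 1) / s + 1;
    }
    // e.g. ins=10, ks=3, s=1, p=0, d=1  ->  (10 - 2 - 1)/1 + 1 = 8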
@@ -6882,10 +6906,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
6882
6906
 
6883
6907
  // ggml_conv_2d
6884
6908
 
6885
- struct ggml_tensor* ggml_conv_2d(
6886
- struct ggml_context* ctx,
6887
- struct ggml_tensor * a,
6888
- struct ggml_tensor * b,
6909
+ struct ggml_tensor * ggml_conv_2d(
6910
+ struct ggml_context * ctx,
6911
+ struct ggml_tensor * a,
6912
+ struct ggml_tensor * b,
6889
6913
  int s0,
6890
6914
  int s1,
6891
6915
  int p0,
@@ -6906,10 +6930,10 @@ struct ggml_tensor* ggml_conv_2d(
6906
6930
  ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
6907
6931
  a->ne[3], b->ne[3],
6908
6932
  };
6909
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
6933
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
6910
6934
 
6911
6935
  int32_t params[] = { s0, s1, p0, p1, d0, d1 };
6912
- ggml_set_op_params(result, &params, sizeof(params));
6936
+ ggml_set_op_params(result, params, sizeof(params));
6913
6937
 
6914
6938
  result->op = GGML_OP_CONV_2D;
6915
6939
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6922,7 +6946,7 @@ struct ggml_tensor* ggml_conv_2d(
6922
6946
 
6923
6947
  // ggml_conv_1d_ph
6924
6948
 
6925
- struct ggml_tensor* ggml_conv_1d_ph(
6949
+ struct ggml_tensor * ggml_conv_1d_ph(
6926
6950
  struct ggml_context * ctx,
6927
6951
  struct ggml_tensor * a,
6928
6952
  struct ggml_tensor * b,
@@ -6940,7 +6964,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
6940
6964
 
6941
6965
  // ggml_pool_1d
6942
6966
 
6943
- struct ggml_tensor* ggml_pool_1d(
6967
+ struct ggml_tensor * ggml_pool_1d(
6944
6968
  struct ggml_context * ctx,
6945
6969
  struct ggml_tensor * a,
6946
6970
  enum ggml_op_pool op,
@@ -6959,10 +6983,10 @@ struct ggml_tensor* ggml_pool_1d(
6959
6983
  ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
6960
6984
  a->ne[1],
6961
6985
  };
6962
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
6986
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
6963
6987
 
6964
6988
  int32_t params[] = { op, k0, s0, p0 };
6965
- ggml_set_op_params(result, &params, sizeof(params));
6989
+ ggml_set_op_params(result, params, sizeof(params));
6966
6990
 
6967
6991
  result->op = GGML_OP_POOL_1D;
6968
6992
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6973,7 +6997,7 @@ struct ggml_tensor* ggml_pool_1d(
6973
6997
 
6974
6998
  // ggml_pool_2d
6975
6999
 
6976
- struct ggml_tensor* ggml_pool_2d(
7000
+ struct ggml_tensor * ggml_pool_2d(
6977
7001
  struct ggml_context * ctx,
6978
7002
  struct ggml_tensor * a,
6979
7003
  enum ggml_op_pool op,
@@ -6996,10 +7020,10 @@ struct ggml_tensor* ggml_pool_2d(
6996
7020
  ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
6997
7021
  a->ne[2],
6998
7022
  };
6999
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
7023
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
7000
7024
 
7001
7025
  int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
7002
- ggml_set_op_params(result, &params, sizeof(params));
7026
+ ggml_set_op_params(result, params, sizeof(params));
7003
7027
 
7004
7028
  result->op = GGML_OP_POOL_2D;
7005
7029
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
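Likewise, the pooled shape above is produced by ggml_calc_pool_output_size(ins, ks, s, p); assuming the standard pooling size formula (again not shown in this hunk), it works out as:

    // assumed formula for the pooling helper used above:
    //   out = (ins + 2*p - ks) / s + 1
    static int64_t calc_pool_output_size(int64_t ins, int ks, int s, int p) {
        return (ins + 2*p - ks) / s + 1;
    }
    // e.g. ins=8, ks=2, s=2, p=0  ->  (8 - 2)/2 + 1 = 4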
@@ -7167,7 +7191,7 @@ struct ggml_tensor * ggml_win_part(
7167
7191
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
7168
7192
 
7169
7193
  int32_t params[] = { npx, npy, w };
7170
- ggml_set_op_params(result, &params, sizeof(params));
7194
+ ggml_set_op_params(result, params, sizeof(params));
7171
7195
 
7172
7196
  result->op = GGML_OP_WIN_PART;
7173
7197
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7197,7 +7221,7 @@ struct ggml_tensor * ggml_win_unpart(
7197
7221
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
7198
7222
 
7199
7223
  int32_t params[] = { w };
7200
- ggml_set_op_params(result, &params, sizeof(params));
7224
+ ggml_set_op_params(result, params, sizeof(params));
7201
7225
 
7202
7226
  result->op = GGML_OP_WIN_UNPART;
7203
7227
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7326,7 +7350,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
7326
7350
  return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
7327
7351
  }
7328
7352
 
7329
- // ggml_map_custom1
7353
+ // ggml_map_custom1_f32
7330
7354
 
7331
7355
  static struct ggml_tensor * ggml_map_custom1_impl_f32(
7332
7356
  struct ggml_context * ctx,
@@ -7343,7 +7367,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(
7343
7367
 
7344
7368
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7345
7369
 
7346
- result->op = GGML_OP_MAP_CUSTOM1;
7370
+ result->op = GGML_OP_MAP_CUSTOM1_F32;
7347
7371
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7348
7372
  result->src[0] = a;
7349
7373
 
@@ -7364,7 +7388,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
7364
7388
  return ggml_map_custom1_impl_f32(ctx, a, fun, true);
7365
7389
  }
7366
7390
 
7367
- // ggml_map_custom2
7391
+ // ggml_map_custom2_f32
7368
7392
 
7369
7393
  static struct ggml_tensor * ggml_map_custom2_impl_f32(
7370
7394
  struct ggml_context * ctx,
@@ -7382,7 +7406,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(
7382
7406
 
7383
7407
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7384
7408
 
7385
- result->op = GGML_OP_MAP_CUSTOM2;
7409
+ result->op = GGML_OP_MAP_CUSTOM2_F32;
7386
7410
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7387
7411
  result->src[0] = a;
7388
7412
  result->src[1] = b;
@@ -7406,7 +7430,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
7406
7430
  return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
7407
7431
  }
7408
7432
 
7409
- // ggml_map_custom3
7433
+ // ggml_map_custom3_f32
7410
7434
 
7411
7435
  static struct ggml_tensor * ggml_map_custom3_impl_f32(
7412
7436
  struct ggml_context * ctx,
@@ -7425,7 +7449,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(
7425
7449
 
7426
7450
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7427
7451
 
7428
- result->op = GGML_OP_MAP_CUSTOM3;
7452
+ result->op = GGML_OP_MAP_CUSTOM3_F32;
7429
7453
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7430
7454
  result->src[0] = a;
7431
7455
  result->src[1] = b;
@@ -7452,6 +7476,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
7452
7476
  return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
7453
7477
  }
7454
7478
 
7479
+ // ggml_map_custom1
7480
+ struct ggml_map_custom1_op_params {
7481
+ ggml_custom1_op_t fun;
7482
+ int n_tasks;
7483
+ void * userdata;
7484
+ };
7485
+
7486
+ static struct ggml_tensor * ggml_map_custom1_impl(
7487
+ struct ggml_context * ctx,
7488
+ struct ggml_tensor * a,
7489
+ const ggml_custom1_op_t fun,
7490
+ int n_tasks,
7491
+ void * userdata,
7492
+ bool inplace) {
7493
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7494
+
7495
+ bool is_node = false;
7496
+
7497
+ if (!inplace && a->grad) {
7498
+ is_node = true;
7499
+ }
7500
+
7501
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7502
+
7503
+ struct ggml_map_custom1_op_params params = {
7504
+ /*.fun =*/ fun,
7505
+ /*.n_tasks =*/ n_tasks,
7506
+ /*.userdata =*/ userdata
7507
+ };
7508
+ ggml_set_op_params(result, (const void *) &params, sizeof(params));
7509
+
7510
+ result->op = GGML_OP_MAP_CUSTOM1;
7511
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7512
+ result->src[0] = a;
7513
+
7514
+ return result;
7515
+ }
7516
+
7517
+ struct ggml_tensor * ggml_map_custom1(
7518
+ struct ggml_context * ctx,
7519
+ struct ggml_tensor * a,
7520
+ const ggml_custom1_op_t fun,
7521
+ int n_tasks,
7522
+ void * userdata) {
7523
+ return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
7524
+ }
7525
+
7526
+ struct ggml_tensor * ggml_map_custom1_inplace(
7527
+ struct ggml_context * ctx,
7528
+ struct ggml_tensor * a,
7529
+ const ggml_custom1_op_t fun,
7530
+ int n_tasks,
7531
+ void * userdata) {
7532
+ return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
7533
+ }
7534
+
7535
+ // ggml_map_custom2
7536
+
7537
+ struct ggml_map_custom2_op_params {
7538
+ ggml_custom2_op_t fun;
7539
+ int n_tasks;
7540
+ void * userdata;
7541
+ };
7542
+
7543
+ static struct ggml_tensor * ggml_map_custom2_impl(
7544
+ struct ggml_context * ctx,
7545
+ struct ggml_tensor * a,
7546
+ struct ggml_tensor * b,
7547
+ const ggml_custom2_op_t fun,
7548
+ int n_tasks,
7549
+ void * userdata,
7550
+ bool inplace) {
7551
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7552
+
7553
+ bool is_node = false;
7554
+
7555
+ if (!inplace && (a->grad || b->grad)) {
7556
+ is_node = true;
7557
+ }
7558
+
7559
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7560
+
7561
+ struct ggml_map_custom2_op_params params = {
7562
+ /*.fun =*/ fun,
7563
+ /*.n_tasks =*/ n_tasks,
7564
+ /*.userdata =*/ userdata
7565
+ };
7566
+ ggml_set_op_params(result, (const void *) &params, sizeof(params));
7567
+
7568
+ result->op = GGML_OP_MAP_CUSTOM2;
7569
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7570
+ result->src[0] = a;
7571
+ result->src[1] = b;
7572
+
7573
+ return result;
7574
+ }
7575
+
7576
+ struct ggml_tensor * ggml_map_custom2(
7577
+ struct ggml_context * ctx,
7578
+ struct ggml_tensor * a,
7579
+ struct ggml_tensor * b,
7580
+ const ggml_custom2_op_t fun,
7581
+ int n_tasks,
7582
+ void * userdata) {
7583
+ return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
7584
+ }
7585
+
7586
+ struct ggml_tensor * ggml_map_custom2_inplace(
7587
+ struct ggml_context * ctx,
7588
+ struct ggml_tensor * a,
7589
+ struct ggml_tensor * b,
7590
+ const ggml_custom2_op_t fun,
7591
+ int n_tasks,
7592
+ void * userdata) {
7593
+ return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
7594
+ }
7595
+
7596
+ // ggml_map_custom3
7597
+
7598
+ struct ggml_map_custom3_op_params {
7599
+ ggml_custom3_op_t fun;
7600
+ int n_tasks;
7601
+ void * userdata;
7602
+ };
7603
+
7604
+ static struct ggml_tensor * ggml_map_custom3_impl(
7605
+ struct ggml_context * ctx,
7606
+ struct ggml_tensor * a,
7607
+ struct ggml_tensor * b,
7608
+ struct ggml_tensor * c,
7609
+ const ggml_custom3_op_t fun,
7610
+ int n_tasks,
7611
+ void * userdata,
7612
+ bool inplace) {
7613
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7614
+
7615
+ bool is_node = false;
7616
+
7617
+ if (!inplace && (a->grad || b->grad || c->grad)) {
7618
+ is_node = true;
7619
+ }
7620
+
7621
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7622
+
7623
+ struct ggml_map_custom3_op_params params = {
7624
+ /*.fun =*/ fun,
7625
+ /*.n_tasks =*/ n_tasks,
7626
+ /*.userdata =*/ userdata
7627
+ };
7628
+ ggml_set_op_params(result, (const void *) &params, sizeof(params));
7629
+
7630
+ result->op = GGML_OP_MAP_CUSTOM3;
7631
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7632
+ result->src[0] = a;
7633
+ result->src[1] = b;
7634
+ result->src[2] = c;
7635
+
7636
+ return result;
7637
+ }
7638
+
7639
+ struct ggml_tensor * ggml_map_custom3(
7640
+ struct ggml_context * ctx,
7641
+ struct ggml_tensor * a,
7642
+ struct ggml_tensor * b,
7643
+ struct ggml_tensor * c,
7644
+ const ggml_custom3_op_t fun,
7645
+ int n_tasks,
7646
+ void * userdata) {
7647
+ return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
7648
+ }
7649
+
7650
+ struct ggml_tensor * ggml_map_custom3_inplace(
7651
+ struct ggml_context * ctx,
7652
+ struct ggml_tensor * a,
7653
+ struct ggml_tensor * b,
7654
+ struct ggml_tensor * c,
7655
+ const ggml_custom3_op_t fun,
7656
+ int n_tasks,
7657
+ void * userdata) {
7658
+ return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
7659
+ }
7660
+
7661
+
7662
+
7455
7663
  // ggml_cross_entropy_loss
7456
7664
 
7457
7665
  struct ggml_tensor * ggml_cross_entropy_loss(
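The block above adds the ggml_map_custom1/2/3 family: the callback now receives the thread index and thread count plus an opaque userdata pointer, and n_tasks (or GGML_N_TASKS_MAX) tells the planner how many threads to assign. A hedged usage sketch, assuming the ggml_custom1_op_t signature implied by the forward call p->fun(dst, a, params->ith, params->nth, p->userdata), and assuming `ctx` and `x` are an existing context and contiguous F32 tensor:

    // sketch: a custom element-wise op that squares an F32 tensor,
    // splitting the rows across the threads ggml hands us
    static void square_f32(struct ggml_tensor * dst, const struct ggml_tensor * a,
                           int ith, int nth, void * userdata) {
        (void) userdata;                              // unused in this sketch
        const int64_t nr  = ggml_nrows(a);            // total rows
        const int64_t dr  = (nr + nth - 1) / nth;     // rows per thread
        const int64_t ir0 = dr * ith;
        const int64_t ir1 = ir0 + dr < nr ? ir0 + dr : nr;
        for (int64_t ir = ir0; ir < ir1; ++ir) {      // assumes a contiguous layout
            const float * src = (const float *) ((const char *) a->data   + ir*a->nb[1]);
            float       * out = (float *)       ((char *)       dst->data + ir*dst->nb[1]);
            for (int64_t i = 0; i < a->ne[0]; ++i) {
                out[i] = src[i]*src[i];
            }
        }
    }

    // let ggml pick the task count; no userdata needed here
    struct ggml_tensor * y = ggml_map_custom1(ctx, x, square_f32, GGML_N_TASKS_MAX, NULL);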
@@ -9260,8 +9468,8 @@ static void ggml_compute_forward_sum_rows_f32(
9260
9468
  for (int64_t i3 = 0; i3 < ne03; i3++) {
9261
9469
  for (int64_t i2 = 0; i2 < ne02; i2++) {
9262
9470
  for (int64_t i1 = 0; i1 < ne01; i1++) {
9263
- float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
9264
- float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
9471
+ float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
9472
+ float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
9265
9473
  float row_sum = 0;
9266
9474
  ggml_vec_sum_f32(ne00, &row_sum, src_row);
9267
9475
  dst_row[0] = row_sum;
@@ -10523,71 +10731,95 @@ static void ggml_compute_forward_mul_mat(
10523
10731
  return;
10524
10732
  }
10525
10733
 
10526
- // parallelize by src0 rows
10527
- const int64_t dr = (ne01 + nth - 1)/nth;
10734
+ const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
10735
+ const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
10528
10736
 
10529
- const int64_t ir10 = dr*ith;
10530
- const int64_t ir11 = MIN(ir10 + dr, ne01);
10737
+ const int64_t nr0 = ne01; // src0 rows
10738
+ const int64_t nr1 = ne11*ne12*ne13; // src1 rows
10531
10739
 
10532
- // src1 rows
10533
- const int64_t nr1 = ne11*ne12*ne13;
10740
+ //printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
10534
10741
 
10535
- const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
10536
- const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
10742
+ // distribute the thread work across the inner or outer loop based on which one is larger
10537
10743
 
10538
- for (int64_t ir1 = 0; ir1 < nr1; ++ir1) {
10539
- const int64_t i13 = (ir1/(ne12*ne11));
10540
- const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
10541
- const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
10542
-
10543
- const int64_t ir0 = (ir1/ne11)%(ne02*ne03);
10544
- const int64_t i03 = (ir0/(ne02));
10545
- // Hack for "Falcon multi-query-attention key stutter" / alternative to ggml_repeat2.
10546
- // See https://github.com/ggerganov/llama.cpp/issues/1602#issuecomment-1606087470:
10547
- // GG: this is likely the correct way to broadcast, though need some more thought
10548
- // therefore leaving the comments to remind us for now
10549
- const int64_t i02 = (i12 / (ne12 / ne02));
10550
- // Original from PR/224 (and also essential/correct for non-broadcast matmuls in Falcon)
10551
- // const int64_t i02 = (ir0 - i03*ne02);
10552
-
10553
- const int64_t i1 = i11;
10554
- const int64_t i2 = i12;
10555
- const int64_t i3 = i13;
10556
-
10557
- const char * src0_row = (const char *) src0->data + ( 0 + i02*nb02 + i03*nb03 );
10558
-
10559
- // desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
10560
- // if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
10561
- // the original src1 data pointer, so we should index using the indices directly
10562
- // TODO: this is a bit of a hack, we should probably have a better way to handle this
10563
- const char * src1_col = (const char *) wdata +
10564
- (src1_cont || src1->type != vec_dot_type
10565
- ? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
10566
- : (i11*nb11 + i12*nb12 + i13*nb13));
10567
-
10568
- float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
10569
-
10570
- for (int64_t ir = ir10; ir < ir11; ++ir) {
10571
- vec_dot(ne00, &dst_col[ir], src0_row + ir*nb01, src1_col);
10572
- }
10744
+ const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
10745
+ const int64_t nth1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows
10746
+
10747
+ const int64_t ith0 = ith % nth0;
10748
+ const int64_t ith1 = ith / nth0;
10749
+
10750
+ const int64_t dr0 = (nr0 + nth0 - 1)/nth0;
10751
+ const int64_t dr1 = (nr1 + nth1 - 1)/nth1;
10752
+
10753
+ const int64_t ir010 = dr0*ith0;
10754
+ const int64_t ir011 = MIN(ir010 + dr0, nr0);
10755
+
10756
+ const int64_t ir110 = dr1*ith1;
10757
+ const int64_t ir111 = MIN(ir110 + dr1, nr1);
10758
+
10759
+ //printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
10760
+
10761
+ // threads with no work simply yield (not sure if it helps)
10762
+ if (ir010 >= ir011 || ir110 >= ir111) {
10763
+ sched_yield();
10764
+ return;
10573
10765
  }
10574
10766
 
10575
- //int64_t t1 = ggml_time_us();
10576
- //static int64_t acc = 0;
10577
- //acc += t1 - t0;
10578
- //if (t1 - t0 > 10) {
10579
- // printf("\n");
10580
- // printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03);
10581
- // printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03);
10582
- // printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13);
10767
+ assert(ne12 % ne02 == 0);
10768
+ assert(ne13 % ne03 == 0);
10583
10769
 
10584
- // printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc);
10585
- //}
10586
- }
10770
+ // broadcast factors
10771
+ const int64_t r2 = ne12/ne02;
10772
+ const int64_t r3 = ne13/ne03;
10587
10773
 
10774
+ // block-tiling attempt
10775
+ const int64_t blck_0 = 16;
10776
+ const int64_t blck_1 = 16;
10588
10777
 
10589
- // ggml_compute_forward_out_prod
10778
+ // attempt to reduce false-sharing (does not seem to make a difference)
10779
+ float tmp[16];
10780
+
10781
+ for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
10782
+ for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
10783
+ for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
10784
+ const int64_t i13 = (ir1/(ne12*ne11));
10785
+ const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
10786
+ const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
10590
10787
 
10788
+ // broadcast src0 into src1
10789
+ const int64_t i03 = i13/r3;
10790
+ const int64_t i02 = i12/r2;
10791
+
10792
+ const int64_t i1 = i11;
10793
+ const int64_t i2 = i12;
10794
+ const int64_t i3 = i13;
10795
+
10796
+ const char * src0_row = (const char *) src0->data + (0 + i02*nb02 + i03*nb03);
10797
+
10798
+ // desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
10799
+ // if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
10800
+ // the original src1 data pointer, so we should index using the indices directly
10801
+ // TODO: this is a bit of a hack, we should probably have a better way to handle this
10802
+ const char * src1_col = (const char *) wdata +
10803
+ (src1_cont || src1->type != vec_dot_type
10804
+ ? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
10805
+ : (i11*nb11 + i12*nb12 + i13*nb13));
10806
+
10807
+ float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
10808
+
10809
+ //for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
10810
+ // vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
10811
+ //}
10812
+
10813
+ for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
10814
+ vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
10815
+ }
10816
+ memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
10817
+ }
10818
+ }
10819
+ }
10820
+ }
10821
+
10822
+ // ggml_compute_forward_out_prod
10591
10823
 
10592
10824
  static void ggml_compute_forward_out_prod_f32(
10593
10825
  const struct ggml_compute_params * params,
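The rewritten ggml_compute_forward_mul_mat above distributes the threads over src0 rows or src1 rows, whichever count is larger, and then walks the work in 16x16 blocks into a small `tmp` buffer to limit false sharing. A standalone sketch of just the thread-partitioning arithmetic from that hunk, with illustrative sizes:

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void) {
        const long long nr0 = 4096, nr1 = 8;   // src0 rows, src1 rows (illustrative)
        const int nth = 8;                     // total threads

        // parallelize over the larger dimension, as in the hunk above
        const int nth0 = nr0 > nr1 ? nth : 1;
        const int nth1 = nr0 > nr1 ? 1 : nth;

        for (int ith = 0; ith < nth; ++ith) {
            const int ith0 = ith % nth0;
            const int ith1 = ith / nth0;
            const long long dr0 = (nr0 + nth0 - 1)/nth0;
            const long long dr1 = (nr1 + nth1 - 1)/nth1;
            printf("thread %d: src0 rows [%lld, %lld), src1 rows [%lld, %lld)\n",
                   ith, dr0*ith0, MIN(dr0*ith0 + dr0, nr0), dr1*ith1, MIN(dr1*ith1 + dr1, nr1));
        }
        return 0;
    }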
@@ -12871,7 +13103,7 @@ static void ggml_compute_forward_pool_1d(
12871
13103
  const struct ggml_tensor * src0,
12872
13104
  struct ggml_tensor * dst) {
12873
13105
 
12874
- const int32_t* opts = (const int32_t*)dst->op_params;
13106
+ const int32_t * opts = (const int32_t *)dst->op_params;
12875
13107
  enum ggml_op_pool op = opts[0];
12876
13108
  const int k0 = opts[1];
12877
13109
  const int s0 = opts[2];
@@ -14204,24 +14436,6 @@ static void ggml_compute_forward_map_custom1_f32(
14204
14436
  fun(dst, a);
14205
14437
  }
14206
14438
 
14207
-
14208
- static void ggml_compute_forward_map_custom1(
14209
- const struct ggml_compute_params * params,
14210
- const struct ggml_tensor * a,
14211
- struct ggml_tensor * dst,
14212
- const ggml_custom1_op_f32_t fun) {
14213
- switch (a->type) {
14214
- case GGML_TYPE_F32:
14215
- {
14216
- ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
14217
- } break;
14218
- default:
14219
- {
14220
- GGML_ASSERT(false);
14221
- } break;
14222
- }
14223
- }
14224
-
14225
14439
  // ggml_compute_forward_map_custom2
14226
14440
 
14227
14441
  static void ggml_compute_forward_map_custom2_f32(
@@ -14240,24 +14454,6 @@ static void ggml_compute_forward_map_custom2_f32(
14240
14454
  }
14241
14455
 
14242
14456
 
14243
- static void ggml_compute_forward_map_custom2(
14244
- const struct ggml_compute_params * params,
14245
- const struct ggml_tensor * a,
14246
- const struct ggml_tensor * b,
14247
- struct ggml_tensor * dst,
14248
- const ggml_custom2_op_f32_t fun) {
14249
- switch (a->type) {
14250
- case GGML_TYPE_F32:
14251
- {
14252
- ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
14253
- } break;
14254
- default:
14255
- {
14256
- GGML_ASSERT(false);
14257
- } break;
14258
- }
14259
- }
14260
-
14261
14457
  // ggml_compute_forward_map_custom3
14262
14458
 
14263
14459
  static void ggml_compute_forward_map_custom3_f32(
@@ -14276,24 +14472,52 @@ static void ggml_compute_forward_map_custom3_f32(
14276
14472
  fun(dst, a, b, c);
14277
14473
  }
14278
14474
 
14475
+ // ggml_compute_forward_map_custom1
14476
+
14477
+ static void ggml_compute_forward_map_custom1(
14478
+ const struct ggml_compute_params * params,
14479
+ const struct ggml_tensor * a,
14480
+ struct ggml_tensor * dst) {
14481
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14482
+ return;
14483
+ }
14484
+
14485
+ struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
14486
+
14487
+ p->fun(dst, a, params->ith, params->nth, p->userdata);
14488
+ }
14489
+
14490
+ // ggml_compute_forward_map_custom2
14491
+
14492
+ static void ggml_compute_forward_map_custom2(
14493
+ const struct ggml_compute_params * params,
14494
+ const struct ggml_tensor * a,
14495
+ const struct ggml_tensor * b,
14496
+ struct ggml_tensor * dst) {
14497
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14498
+ return;
14499
+ }
14500
+
14501
+ struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
14502
+
14503
+ p->fun(dst, a, b, params->ith, params->nth, p->userdata);
14504
+ }
14505
+
14506
+ // ggml_compute_forward_map_custom3
14279
14507
 
14280
14508
  static void ggml_compute_forward_map_custom3(
14281
14509
  const struct ggml_compute_params * params,
14282
14510
  const struct ggml_tensor * a,
14283
14511
  const struct ggml_tensor * b,
14284
14512
  const struct ggml_tensor * c,
14285
- struct ggml_tensor * dst,
14286
- const ggml_custom3_op_f32_t fun) {
14287
- switch (a->type) {
14288
- case GGML_TYPE_F32:
14289
- {
14290
- ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
14291
- } break;
14292
- default:
14293
- {
14294
- GGML_ASSERT(false);
14295
- } break;
14513
+ struct ggml_tensor * dst) {
14514
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14515
+ return;
14296
14516
  }
14517
+
14518
+ struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
14519
+
14520
+ p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
14297
14521
  }
14298
14522
 
14299
14523
  // ggml_compute_forward_cross_entropy_loss
@@ -14815,25 +15039,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
14815
15039
  ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
14816
15040
  }
14817
15041
  break;
14818
- case GGML_OP_MAP_CUSTOM1:
15042
+ case GGML_OP_MAP_CUSTOM1_F32:
14819
15043
  {
14820
15044
  ggml_custom1_op_f32_t fun;
14821
15045
  memcpy(&fun, tensor->op_params, sizeof(fun));
14822
- ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
15046
+ ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
14823
15047
  }
14824
15048
  break;
14825
- case GGML_OP_MAP_CUSTOM2:
15049
+ case GGML_OP_MAP_CUSTOM2_F32:
14826
15050
  {
14827
15051
  ggml_custom2_op_f32_t fun;
14828
15052
  memcpy(&fun, tensor->op_params, sizeof(fun));
14829
- ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
15053
+ ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
14830
15054
  }
14831
15055
  break;
14832
- case GGML_OP_MAP_CUSTOM3:
15056
+ case GGML_OP_MAP_CUSTOM3_F32:
14833
15057
  {
14834
15058
  ggml_custom3_op_f32_t fun;
14835
15059
  memcpy(&fun, tensor->op_params, sizeof(fun));
14836
- ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
15060
+ ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
15061
+ }
15062
+ break;
15063
+ case GGML_OP_MAP_CUSTOM1:
15064
+ {
15065
+ ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
15066
+ }
15067
+ break;
15068
+ case GGML_OP_MAP_CUSTOM2:
15069
+ {
15070
+ ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
15071
+ }
15072
+ break;
15073
+ case GGML_OP_MAP_CUSTOM3:
15074
+ {
15075
+ ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
14837
15076
  }
14838
15077
  break;
14839
15078
  case GGML_OP_CROSS_ENTROPY_LOSS:
@@ -15641,6 +15880,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
15641
15880
  } break;
15642
15881
  case GGML_OP_MAP_UNARY:
15643
15882
  case GGML_OP_MAP_BINARY:
15883
+ case GGML_OP_MAP_CUSTOM1_F32:
15884
+ case GGML_OP_MAP_CUSTOM2_F32:
15885
+ case GGML_OP_MAP_CUSTOM3_F32:
15644
15886
  case GGML_OP_MAP_CUSTOM1:
15645
15887
  case GGML_OP_MAP_CUSTOM2:
15646
15888
  case GGML_OP_MAP_CUSTOM3:
@@ -16426,11 +16668,38 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
16426
16668
  case GGML_OP_WIN_UNPART:
16427
16669
  case GGML_OP_MAP_UNARY:
16428
16670
  case GGML_OP_MAP_BINARY:
16671
+ case GGML_OP_MAP_CUSTOM1_F32:
16672
+ case GGML_OP_MAP_CUSTOM2_F32:
16673
+ case GGML_OP_MAP_CUSTOM3_F32:
16674
+ {
16675
+ n_tasks = 1;
16676
+ } break;
16429
16677
  case GGML_OP_MAP_CUSTOM1:
16678
+ {
16679
+ struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
16680
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
16681
+ n_tasks = n_threads;
16682
+ } else {
16683
+ n_tasks = MIN(p->n_tasks, n_threads);
16684
+ }
16685
+ } break;
16430
16686
  case GGML_OP_MAP_CUSTOM2:
16687
+ {
16688
+ struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
16689
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
16690
+ n_tasks = n_threads;
16691
+ } else {
16692
+ n_tasks = MIN(p->n_tasks, n_threads);
16693
+ }
16694
+ } break;
16431
16695
  case GGML_OP_MAP_CUSTOM3:
16432
16696
  {
16433
- n_tasks = 1;
16697
+ struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
16698
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
16699
+ n_tasks = n_threads;
16700
+ } else {
16701
+ n_tasks = MIN(p->n_tasks, n_threads);
16702
+ }
16434
16703
  } break;
16435
16704
  case GGML_OP_CROSS_ENTROPY_LOSS:
16436
16705
  {