llama_cpp 0.3.5 → 0.3.7

This diff reflects the changes between publicly released versions of the package as they appear in the supported public registry, and is provided for informational purposes only.
@@ -195,8 +195,8 @@ typedef void * thread_ret_t;
195
195
  #define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
196
196
  #define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
197
197
  #else
198
- inline static void* ggml_aligned_malloc(size_t size) {
199
- void* aligned_memory = NULL;
198
+ inline static void * ggml_aligned_malloc(size_t size) {
199
+ void * aligned_memory = NULL;
200
200
  #ifdef GGML_USE_METAL
201
201
  int result = posix_memalign(&aligned_memory, getpagesize(), size);
202
202
  #else
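For context on this hunk: on Windows, GGML_ALIGNED_MALLOC/GGML_ALIGNED_FREE map to _aligned_malloc/_aligned_free, while the non-Windows path shown here uses posix_memalign (page-aligned when GGML_USE_METAL is defined). A minimal standalone sketch of the posix_memalign pattern, independent of ggml:

    #define _POSIX_C_SOURCE 200112L
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
        void * buf = NULL;
        // request 1 MiB aligned to 64 bytes; posix_memalign returns 0 on success
        int rc = posix_memalign(&buf, 64, 1 << 20);
        if (rc != 0) {
            fprintf(stderr, "posix_memalign failed: %d\n", rc);
            return 1;
        }
        // memory from posix_memalign is released with free(), unlike
        // _aligned_malloc, which must be paired with _aligned_free
        free(buf);
        return 0;
    }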
@@ -3811,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
3811
3811
  "CROSS_ENTROPY_LOSS_BACK",
3812
3812
  };
3813
3813
 
3814
- static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
3814
+ static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
3815
3815
 
3816
3816
  static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
3817
3817
  "none",
@@ -3883,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
3883
3883
  "cross_entropy_loss_back(x,y)",
3884
3884
  };
3885
3885
 
3886
- static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
3886
+ static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
3887
3887
 
3888
3888
  static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
3889
3889
 
@@ -4110,7 +4110,7 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
4110
4110
  //
4111
4111
  // is enough, but just in case, adding the second part
4112
4112
 
4113
- return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
4113
+ return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
4114
4114
  }
4115
4115
 
4116
4116
  size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
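The change above rounds the byte size reported by ggml_nbytes up to a multiple of GGML_MEM_ALIGN, so sizes derived from it (for example when tensors are laid out back to back in a buffer) stay aligned. Assuming GGML_PAD has the usual round-up-to-multiple definition and GGML_MEM_ALIGN is 16 on 64-bit builds (both are assumptions; neither definition appears in this diff), the effect in isolation looks like this:

    #include <stdio.h>
    #include <stddef.h>

    // assumed definition: round x up to the next multiple of n (n > 0)
    #define PAD_UP(x, n) (((x) + (n) - 1) / (n) * (n))

    int main(void) {
        const size_t mem_align   = 16;              // illustrative GGML_MEM_ALIGN
        const size_t raw_sizes[] = { 30, 32, 33 };  // unpadded tensor byte sizes
        for (size_t i = 0; i < 3; i++) {
            printf("%zu -> %zu\n", raw_sizes[i], PAD_UP(raw_sizes[i], mem_align));
        }
        // prints: 30 -> 32, 32 -> 32, 33 -> 48
        return 0;
    }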
@@ -4253,7 +4253,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
4253
4253
  tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
4254
4254
  }
4255
4255
 
4256
- static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
4256
+ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
4257
4257
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
4258
4258
 
4259
4259
  return
@@ -4557,10 +4557,12 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
4557
4557
 
4558
4558
  static struct ggml_tensor * ggml_new_tensor_impl(
4559
4559
  struct ggml_context * ctx,
4560
- enum ggml_type type,
4561
- int n_dims,
4562
- const int64_t* ne,
4563
- void* data) {
4560
+ enum ggml_type type,
4561
+ int n_dims,
4562
+ const int64_t * ne,
4563
+ void * data) {
4564
+
4565
+ assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
4564
4566
 
4565
4567
  size_t data_size = 0;
4566
4568
 
@@ -4600,7 +4602,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
4600
4602
  /*.ne =*/ { 1, 1, 1, 1 },
4601
4603
  /*.nb =*/ { 0, 0, 0, 0 },
4602
4604
  /*.op =*/ GGML_OP_NONE,
4603
- /*.op_params =*/ {0},
4605
+ /*.op_params =*/ { 0 },
4604
4606
  /*.is_param =*/ false,
4605
4607
  /*.grad =*/ NULL,
4606
4608
  /*.src =*/ { NULL },
@@ -4632,6 +4634,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
4632
4634
  }
4633
4635
 
4634
4636
  static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
4637
+ GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
4635
4638
  assert(params_size <= GGML_MAX_OP_PARAMS);
4636
4639
  memcpy(tensor->op_params, params, params_size);
4637
4640
  }
@@ -4648,22 +4651,22 @@ static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int3
4648
4651
 
4649
4652
  struct ggml_tensor * ggml_new_tensor(
4650
4653
  struct ggml_context * ctx,
4651
- enum ggml_type type,
4652
- int n_dims,
4653
- const int64_t * ne) {
4654
+ enum ggml_type type,
4655
+ int n_dims,
4656
+ const int64_t * ne) {
4654
4657
  return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
4655
4658
  }
4656
4659
 
4657
4660
  struct ggml_tensor * ggml_new_tensor_1d(
4658
4661
  struct ggml_context * ctx,
4659
- enum ggml_type type,
4662
+ enum ggml_type type,
4660
4663
  int64_t ne0) {
4661
4664
  return ggml_new_tensor(ctx, type, 1, &ne0);
4662
4665
  }
4663
4666
 
4664
4667
  struct ggml_tensor * ggml_new_tensor_2d(
4665
4668
  struct ggml_context * ctx,
4666
- enum ggml_type type,
4669
+ enum ggml_type type,
4667
4670
  int64_t ne0,
4668
4671
  int64_t ne1) {
4669
4672
  const int64_t ne[2] = { ne0, ne1 };
@@ -4672,7 +4675,7 @@ struct ggml_tensor * ggml_new_tensor_2d(
4672
4675
 
4673
4676
  struct ggml_tensor * ggml_new_tensor_3d(
4674
4677
  struct ggml_context * ctx,
4675
- enum ggml_type type,
4678
+ enum ggml_type type,
4676
4679
  int64_t ne0,
4677
4680
  int64_t ne1,
4678
4681
  int64_t ne2) {
@@ -6238,6 +6241,27 @@ struct ggml_tensor * ggml_reshape_4d(
6238
6241
 
6239
6242
  // ggml_view_1d
6240
6243
 
6244
+ static struct ggml_tensor * ggml_view_tensor_offset(
6245
+ struct ggml_context * ctx,
6246
+ struct ggml_tensor * a,
6247
+ int n_dims,
6248
+ const int64_t * ne,
6249
+ size_t offset) {
6250
+ // don't calculate an offset from an unallocated tensor
6251
+ void * data = NULL;
6252
+ if (a->data != NULL) {
6253
+ data = (char *) a->data + offset;
6254
+ }
6255
+
6256
+ struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data);
6257
+
6258
+ ggml_format_name(result, "%s (view)", a->name);
6259
+
6260
+ ggml_set_op_params(result, &offset, sizeof(offset));
6261
+
6262
+ return result;
6263
+ }
6264
+
6241
6265
  struct ggml_tensor * ggml_view_1d(
6242
6266
  struct ggml_context * ctx,
6243
6267
  struct ggml_tensor * a,
@@ -6250,10 +6274,7 @@ struct ggml_tensor * ggml_view_1d(
6250
6274
  is_node = true;
6251
6275
  }
6252
6276
 
6253
- struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
6254
- ggml_format_name(result, "%s (view)", a->name);
6255
-
6256
- ggml_set_op_params(result, &offset, sizeof(offset));
6277
+ struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);
6257
6278
 
6258
6279
  result->op = GGML_OP_VIEW;
6259
6280
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
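The new static helper ggml_view_tensor_offset (added in the previous hunk) centralizes what ggml_view_1d/2d/3d/4d used to do inline, and skips the pointer arithmetic when the parent tensor has no data yet (for example when buffers are assigned later by an allocator). Calling code is unchanged; a minimal sketch of the public view API as refactored here, with illustrative names:

    // assumes a valid ggml_context `ctx`; `buf` and `half` are illustrative only
    struct ggml_tensor * buf  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);

    // view the second half of `buf` as its own 512-element tensor;
    // the offset is given in bytes, as stored into op_params above
    struct ggml_tensor * half = ggml_view_1d(ctx, buf, 512, 512*sizeof(float));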
@@ -6280,10 +6301,7 @@ struct ggml_tensor * ggml_view_2d(
6280
6301
 
6281
6302
  const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
6282
6303
 
6283
- struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
6284
- ggml_format_name(result, "%s (view)", a->name);
6285
-
6286
- ggml_set_op_params(result, &offset, sizeof(offset));
6304
+ struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);
6287
6305
 
6288
6306
  result->nb[1] = nb1;
6289
6307
  result->nb[2] = result->nb[1]*ne1;
@@ -6316,10 +6334,7 @@ struct ggml_tensor * ggml_view_3d(
6316
6334
 
6317
6335
  const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
6318
6336
 
6319
- struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
6320
- ggml_format_name(result, "%s (view)", a->name);
6321
-
6322
- ggml_set_op_params(result, &offset, sizeof(offset));
6337
+ struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);
6323
6338
 
6324
6339
  result->nb[1] = nb1;
6325
6340
  result->nb[2] = nb2;
@@ -6354,10 +6369,7 @@ struct ggml_tensor * ggml_view_4d(
6354
6369
 
6355
6370
  const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
6356
6371
 
6357
- struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
6358
- ggml_format_name(result, "%s (view)", a->name);
6359
-
6360
- ggml_set_op_params(result, &offset, sizeof(offset));
6372
+ struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);
6361
6373
 
6362
6374
  result->nb[1] = nb1;
6363
6375
  result->nb[2] = nb2;
@@ -6428,7 +6440,7 @@ struct ggml_tensor * ggml_permute(
6428
6440
  result->src[0] = a;
6429
6441
 
6430
6442
  int32_t params[] = { axis0, axis1, axis2, axis3 };
6431
- ggml_set_op_params(result, &params, sizeof(params));
6443
+ ggml_set_op_params(result, params, sizeof(params));
6432
6444
 
6433
6445
  return result;
6434
6446
  }
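A recurring change in this release (here and in the diag_mask, rope, alibi, clamp, conv, pool and win_part hunks below) is passing `params` rather than `&params` to ggml_set_op_params. Both expressions yield the same address and the same bytes, since an array decays to a pointer to its first element; the new form just passes the decayed `int32_t *` instead of an `int32_t (*)[N]`, which is the more conventional way to hand an array to a `const void *` parameter. A tiny self-contained illustration (not ggml code):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static void set_params(const void * src, size_t n) {
        unsigned char buf[64];
        memcpy(buf, src, n);   // copies the same bytes either way
        (void) buf;
    }

    int main(void) {
        int32_t params[4] = { 1, 2, 3, 4 };
        set_params(params,  sizeof(params));  // pointer to first element (preferred)
        set_params(&params, sizeof(params));  // pointer to the whole array; same address
        printf("%p %p\n", (void *) params, (void *) &params);  // identical addresses
        return 0;
    }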
@@ -6554,7 +6566,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
6554
6566
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6555
6567
 
6556
6568
  int32_t params[] = { n_past, inplace ? 1 : 0 };
6557
- ggml_set_op_params(result, &params, sizeof(params));
6569
+ ggml_set_op_params(result, params, sizeof(params));
6558
6570
 
6559
6571
  result->op = GGML_OP_DIAG_MASK_INF;
6560
6572
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6594,7 +6606,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
6594
6606
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6595
6607
 
6596
6608
  int32_t params[] = { n_past, inplace ? 1 : 0 };
6597
- ggml_set_op_params(result, &params, sizeof(params));
6609
+ ggml_set_op_params(result, params, sizeof(params));
6598
6610
 
6599
6611
  result->op = GGML_OP_DIAG_MASK_ZERO;
6600
6612
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6710,9 +6722,9 @@ static struct ggml_tensor * ggml_rope_impl(
6710
6722
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
6711
6723
 
6712
6724
  int32_t params[6] = { n_past, n_dims, mode, n_ctx };
6713
- memcpy(params + 4, &freq_base, sizeof(float));
6725
+ memcpy(params + 4, &freq_base, sizeof(float));
6714
6726
  memcpy(params + 5, &freq_scale, sizeof(float));
6715
- ggml_set_op_params(result, &params, sizeof(params));
6727
+ ggml_set_op_params(result, params, sizeof(params));
6716
6728
 
6717
6729
  result->op = GGML_OP_ROPE;
6718
6730
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6741,6 +6753,18 @@ struct ggml_tensor * ggml_rope_inplace(
6741
6753
  return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
6742
6754
  }
6743
6755
 
6756
+ struct ggml_tensor * ggml_rope_custom(
6757
+ struct ggml_context * ctx,
6758
+ struct ggml_tensor * a,
6759
+ int n_past,
6760
+ int n_dims,
6761
+ int mode,
6762
+ int n_ctx,
6763
+ float freq_base,
6764
+ float freq_scale) {
6765
+ return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, false);
6766
+ }
6767
+
6744
6768
  struct ggml_tensor * ggml_rope_custom_inplace(
6745
6769
  struct ggml_context * ctx,
6746
6770
  struct ggml_tensor * a,
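The newly added ggml_rope_custom is the non-inplace counterpart of the existing ggml_rope_custom_inplace, exposing freq_base and freq_scale (used, for example, for linear or NTK-aware RoPE scaling) without mutating the input tensor. A hedged usage sketch; the context `ctx`, the activation tensor `cur`, and all parameter values below are illustrative:

    const int   n_past     = 0;
    const int   n_dims     = 128;      // rotary dimensions
    const int   mode       = 0;        // standard RoPE mode
    const int   n_ctx      = 4096;
    const float freq_base  = 10000.0f;
    const float freq_scale = 0.5f;     // e.g. linear position scaling

    struct ggml_tensor * rotated =
        ggml_rope_custom(ctx, cur, n_past, n_dims, mode, n_ctx, freq_base, freq_scale);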
@@ -6774,7 +6798,7 @@ struct ggml_tensor * ggml_rope_back(
6774
6798
  struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
6775
6799
 
6776
6800
  int32_t params[] = { n_past, n_dims, mode, n_ctx };
6777
- ggml_set_op_params(result, &params, sizeof(params));
6801
+ ggml_set_op_params(result, params, sizeof(params));
6778
6802
 
6779
6803
  result->op = GGML_OP_ROPE_BACK;
6780
6804
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6805,7 +6829,7 @@ struct ggml_tensor * ggml_alibi(
6805
6829
 
6806
6830
  int32_t op_params[3] = { n_past, n_head };
6807
6831
  memcpy(op_params + 2, &bias_max, sizeof(float));
6808
- ggml_set_op_params(result, &op_params, sizeof(op_params));
6832
+ ggml_set_op_params(result, op_params, sizeof(op_params));
6809
6833
 
6810
6834
  result->op = GGML_OP_ALIBI;
6811
6835
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6832,7 +6856,7 @@ struct ggml_tensor * ggml_clamp(
6832
6856
  struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6833
6857
 
6834
6858
  float params[] = { min, max };
6835
- ggml_set_op_params(result, &params, sizeof(params));
6859
+ ggml_set_op_params(result, params, sizeof(params));
6836
6860
 
6837
6861
  result->op = GGML_OP_CLAMP;
6838
6862
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6867,10 +6891,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
6867
6891
  ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
6868
6892
  a->ne[2], 1, 1,
6869
6893
  };
6870
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
6894
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
6871
6895
 
6872
6896
  int32_t params[] = { s0, p0, d0 };
6873
- ggml_set_op_params(result, &params, sizeof(params));
6897
+ ggml_set_op_params(result, params, sizeof(params));
6874
6898
 
6875
6899
  result->op = GGML_OP_CONV_1D;
6876
6900
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6882,10 +6906,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
6882
6906
 
6883
6907
  // ggml_conv_2d
6884
6908
 
6885
- struct ggml_tensor* ggml_conv_2d(
6886
- struct ggml_context* ctx,
6887
- struct ggml_tensor * a,
6888
- struct ggml_tensor * b,
6909
+ struct ggml_tensor * ggml_conv_2d(
6910
+ struct ggml_context * ctx,
6911
+ struct ggml_tensor * a,
6912
+ struct ggml_tensor * b,
6889
6913
  int s0,
6890
6914
  int s1,
6891
6915
  int p0,
@@ -6906,10 +6930,10 @@ struct ggml_tensor* ggml_conv_2d(
6906
6930
  ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
6907
6931
  a->ne[3], b->ne[3],
6908
6932
  };
6909
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
6933
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
6910
6934
 
6911
6935
  int32_t params[] = { s0, s1, p0, p1, d0, d1 };
6912
- ggml_set_op_params(result, &params, sizeof(params));
6936
+ ggml_set_op_params(result, params, sizeof(params));
6913
6937
 
6914
6938
  result->op = GGML_OP_CONV_2D;
6915
6939
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6922,7 +6946,7 @@ struct ggml_tensor* ggml_conv_2d(
6922
6946
 
6923
6947
  // ggml_conv_1d_ph
6924
6948
 
6925
- struct ggml_tensor* ggml_conv_1d_ph(
6949
+ struct ggml_tensor * ggml_conv_1d_ph(
6926
6950
  struct ggml_context * ctx,
6927
6951
  struct ggml_tensor * a,
6928
6952
  struct ggml_tensor * b,
@@ -6940,7 +6964,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
6940
6964
 
6941
6965
  // ggml_pool_1d
6942
6966
 
6943
- struct ggml_tensor* ggml_pool_1d(
6967
+ struct ggml_tensor * ggml_pool_1d(
6944
6968
  struct ggml_context * ctx,
6945
6969
  struct ggml_tensor * a,
6946
6970
  enum ggml_op_pool op,
@@ -6959,10 +6983,10 @@ struct ggml_tensor* ggml_pool_1d(
6959
6983
  ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
6960
6984
  a->ne[1],
6961
6985
  };
6962
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
6986
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
6963
6987
 
6964
6988
  int32_t params[] = { op, k0, s0, p0 };
6965
- ggml_set_op_params(result, &params, sizeof(params));
6989
+ ggml_set_op_params(result, params, sizeof(params));
6966
6990
 
6967
6991
  result->op = GGML_OP_POOL_1D;
6968
6992
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -6973,7 +6997,7 @@ struct ggml_tensor* ggml_pool_1d(
6973
6997
 
6974
6998
  // ggml_pool_2d
6975
6999
 
6976
- struct ggml_tensor* ggml_pool_2d(
7000
+ struct ggml_tensor * ggml_pool_2d(
6977
7001
  struct ggml_context * ctx,
6978
7002
  struct ggml_tensor * a,
6979
7003
  enum ggml_op_pool op,
@@ -6996,10 +7020,10 @@ struct ggml_tensor* ggml_pool_2d(
6996
7020
  ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
6997
7021
  a->ne[2],
6998
7022
  };
6999
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
7023
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
7000
7024
 
7001
7025
  int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
7002
- ggml_set_op_params(result, &params, sizeof(params));
7026
+ ggml_set_op_params(result, params, sizeof(params));
7003
7027
 
7004
7028
  result->op = GGML_OP_POOL_2D;
7005
7029
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7167,7 +7191,7 @@ struct ggml_tensor * ggml_win_part(
7167
7191
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
7168
7192
 
7169
7193
  int32_t params[] = { npx, npy, w };
7170
- ggml_set_op_params(result, &params, sizeof(params));
7194
+ ggml_set_op_params(result, params, sizeof(params));
7171
7195
 
7172
7196
  result->op = GGML_OP_WIN_PART;
7173
7197
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7197,7 +7221,7 @@ struct ggml_tensor * ggml_win_unpart(
7197
7221
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
7198
7222
 
7199
7223
  int32_t params[] = { w };
7200
- ggml_set_op_params(result, &params, sizeof(params));
7224
+ ggml_set_op_params(result, params, sizeof(params));
7201
7225
 
7202
7226
  result->op = GGML_OP_WIN_UNPART;
7203
7227
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7326,7 +7350,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
7326
7350
  return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
7327
7351
  }
7328
7352
 
7329
- // ggml_map_custom1
7353
+ // ggml_map_custom1_f32
7330
7354
 
7331
7355
  static struct ggml_tensor * ggml_map_custom1_impl_f32(
7332
7356
  struct ggml_context * ctx,
@@ -7343,7 +7367,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(
7343
7367
 
7344
7368
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7345
7369
 
7346
- result->op = GGML_OP_MAP_CUSTOM1;
7370
+ result->op = GGML_OP_MAP_CUSTOM1_F32;
7347
7371
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7348
7372
  result->src[0] = a;
7349
7373
 
@@ -7364,7 +7388,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
7364
7388
  return ggml_map_custom1_impl_f32(ctx, a, fun, true);
7365
7389
  }
7366
7390
 
7367
- // ggml_map_custom2
7391
+ // ggml_map_custom2_f32
7368
7392
 
7369
7393
  static struct ggml_tensor * ggml_map_custom2_impl_f32(
7370
7394
  struct ggml_context * ctx,
@@ -7382,7 +7406,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(
7382
7406
 
7383
7407
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7384
7408
 
7385
- result->op = GGML_OP_MAP_CUSTOM2;
7409
+ result->op = GGML_OP_MAP_CUSTOM2_F32;
7386
7410
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7387
7411
  result->src[0] = a;
7388
7412
  result->src[1] = b;
@@ -7406,7 +7430,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
7406
7430
  return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
7407
7431
  }
7408
7432
 
7409
- // ggml_map_custom3
7433
+ // ggml_map_custom3_f32
7410
7434
 
7411
7435
  static struct ggml_tensor * ggml_map_custom3_impl_f32(
7412
7436
  struct ggml_context * ctx,
@@ -7425,7 +7449,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(
7425
7449
 
7426
7450
  ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
7427
7451
 
7428
- result->op = GGML_OP_MAP_CUSTOM3;
7452
+ result->op = GGML_OP_MAP_CUSTOM3_F32;
7429
7453
  result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7430
7454
  result->src[0] = a;
7431
7455
  result->src[1] = b;
@@ -7452,6 +7476,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
7452
7476
  return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
7453
7477
  }
7454
7478
 
7479
+ // ggml_map_custom1
7480
+ struct ggml_map_custom1_op_params {
7481
+ ggml_custom1_op_t fun;
7482
+ int n_tasks;
7483
+ void * userdata;
7484
+ };
7485
+
7486
+ static struct ggml_tensor * ggml_map_custom1_impl(
7487
+ struct ggml_context * ctx,
7488
+ struct ggml_tensor * a,
7489
+ const ggml_custom1_op_t fun,
7490
+ int n_tasks,
7491
+ void * userdata,
7492
+ bool inplace) {
7493
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7494
+
7495
+ bool is_node = false;
7496
+
7497
+ if (!inplace && a->grad) {
7498
+ is_node = true;
7499
+ }
7500
+
7501
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7502
+
7503
+ struct ggml_map_custom1_op_params params = {
7504
+ /*.fun =*/ fun,
7505
+ /*.n_tasks =*/ n_tasks,
7506
+ /*.userdata =*/ userdata
7507
+ };
7508
+ ggml_set_op_params(result, (const void *) &params, sizeof(params));
7509
+
7510
+ result->op = GGML_OP_MAP_CUSTOM1;
7511
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7512
+ result->src[0] = a;
7513
+
7514
+ return result;
7515
+ }
7516
+
7517
+ struct ggml_tensor * ggml_map_custom1(
7518
+ struct ggml_context * ctx,
7519
+ struct ggml_tensor * a,
7520
+ const ggml_custom1_op_t fun,
7521
+ int n_tasks,
7522
+ void * userdata) {
7523
+ return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
7524
+ }
7525
+
7526
+ struct ggml_tensor * ggml_map_custom1_inplace(
7527
+ struct ggml_context * ctx,
7528
+ struct ggml_tensor * a,
7529
+ const ggml_custom1_op_t fun,
7530
+ int n_tasks,
7531
+ void * userdata) {
7532
+ return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
7533
+ }
7534
+
7535
+ // ggml_map_custom2
7536
+
7537
+ struct ggml_map_custom2_op_params {
7538
+ ggml_custom2_op_t fun;
7539
+ int n_tasks;
7540
+ void * userdata;
7541
+ };
7542
+
7543
+ static struct ggml_tensor * ggml_map_custom2_impl(
7544
+ struct ggml_context * ctx,
7545
+ struct ggml_tensor * a,
7546
+ struct ggml_tensor * b,
7547
+ const ggml_custom2_op_t fun,
7548
+ int n_tasks,
7549
+ void * userdata,
7550
+ bool inplace) {
7551
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7552
+
7553
+ bool is_node = false;
7554
+
7555
+ if (!inplace && (a->grad || b->grad)) {
7556
+ is_node = true;
7557
+ }
7558
+
7559
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7560
+
7561
+ struct ggml_map_custom2_op_params params = {
7562
+ /*.fun =*/ fun,
7563
+ /*.n_tasks =*/ n_tasks,
7564
+ /*.userdata =*/ userdata
7565
+ };
7566
+ ggml_set_op_params(result, (const void *) &params, sizeof(params));
7567
+
7568
+ result->op = GGML_OP_MAP_CUSTOM2;
7569
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7570
+ result->src[0] = a;
7571
+ result->src[1] = b;
7572
+
7573
+ return result;
7574
+ }
7575
+
7576
+ struct ggml_tensor * ggml_map_custom2(
7577
+ struct ggml_context * ctx,
7578
+ struct ggml_tensor * a,
7579
+ struct ggml_tensor * b,
7580
+ const ggml_custom2_op_t fun,
7581
+ int n_tasks,
7582
+ void * userdata) {
7583
+ return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
7584
+ }
7585
+
7586
+ struct ggml_tensor * ggml_map_custom2_inplace(
7587
+ struct ggml_context * ctx,
7588
+ struct ggml_tensor * a,
7589
+ struct ggml_tensor * b,
7590
+ const ggml_custom2_op_t fun,
7591
+ int n_tasks,
7592
+ void * userdata) {
7593
+ return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
7594
+ }
7595
+
7596
+ // ggml_map_custom3
7597
+
7598
+ struct ggml_map_custom3_op_params {
7599
+ ggml_custom3_op_t fun;
7600
+ int n_tasks;
7601
+ void * userdata;
7602
+ };
7603
+
7604
+ static struct ggml_tensor * ggml_map_custom3_impl(
7605
+ struct ggml_context * ctx,
7606
+ struct ggml_tensor * a,
7607
+ struct ggml_tensor * b,
7608
+ struct ggml_tensor * c,
7609
+ const ggml_custom3_op_t fun,
7610
+ int n_tasks,
7611
+ void * userdata,
7612
+ bool inplace) {
7613
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
7614
+
7615
+ bool is_node = false;
7616
+
7617
+ if (!inplace && (a->grad || b->grad || c->grad)) {
7618
+ is_node = true;
7619
+ }
7620
+
7621
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
7622
+
7623
+ struct ggml_map_custom3_op_params params = {
7624
+ /*.fun =*/ fun,
7625
+ /*.n_tasks =*/ n_tasks,
7626
+ /*.userdata =*/ userdata
7627
+ };
7628
+ ggml_set_op_params(result, (const void *) &params, sizeof(params));
7629
+
7630
+ result->op = GGML_OP_MAP_CUSTOM3;
7631
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
7632
+ result->src[0] = a;
7633
+ result->src[1] = b;
7634
+ result->src[2] = c;
7635
+
7636
+ return result;
7637
+ }
7638
+
7639
+ struct ggml_tensor * ggml_map_custom3(
7640
+ struct ggml_context * ctx,
7641
+ struct ggml_tensor * a,
7642
+ struct ggml_tensor * b,
7643
+ struct ggml_tensor * c,
7644
+ const ggml_custom3_op_t fun,
7645
+ int n_tasks,
7646
+ void * userdata) {
7647
+ return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
7648
+ }
7649
+
7650
+ struct ggml_tensor * ggml_map_custom3_inplace(
7651
+ struct ggml_context * ctx,
7652
+ struct ggml_tensor * a,
7653
+ struct ggml_tensor * b,
7654
+ struct ggml_tensor * c,
7655
+ const ggml_custom3_op_t fun,
7656
+ int n_tasks,
7657
+ void * userdata) {
7658
+ return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
7659
+ }
7660
+
7661
+
7662
+
7455
7663
  // ggml_cross_entropy_loss
7456
7664
 
7457
7665
  struct ggml_tensor * ggml_cross_entropy_loss(
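The block above introduces the generalized custom-op API: ggml_map_custom1/2/3 (and their _inplace variants) store the callback, a task count, and an opaque userdata pointer in the tensor's op_params, while the older f32-only variants are kept under the renamed GGML_OP_MAP_CUSTOM*_F32 ops. A hedged sketch of registering a single-threaded custom op; the callback signature is inferred from how the forward pass calls p->fun(dst, a, ith, nth, userdata) later in this diff, and `ctx`, `x`, `scale_op` are illustrative names:

    // illustrative callback: scale every f32 element of `a` by *(float *) userdata
    static void scale_op(struct ggml_tensor * dst, const struct ggml_tensor * a,
                         int ith, int nth, void * userdata) {
        (void) nth;
        if (ith != 0) {
            return;                     // n_tasks is 1 below, so only thread 0 works
        }
        const float   s   = *(const float *) userdata;
        const int64_t n   = ggml_nelements(dst);
        const float * src = (const float *) a->data;  // assumes contiguous f32 data
        float       * out = (float *) dst->data;
        for (int64_t i = 0; i < n; i++) {
            out[i] = src[i]*s;
        }
    }

    // during graph construction, assuming a valid `ctx` and a contiguous f32 tensor `x`;
    // the userdata pointer must remain valid until the graph has been computed
    static float scale = 0.5f;
    struct ggml_tensor * y = ggml_map_custom1(ctx, x, scale_op, 1, &scale);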
@@ -9260,8 +9468,8 @@ static void ggml_compute_forward_sum_rows_f32(
9260
9468
  for (int64_t i3 = 0; i3 < ne03; i3++) {
9261
9469
  for (int64_t i2 = 0; i2 < ne02; i2++) {
9262
9470
  for (int64_t i1 = 0; i1 < ne01; i1++) {
9263
- float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
9264
- float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
9471
+ float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
9472
+ float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
9265
9473
  float row_sum = 0;
9266
9474
  ggml_vec_sum_f32(ne00, &row_sum, src_row);
9267
9475
  dst_row[0] = row_sum;
@@ -10523,71 +10731,95 @@ static void ggml_compute_forward_mul_mat(
10523
10731
  return;
10524
10732
  }
10525
10733
 
10526
- // parallelize by src0 rows
10527
- const int64_t dr = (ne01 + nth - 1)/nth;
10734
+ const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
10735
+ const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
10528
10736
 
10529
- const int64_t ir10 = dr*ith;
10530
- const int64_t ir11 = MIN(ir10 + dr, ne01);
10737
+ const int64_t nr0 = ne01; // src0 rows
10738
+ const int64_t nr1 = ne11*ne12*ne13; // src1 rows
10531
10739
 
10532
- // src1 rows
10533
- const int64_t nr1 = ne11*ne12*ne13;
10740
+ //printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
10534
10741
 
10535
- const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
10536
- const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
10742
+ // distribute the thread work across the inner or outer loop based on which one is larger
10537
10743
 
10538
- for (int64_t ir1 = 0; ir1 < nr1; ++ir1) {
10539
- const int64_t i13 = (ir1/(ne12*ne11));
10540
- const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
10541
- const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
10542
-
10543
- const int64_t ir0 = (ir1/ne11)%(ne02*ne03);
10544
- const int64_t i03 = (ir0/(ne02));
10545
- // Hack for "Falcon multi-query-attention key stutter" / alternative to ggml_repeat2.
10546
- // See https://github.com/ggerganov/llama.cpp/issues/1602#issuecomment-1606087470:
10547
- // GG: this is likely the correct way to broadcast, though need some more thought
10548
- // therefore leaving the comments to remind us for now
10549
- const int64_t i02 = (i12 / (ne12 / ne02));
10550
- // Original from PR/224 (and also essential/correct for non-broadcast matmuls in Falcon)
10551
- // const int64_t i02 = (ir0 - i03*ne02);
10552
-
10553
- const int64_t i1 = i11;
10554
- const int64_t i2 = i12;
10555
- const int64_t i3 = i13;
10556
-
10557
- const char * src0_row = (const char *) src0->data + ( 0 + i02*nb02 + i03*nb03 );
10558
-
10559
- // desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
10560
- // if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
10561
- // the original src1 data pointer, so we should index using the indices directly
10562
- // TODO: this is a bit of a hack, we should probably have a better way to handle this
10563
- const char * src1_col = (const char *) wdata +
10564
- (src1_cont || src1->type != vec_dot_type
10565
- ? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
10566
- : (i11*nb11 + i12*nb12 + i13*nb13));
10567
-
10568
- float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
10569
-
10570
- for (int64_t ir = ir10; ir < ir11; ++ir) {
10571
- vec_dot(ne00, &dst_col[ir], src0_row + ir*nb01, src1_col);
10572
- }
10744
+ const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
10745
+ const int64_t nth1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows
10746
+
10747
+ const int64_t ith0 = ith % nth0;
10748
+ const int64_t ith1 = ith / nth0;
10749
+
10750
+ const int64_t dr0 = (nr0 + nth0 - 1)/nth0;
10751
+ const int64_t dr1 = (nr1 + nth1 - 1)/nth1;
10752
+
10753
+ const int64_t ir010 = dr0*ith0;
10754
+ const int64_t ir011 = MIN(ir010 + dr0, nr0);
10755
+
10756
+ const int64_t ir110 = dr1*ith1;
10757
+ const int64_t ir111 = MIN(ir110 + dr1, nr1);
10758
+
10759
+ //printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
10760
+
10761
+ // threads with no work simply yield (not sure if it helps)
10762
+ if (ir010 >= ir011 || ir110 >= ir111) {
10763
+ sched_yield();
10764
+ return;
10573
10765
  }
10574
10766
 
10575
- //int64_t t1 = ggml_time_us();
10576
- //static int64_t acc = 0;
10577
- //acc += t1 - t0;
10578
- //if (t1 - t0 > 10) {
10579
- // printf("\n");
10580
- // printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03);
10581
- // printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03);
10582
- // printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13);
10767
+ assert(ne12 % ne02 == 0);
10768
+ assert(ne13 % ne03 == 0);
10583
10769
 
10584
- // printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc);
10585
- //}
10586
- }
10770
+ // broadcast factors
10771
+ const int64_t r2 = ne12/ne02;
10772
+ const int64_t r3 = ne13/ne03;
10587
10773
 
10774
+ // block-tiling attempt
10775
+ const int64_t blck_0 = 16;
10776
+ const int64_t blck_1 = 16;
10588
10777
 
10589
- // ggml_compute_forward_out_prod
10778
+ // attempt to reduce false-sharing (does not seem to make a difference)
10779
+ float tmp[16];
10780
+
10781
+ for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
10782
+ for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
10783
+ for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
10784
+ const int64_t i13 = (ir1/(ne12*ne11));
10785
+ const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
10786
+ const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
10590
10787
 
10788
+ // broadcast src0 into src1
10789
+ const int64_t i03 = i13/r3;
10790
+ const int64_t i02 = i12/r2;
10791
+
10792
+ const int64_t i1 = i11;
10793
+ const int64_t i2 = i12;
10794
+ const int64_t i3 = i13;
10795
+
10796
+ const char * src0_row = (const char *) src0->data + (0 + i02*nb02 + i03*nb03);
10797
+
10798
+ // desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
10799
+ // if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
10800
+ // the original src1 data pointer, so we should index using the indices directly
10801
+ // TODO: this is a bit of a hack, we should probably have a better way to handle this
10802
+ const char * src1_col = (const char *) wdata +
10803
+ (src1_cont || src1->type != vec_dot_type
10804
+ ? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
10805
+ : (i11*nb11 + i12*nb12 + i13*nb13));
10806
+
10807
+ float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
10808
+
10809
+ //for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
10810
+ // vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
10811
+ //}
10812
+
10813
+ for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
10814
+ vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
10815
+ }
10816
+ memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
10817
+ }
10818
+ }
10819
+ }
10820
+ }
10821
+
10822
+ // ggml_compute_forward_out_prod
10591
10823
 
10592
10824
  static void ggml_compute_forward_out_prod_f32(
10593
10825
  const struct ggml_compute_params * params,
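The rewritten ggml_compute_forward_mul_mat above no longer parallelizes strictly over src0 rows: it forms an nth0 x nth1 grid of threads over (src0 rows, src1 rows), assigning all threads to whichever dimension is larger, and then walks each thread's range in 16x16 tiles with a small temporary buffer to reduce false sharing. The index arithmetic in isolation (a standalone sketch mirroring the code above, not the ggml code itself):

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void) {
        const long long nr0 = 4096, nr1 = 8;  // e.g. many src0 rows, few src1 rows
        const int nth = 8;                    // total threads

        for (int ith = 0; ith < nth; ith++) {
            // give all threads to the larger dimension, one to the other
            const long long nth0 = nr0 > nr1 ? nth : 1;
            const long long nth1 = nr0 > nr1 ? 1 : nth;

            const long long ith0 = ith % nth0;
            const long long ith1 = ith / nth0;

            const long long dr0 = (nr0 + nth0 - 1)/nth0;
            const long long dr1 = (nr1 + nth1 - 1)/nth1;

            printf("thread %d: rows0 [%lld, %lld), rows1 [%lld, %lld)\n",
                   ith, dr0*ith0, MIN(dr0*ith0 + dr0, nr0),
                   dr1*ith1, MIN(dr1*ith1 + dr1, nr1));
        }
        return 0;
    }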
@@ -12871,7 +13103,7 @@ static void ggml_compute_forward_pool_1d(
12871
13103
  const struct ggml_tensor * src0,
12872
13104
  struct ggml_tensor * dst) {
12873
13105
 
12874
- const int32_t* opts = (const int32_t*)dst->op_params;
13106
+ const int32_t * opts = (const int32_t *)dst->op_params;
12875
13107
  enum ggml_op_pool op = opts[0];
12876
13108
  const int k0 = opts[1];
12877
13109
  const int s0 = opts[2];
@@ -14204,24 +14436,6 @@ static void ggml_compute_forward_map_custom1_f32(
14204
14436
  fun(dst, a);
14205
14437
  }
14206
14438
 
14207
-
14208
- static void ggml_compute_forward_map_custom1(
14209
- const struct ggml_compute_params * params,
14210
- const struct ggml_tensor * a,
14211
- struct ggml_tensor * dst,
14212
- const ggml_custom1_op_f32_t fun) {
14213
- switch (a->type) {
14214
- case GGML_TYPE_F32:
14215
- {
14216
- ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
14217
- } break;
14218
- default:
14219
- {
14220
- GGML_ASSERT(false);
14221
- } break;
14222
- }
14223
- }
14224
-
14225
14439
  // ggml_compute_forward_map_custom2
14226
14440
 
14227
14441
  static void ggml_compute_forward_map_custom2_f32(
@@ -14240,24 +14454,6 @@ static void ggml_compute_forward_map_custom2_f32(
14240
14454
  }
14241
14455
 
14242
14456
 
14243
- static void ggml_compute_forward_map_custom2(
14244
- const struct ggml_compute_params * params,
14245
- const struct ggml_tensor * a,
14246
- const struct ggml_tensor * b,
14247
- struct ggml_tensor * dst,
14248
- const ggml_custom2_op_f32_t fun) {
14249
- switch (a->type) {
14250
- case GGML_TYPE_F32:
14251
- {
14252
- ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
14253
- } break;
14254
- default:
14255
- {
14256
- GGML_ASSERT(false);
14257
- } break;
14258
- }
14259
- }
14260
-
14261
14457
  // ggml_compute_forward_map_custom3
14262
14458
 
14263
14459
  static void ggml_compute_forward_map_custom3_f32(
@@ -14276,24 +14472,52 @@ static void ggml_compute_forward_map_custom3_f32(
14276
14472
  fun(dst, a, b, c);
14277
14473
  }
14278
14474
 
14475
+ // ggml_compute_forward_map_custom1
14476
+
14477
+ static void ggml_compute_forward_map_custom1(
14478
+ const struct ggml_compute_params * params,
14479
+ const struct ggml_tensor * a,
14480
+ struct ggml_tensor * dst) {
14481
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14482
+ return;
14483
+ }
14484
+
14485
+ struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
14486
+
14487
+ p->fun(dst, a, params->ith, params->nth, p->userdata);
14488
+ }
14489
+
14490
+ // ggml_compute_forward_map_custom2
14491
+
14492
+ static void ggml_compute_forward_map_custom2(
14493
+ const struct ggml_compute_params * params,
14494
+ const struct ggml_tensor * a,
14495
+ const struct ggml_tensor * b,
14496
+ struct ggml_tensor * dst) {
14497
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14498
+ return;
14499
+ }
14500
+
14501
+ struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
14502
+
14503
+ p->fun(dst, a, b, params->ith, params->nth, p->userdata);
14504
+ }
14505
+
14506
+ // ggml_compute_forward_map_custom3
14279
14507
 
14280
14508
  static void ggml_compute_forward_map_custom3(
14281
14509
  const struct ggml_compute_params * params,
14282
14510
  const struct ggml_tensor * a,
14283
14511
  const struct ggml_tensor * b,
14284
14512
  const struct ggml_tensor * c,
14285
- struct ggml_tensor * dst,
14286
- const ggml_custom3_op_f32_t fun) {
14287
- switch (a->type) {
14288
- case GGML_TYPE_F32:
14289
- {
14290
- ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
14291
- } break;
14292
- default:
14293
- {
14294
- GGML_ASSERT(false);
14295
- } break;
14513
+ struct ggml_tensor * dst) {
14514
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14515
+ return;
14296
14516
  }
14517
+
14518
+ struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
14519
+
14520
+ p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
14297
14521
  }
14298
14522
 
14299
14523
  // ggml_compute_forward_cross_entropy_loss
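As the dispatchers above show, the new custom-op callbacks receive params->ith and params->nth directly, so a callback can partition its own work instead of being limited to one thread like the *_f32 variants. A hedged sketch of such a callback for the ggml_map_custom1 API added earlier in this diff; the row-chunking scheme and the names below are illustrative, not required behaviour:

    // illustrative multi-threaded callback: dst = a + 1, rows split across threads
    static void add_one_op(struct ggml_tensor * dst, const struct ggml_tensor * a,
                           int ith, int nth, void * userdata) {
        (void) userdata;
        const int64_t nr  = ggml_nrows(dst);       // rows = ne[1]*ne[2]*ne[3]
        const int64_t dr  = (nr + nth - 1)/nth;    // rows per thread
        const int64_t ir0 = dr*ith;
        const int64_t ir1 = ir0 + dr < nr ? ir0 + dr : nr;

        for (int64_t ir = ir0; ir < ir1; ir++) {
            const float * src = (const float *) ((const char *) a->data + ir*a->nb[1]);
            float       * out = (float *) ((char *) dst->data + ir*dst->nb[1]);
            for (int64_t i = 0; i < dst->ne[0]; i++) {
                out[i] = src[i] + 1.0f;
            }
        }
    }

    // request as many tasks as the scheduler allows:
    // struct ggml_tensor * y = ggml_map_custom1(ctx, x, add_one_op, GGML_N_TASKS_MAX, NULL);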
@@ -14815,25 +15039,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
14815
15039
  ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
14816
15040
  }
14817
15041
  break;
14818
- case GGML_OP_MAP_CUSTOM1:
15042
+ case GGML_OP_MAP_CUSTOM1_F32:
14819
15043
  {
14820
15044
  ggml_custom1_op_f32_t fun;
14821
15045
  memcpy(&fun, tensor->op_params, sizeof(fun));
14822
- ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
15046
+ ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
14823
15047
  }
14824
15048
  break;
14825
- case GGML_OP_MAP_CUSTOM2:
15049
+ case GGML_OP_MAP_CUSTOM2_F32:
14826
15050
  {
14827
15051
  ggml_custom2_op_f32_t fun;
14828
15052
  memcpy(&fun, tensor->op_params, sizeof(fun));
14829
- ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
15053
+ ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
14830
15054
  }
14831
15055
  break;
14832
- case GGML_OP_MAP_CUSTOM3:
15056
+ case GGML_OP_MAP_CUSTOM3_F32:
14833
15057
  {
14834
15058
  ggml_custom3_op_f32_t fun;
14835
15059
  memcpy(&fun, tensor->op_params, sizeof(fun));
14836
- ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
15060
+ ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
15061
+ }
15062
+ break;
15063
+ case GGML_OP_MAP_CUSTOM1:
15064
+ {
15065
+ ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
15066
+ }
15067
+ break;
15068
+ case GGML_OP_MAP_CUSTOM2:
15069
+ {
15070
+ ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
15071
+ }
15072
+ break;
15073
+ case GGML_OP_MAP_CUSTOM3:
15074
+ {
15075
+ ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
14837
15076
  }
14838
15077
  break;
14839
15078
  case GGML_OP_CROSS_ENTROPY_LOSS:
@@ -15641,6 +15880,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
15641
15880
  } break;
15642
15881
  case GGML_OP_MAP_UNARY:
15643
15882
  case GGML_OP_MAP_BINARY:
15883
+ case GGML_OP_MAP_CUSTOM1_F32:
15884
+ case GGML_OP_MAP_CUSTOM2_F32:
15885
+ case GGML_OP_MAP_CUSTOM3_F32:
15644
15886
  case GGML_OP_MAP_CUSTOM1:
15645
15887
  case GGML_OP_MAP_CUSTOM2:
15646
15888
  case GGML_OP_MAP_CUSTOM3:
@@ -16426,11 +16668,38 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
16426
16668
  case GGML_OP_WIN_UNPART:
16427
16669
  case GGML_OP_MAP_UNARY:
16428
16670
  case GGML_OP_MAP_BINARY:
16671
+ case GGML_OP_MAP_CUSTOM1_F32:
16672
+ case GGML_OP_MAP_CUSTOM2_F32:
16673
+ case GGML_OP_MAP_CUSTOM3_F32:
16674
+ {
16675
+ n_tasks = 1;
16676
+ } break;
16429
16677
  case GGML_OP_MAP_CUSTOM1:
16678
+ {
16679
+ struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
16680
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
16681
+ n_tasks = n_threads;
16682
+ } else {
16683
+ n_tasks = MIN(p->n_tasks, n_threads);
16684
+ }
16685
+ } break;
16430
16686
  case GGML_OP_MAP_CUSTOM2:
16687
+ {
16688
+ struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
16689
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
16690
+ n_tasks = n_threads;
16691
+ } else {
16692
+ n_tasks = MIN(p->n_tasks, n_threads);
16693
+ }
16694
+ } break;
16431
16695
  case GGML_OP_MAP_CUSTOM3:
16432
16696
  {
16433
- n_tasks = 1;
16697
+ struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
16698
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
16699
+ n_tasks = n_threads;
16700
+ } else {
16701
+ n_tasks = MIN(p->n_tasks, n_threads);
16702
+ }
16434
16703
  } break;
16435
16704
  case GGML_OP_CROSS_ENTROPY_LOSS:
16436
16705
  {
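ggml_graph_plan (above) turns the stored n_tasks into an actual task count at plan time: GGML_N_TASKS_MAX means "use every available thread", any other value is clamped to n_threads, and the legacy *_F32 ops always get a single task. In practice the n_tasks argument of ggml_map_custom1/2/3 is therefore an upper bound, as in this sketch (reusing the illustrative scale_op and add_one_op callbacks from the earlier sketches):

    // one task only: the callback may ignore ith/nth entirely
    struct ggml_tensor * y1 = ggml_map_custom1(ctx, x, scale_op, 1, &scale);

    // at most 4 tasks, fewer if the plan is built with fewer threads
    struct ggml_tensor * y4 = ggml_map_custom1(ctx, x, add_one_op, 4, NULL);

    // as many tasks as the plan has threads
    struct ggml_tensor * ym = ggml_map_custom1(ctx, x, add_one_op, GGML_N_TASKS_MAX, NULL);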