llama_cpp 0.12.7 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -355,6 +355,10 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
355
355
  }
356
356
  }
357
357
 
358
+ bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
359
+ return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
360
+ }
361
+
358
362
  //
359
363
  // timing
360
364
  //
@@ -678,6 +682,30 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
678
682
  .vec_dot_type = GGML_TYPE_Q8_K,
679
683
  .nrows = 1,
680
684
  },
685
+ [GGML_TYPE_IQ3_S] = {
686
+ .type_name = "iq3_s",
687
+ .blck_size = QK_K,
688
+ .type_size = sizeof(block_iq3_s),
689
+ .is_quantized = true,
690
+ .to_float = (ggml_to_float_t) dequantize_row_iq3_s,
691
+ .from_float = quantize_row_iq3_s,
692
+ .from_float_reference = (ggml_from_float_t)quantize_row_iq3_s_reference,
693
+ .vec_dot = ggml_vec_dot_iq3_s_q8_K,
694
+ .vec_dot_type = GGML_TYPE_Q8_K,
695
+ .nrows = 1,
696
+ },
697
+ [GGML_TYPE_IQ2_S] = {
698
+ .type_name = "iq2_s",
699
+ .blck_size = QK_K,
700
+ .type_size = sizeof(block_iq2_s),
701
+ .is_quantized = true,
702
+ .to_float = (ggml_to_float_t) dequantize_row_iq2_s,
703
+ .from_float = quantize_row_iq2_s,
704
+ .from_float_reference = (ggml_from_float_t)quantize_row_iq2_s_reference,
705
+ .vec_dot = ggml_vec_dot_iq2_s_q8_K,
706
+ .vec_dot_type = GGML_TYPE_Q8_K,
707
+ .nrows = 1,
708
+ },
681
709
  [GGML_TYPE_IQ1_S] = {
682
710
  .type_name = "iq1_s",
683
711
  .blck_size = QK_K,
@@ -702,6 +730,26 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
702
730
  .vec_dot_type = GGML_TYPE_Q8_0,
703
731
  .nrows = 1,
704
732
  },
733
+ [GGML_TYPE_IQ4_XS] = {
734
+ .type_name = "iq4_xs",
735
+ #if QK_K == 64
736
+ .blck_size = QK4_NL,
737
+ #else
738
+ .blck_size = QK_K,
739
+ #endif
740
+ .type_size = sizeof(block_iq4_xs),
741
+ .is_quantized = true,
742
+ .to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
743
+ .from_float = quantize_row_iq4_xs,
744
+ .from_float_reference = (ggml_from_float_t)quantize_row_iq4_xs_reference,
745
+ .vec_dot = ggml_vec_dot_iq4_xs_q8_K,
746
+ #if QK_K == 64
747
+ .vec_dot_type = GGML_TYPE_Q8_0,
748
+ #else
749
+ .vec_dot_type = GGML_TYPE_Q8_K,
750
+ #endif
751
+ .nrows = 1,
752
+ },
705
753
  [GGML_TYPE_Q8_K] = {
706
754
  .type_name = "q8_K",
707
755
  .blck_size = QK_K,
@@ -1560,9 +1608,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
1560
1608
  inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
1561
1609
  uint16_t t;
1562
1610
  for (int i = 0; i < n; ++i) {
1563
- ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
1564
- memcpy(&t, &fp16, sizeof(uint16_t));
1565
- y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
1611
+ if (x[i] <= -10.0f) {
1612
+ y[i] = 0.0f;
1613
+ } else if (x[i] >= 10.0f) {
1614
+ y[i] = x[i];
1615
+ } else {
1616
+ ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
1617
+ memcpy(&t, &fp16, sizeof(uint16_t));
1618
+ y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
1619
+ }
1566
1620
  }
1567
1621
  }
1568
1622
  #else
@@ -2304,6 +2358,9 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
2304
2358
  case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
2305
2359
  case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
2306
2360
  case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
2361
+ case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
2362
+ case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
2363
+ case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
2307
2364
  case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
2308
2365
  case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
2309
2366
  }
@@ -2708,7 +2765,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
2708
2765
  }
2709
2766
  }
2710
2767
 
2711
- struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
2768
+ struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
2712
2769
 
2713
2770
  // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
2714
2771
 
@@ -2716,7 +2773,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
2716
2773
 
2717
2774
  *result = (struct ggml_tensor) {
2718
2775
  /*.type =*/ type,
2719
- /*.backend =*/ GGML_BACKEND_CPU,
2776
+ /*.backend =*/ GGML_BACKEND_TYPE_CPU,
2720
2777
  /*.buffer =*/ NULL,
2721
2778
  /*.ne =*/ { 1, 1, 1, 1 },
2722
2779
  /*.nb =*/ { 0, 0, 0, 0 },
@@ -3289,7 +3346,7 @@ struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
3289
3346
  char * const mem_buffer = ctx->mem_buffer;
3290
3347
 
3291
3348
  while (obj != NULL) {
3292
- if (obj->type == GGML_OBJECT_TENSOR) {
3349
+ if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
3293
3350
  return (struct ggml_tensor *)(mem_buffer + obj->offs);
3294
3351
  }
3295
3352
 
@@ -3306,7 +3363,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struc
3306
3363
  char * const mem_buffer = ctx->mem_buffer;
3307
3364
 
3308
3365
  while (obj != NULL) {
3309
- if (obj->type == GGML_OBJECT_TENSOR) {
3366
+ if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
3310
3367
  return (struct ggml_tensor *)(mem_buffer + obj->offs);
3311
3368
  }
3312
3369
 
@@ -3322,7 +3379,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
3322
3379
  char * const mem_buffer = ctx->mem_buffer;
3323
3380
 
3324
3381
  while (obj != NULL) {
3325
- if (obj->type == GGML_OBJECT_TENSOR) {
3382
+ if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
3326
3383
  struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
3327
3384
  if (strcmp(cur->name, name) == 0) {
3328
3385
  return cur;
@@ -5729,11 +5786,13 @@ struct ggml_tensor * ggml_pool_1d(
5729
5786
  is_node = true;
5730
5787
  }
5731
5788
 
5732
- const int64_t ne[2] = {
5789
+ const int64_t ne[4] = {
5733
5790
  ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
5734
5791
  a->ne[1],
5792
+ a->ne[2],
5793
+ a->ne[3],
5735
5794
  };
5736
- struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
5795
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5737
5796
 
5738
5797
  int32_t params[] = { op, k0, s0, p0 };
5739
5798
  ggml_set_op_params(result, params, sizeof(params));
@@ -5866,7 +5925,7 @@ struct ggml_tensor * ggml_top_k(
5866
5925
  int k) {
5867
5926
  GGML_ASSERT(a->ne[0] >= k);
5868
5927
 
5869
- struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_DESC);
5928
+ struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
5870
5929
 
5871
5930
  result = ggml_view_4d(ctx, result,
5872
5931
  k, result->ne[1], result->ne[2], result->ne[3],
@@ -6660,7 +6719,7 @@ static void ggml_compute_forward_dup_same_cont(
6660
6719
  GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
6661
6720
  GGML_ASSERT(src0->type == dst->type);
6662
6721
 
6663
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
6722
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
6664
6723
  return;
6665
6724
  }
6666
6725
 
@@ -6692,7 +6751,7 @@ static void ggml_compute_forward_dup_f16(
6692
6751
 
6693
6752
  GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
6694
6753
 
6695
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
6754
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
6696
6755
  return;
6697
6756
  }
6698
6757
 
@@ -6965,7 +7024,7 @@ static void ggml_compute_forward_dup_f32(
6965
7024
 
6966
7025
  GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
6967
7026
 
6968
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7027
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
6969
7028
  return;
6970
7029
  }
6971
7030
 
@@ -7218,7 +7277,7 @@ static void ggml_compute_forward_dup_bytes(
7218
7277
  GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
7219
7278
  GGML_ASSERT(src0->type == dst->type);
7220
7279
 
7221
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7280
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7222
7281
  return;
7223
7282
  }
7224
7283
 
@@ -7398,7 +7457,7 @@ static void ggml_compute_forward_add_f32(
7398
7457
 
7399
7458
  GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
7400
7459
 
7401
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7460
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7402
7461
  return;
7403
7462
  }
7404
7463
 
@@ -7406,7 +7465,7 @@ static void ggml_compute_forward_add_f32(
7406
7465
  const int nth = params->nth;
7407
7466
 
7408
7467
  #ifdef GGML_USE_CLBLAST
7409
- if (src1->backend == GGML_BACKEND_GPU) {
7468
+ if (src1->backend == GGML_BACKEND_TYPE_GPU) {
7410
7469
  // TODO: OpenCL kernel support full broadcast
7411
7470
  GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
7412
7471
  if (ith == 0) {
@@ -7488,7 +7547,7 @@ static void ggml_compute_forward_add_f16_f32(
7488
7547
 
7489
7548
  GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
7490
7549
 
7491
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7550
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7492
7551
  return;
7493
7552
  }
7494
7553
 
@@ -7567,7 +7626,7 @@ static void ggml_compute_forward_add_f16_f16(
7567
7626
 
7568
7627
  GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
7569
7628
 
7570
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7629
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7571
7630
  return;
7572
7631
  }
7573
7632
 
@@ -7623,7 +7682,7 @@ static void ggml_compute_forward_add_q_f32(
7623
7682
 
7624
7683
  GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
7625
7684
 
7626
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7685
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7627
7686
  return;
7628
7687
  }
7629
7688
 
@@ -7738,6 +7797,9 @@ static void ggml_compute_forward_add(
7738
7797
  case GGML_TYPE_IQ3_XXS:
7739
7798
  case GGML_TYPE_IQ1_S:
7740
7799
  case GGML_TYPE_IQ4_NL:
7800
+ case GGML_TYPE_IQ4_XS:
7801
+ case GGML_TYPE_IQ3_S:
7802
+ case GGML_TYPE_IQ2_S:
7741
7803
  {
7742
7804
  ggml_compute_forward_add_q_f32(params, dst);
7743
7805
  } break;
@@ -7760,7 +7822,7 @@ static void ggml_compute_forward_add1_f32(
7760
7822
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
7761
7823
  GGML_ASSERT(ggml_is_scalar(src1));
7762
7824
 
7763
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7825
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7764
7826
  return;
7765
7827
  }
7766
7828
 
@@ -7814,7 +7876,7 @@ static void ggml_compute_forward_add1_f16_f32(
7814
7876
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
7815
7877
  GGML_ASSERT(ggml_is_scalar(src1));
7816
7878
 
7817
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7879
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7818
7880
  return;
7819
7881
  }
7820
7882
 
@@ -7866,7 +7928,7 @@ static void ggml_compute_forward_add1_f16_f16(
7866
7928
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
7867
7929
  GGML_ASSERT(ggml_is_scalar(src1));
7868
7930
 
7869
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7931
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7870
7932
  return;
7871
7933
  }
7872
7934
 
@@ -7918,7 +7980,7 @@ static void ggml_compute_forward_add1_q_f32(
7918
7980
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
7919
7981
  GGML_ASSERT(ggml_is_scalar(src1));
7920
7982
 
7921
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
7983
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
7922
7984
  return;
7923
7985
  }
7924
7986
 
@@ -8017,6 +8079,9 @@ static void ggml_compute_forward_add1(
8017
8079
  case GGML_TYPE_IQ3_XXS:
8018
8080
  case GGML_TYPE_IQ1_S:
8019
8081
  case GGML_TYPE_IQ4_NL:
8082
+ case GGML_TYPE_IQ4_XS:
8083
+ case GGML_TYPE_IQ3_S:
8084
+ case GGML_TYPE_IQ2_S:
8020
8085
  {
8021
8086
  ggml_compute_forward_add1_q_f32(params, dst);
8022
8087
  } break;
@@ -8047,7 +8112,7 @@ static void ggml_compute_forward_acc_f32(
8047
8112
  size_t offset = ((int32_t *) dst->op_params)[3];
8048
8113
  bool inplace = (bool) ((int32_t *) dst->op_params)[4];
8049
8114
 
8050
- if (!inplace && (params->type == GGML_TASK_INIT)) {
8115
+ if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
8051
8116
  if (params->ith != 0) {
8052
8117
  return;
8053
8118
  }
@@ -8059,7 +8124,7 @@ static void ggml_compute_forward_acc_f32(
8059
8124
  ggml_nbytes(dst));
8060
8125
  }
8061
8126
 
8062
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8127
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8063
8128
  return;
8064
8129
  }
8065
8130
 
@@ -8141,6 +8206,9 @@ static void ggml_compute_forward_acc(
8141
8206
  case GGML_TYPE_IQ3_XXS:
8142
8207
  case GGML_TYPE_IQ1_S:
8143
8208
  case GGML_TYPE_IQ4_NL:
8209
+ case GGML_TYPE_IQ4_XS:
8210
+ case GGML_TYPE_IQ3_S:
8211
+ case GGML_TYPE_IQ2_S:
8144
8212
  default:
8145
8213
  {
8146
8214
  GGML_ASSERT(false);
@@ -8160,7 +8228,7 @@ static void ggml_compute_forward_sub_f32(
8160
8228
  assert(params->ith == 0);
8161
8229
  assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
8162
8230
 
8163
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8231
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8164
8232
  return;
8165
8233
  }
8166
8234
 
@@ -8241,14 +8309,14 @@ static void ggml_compute_forward_mul_f32(
8241
8309
 
8242
8310
  GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
8243
8311
 
8244
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8312
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8245
8313
  return;
8246
8314
  }
8247
8315
  const int ith = params->ith;
8248
8316
  const int nth = params->nth;
8249
8317
 
8250
8318
  #if defined(GGML_USE_CLBLAST)
8251
- if (src1->backend == GGML_BACKEND_GPU) {
8319
+ if (src1->backend == GGML_BACKEND_TYPE_GPU) {
8252
8320
  // TODO: OpenCL kernel support full broadcast
8253
8321
  GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
8254
8322
  if (ith == 0) {
@@ -8349,7 +8417,7 @@ static void ggml_compute_forward_div_f32(
8349
8417
 
8350
8418
  GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
8351
8419
 
8352
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8420
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8353
8421
  return;
8354
8422
  }
8355
8423
 
@@ -8444,7 +8512,7 @@ static void ggml_compute_forward_sqr_f32(
8444
8512
  assert(params->ith == 0);
8445
8513
  assert(ggml_are_same_shape(src0, dst));
8446
8514
 
8447
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8515
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8448
8516
  return;
8449
8517
  }
8450
8518
 
@@ -8490,7 +8558,7 @@ static void ggml_compute_forward_sqrt_f32(
8490
8558
  assert(params->ith == 0);
8491
8559
  assert(ggml_are_same_shape(src0, dst));
8492
8560
 
8493
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8561
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8494
8562
  return;
8495
8563
  }
8496
8564
 
@@ -8536,7 +8604,7 @@ static void ggml_compute_forward_log_f32(
8536
8604
  GGML_ASSERT(params->ith == 0);
8537
8605
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
8538
8606
 
8539
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8607
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8540
8608
  return;
8541
8609
  }
8542
8610
 
@@ -8582,7 +8650,7 @@ static void ggml_compute_forward_sum_f32(
8582
8650
  assert(params->ith == 0);
8583
8651
  assert(ggml_is_scalar(dst));
8584
8652
 
8585
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8653
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8586
8654
  return;
8587
8655
  }
8588
8656
 
@@ -8617,7 +8685,7 @@ static void ggml_compute_forward_sum_f16(
8617
8685
  assert(params->ith == 0);
8618
8686
  assert(ggml_is_scalar(dst));
8619
8687
 
8620
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8688
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8621
8689
  return;
8622
8690
  }
8623
8691
 
@@ -8674,7 +8742,7 @@ static void ggml_compute_forward_sum_rows_f32(
8674
8742
 
8675
8743
  GGML_ASSERT(params->ith == 0);
8676
8744
 
8677
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8745
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8678
8746
  return;
8679
8747
  }
8680
8748
 
@@ -8729,7 +8797,7 @@ static void ggml_compute_forward_mean_f32(
8729
8797
 
8730
8798
  assert(params->ith == 0);
8731
8799
 
8732
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8800
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8733
8801
  return;
8734
8802
  }
8735
8803
 
@@ -8788,7 +8856,7 @@ static void ggml_compute_forward_argmax_f32(
8788
8856
 
8789
8857
  assert(params->ith == 0);
8790
8858
 
8791
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8859
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8792
8860
  return;
8793
8861
  }
8794
8862
 
@@ -8839,7 +8907,7 @@ static void ggml_compute_forward_repeat_f32(
8839
8907
  GGML_ASSERT(params->ith == 0);
8840
8908
  GGML_ASSERT(ggml_can_repeat(src0, dst));
8841
8909
 
8842
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8910
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8843
8911
  return;
8844
8912
  }
8845
8913
 
@@ -8884,7 +8952,7 @@ static void ggml_compute_forward_repeat_f16(
8884
8952
  GGML_ASSERT(params->ith == 0);
8885
8953
  GGML_ASSERT(ggml_can_repeat(src0, dst));
8886
8954
 
8887
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
8955
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8888
8956
  return;
8889
8957
  }
8890
8958
 
@@ -8958,7 +9026,7 @@ static void ggml_compute_forward_repeat_back_f32(
8958
9026
  GGML_ASSERT(params->ith == 0);
8959
9027
  GGML_ASSERT(ggml_can_repeat(dst, src0));
8960
9028
 
8961
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9029
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
8962
9030
  return;
8963
9031
  }
8964
9032
 
@@ -9035,7 +9103,7 @@ static void ggml_compute_forward_concat_f32(
9035
9103
  const struct ggml_tensor * src0 = dst->src[0];
9036
9104
  const struct ggml_tensor * src1 = dst->src[1];
9037
9105
 
9038
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9106
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9039
9107
  return;
9040
9108
  }
9041
9109
 
@@ -9107,7 +9175,7 @@ static void ggml_compute_forward_abs_f32(
9107
9175
  assert(params->ith == 0);
9108
9176
  assert(ggml_are_same_shape(src0, dst));
9109
9177
 
9110
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9178
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9111
9179
  return;
9112
9180
  }
9113
9181
 
@@ -9153,7 +9221,7 @@ static void ggml_compute_forward_sgn_f32(
9153
9221
  assert(params->ith == 0);
9154
9222
  assert(ggml_are_same_shape(src0, dst));
9155
9223
 
9156
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9224
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9157
9225
  return;
9158
9226
  }
9159
9227
 
@@ -9199,7 +9267,7 @@ static void ggml_compute_forward_neg_f32(
9199
9267
  assert(params->ith == 0);
9200
9268
  assert(ggml_are_same_shape(src0, dst));
9201
9269
 
9202
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9270
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9203
9271
  return;
9204
9272
  }
9205
9273
 
@@ -9245,7 +9313,7 @@ static void ggml_compute_forward_step_f32(
9245
9313
  assert(params->ith == 0);
9246
9314
  assert(ggml_are_same_shape(src0, dst));
9247
9315
 
9248
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9316
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9249
9317
  return;
9250
9318
  }
9251
9319
 
@@ -9291,7 +9359,7 @@ static void ggml_compute_forward_tanh_f32(
9291
9359
  assert(params->ith == 0);
9292
9360
  assert(ggml_are_same_shape(src0, dst));
9293
9361
 
9294
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9362
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9295
9363
  return;
9296
9364
  }
9297
9365
 
@@ -9337,7 +9405,7 @@ static void ggml_compute_forward_elu_f32(
9337
9405
  assert(params->ith == 0);
9338
9406
  assert(ggml_are_same_shape(src0, dst));
9339
9407
 
9340
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9408
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9341
9409
  return;
9342
9410
  }
9343
9411
 
@@ -9383,7 +9451,7 @@ static void ggml_compute_forward_relu_f32(
9383
9451
  assert(params->ith == 0);
9384
9452
  assert(ggml_are_same_shape(src0, dst));
9385
9453
 
9386
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9454
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9387
9455
  return;
9388
9456
  }
9389
9457
 
@@ -9430,7 +9498,7 @@ static void ggml_compute_forward_gelu_f32(
9430
9498
  GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
9431
9499
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
9432
9500
 
9433
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9501
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9434
9502
  return;
9435
9503
  }
9436
9504
 
@@ -9493,7 +9561,7 @@ static void ggml_compute_forward_gelu_quick_f32(
9493
9561
  GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
9494
9562
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
9495
9563
 
9496
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9564
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9497
9565
  return;
9498
9566
  }
9499
9567
 
@@ -9556,7 +9624,7 @@ static void ggml_compute_forward_silu_f32(
9556
9624
  GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
9557
9625
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
9558
9626
 
9559
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9627
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9560
9628
  return;
9561
9629
  }
9562
9630
 
@@ -9617,7 +9685,7 @@ static void ggml_compute_forward_leaky_relu_f32(
9617
9685
  assert(params->ith == 0);
9618
9686
  assert(ggml_are_same_shape(src0, dst));
9619
9687
 
9620
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9688
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9621
9689
  return;
9622
9690
  }
9623
9691
 
@@ -9670,7 +9738,7 @@ static void ggml_compute_forward_silu_back_f32(
9670
9738
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
9671
9739
  GGML_ASSERT(ggml_are_same_shape(src0, grad));
9672
9740
 
9673
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9741
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9674
9742
  return;
9675
9743
  }
9676
9744
 
@@ -9732,7 +9800,7 @@ static void ggml_compute_forward_hardswish_f32(
9732
9800
  assert(params->ith == 0);
9733
9801
  assert(ggml_are_same_shape(src0, dst));
9734
9802
 
9735
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9803
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9736
9804
  return;
9737
9805
  }
9738
9806
 
@@ -9775,7 +9843,7 @@ static void ggml_compute_forward_hardsigmoid_f32(
9775
9843
  assert(params->ith == 0);
9776
9844
  assert(ggml_are_same_shape(src0, dst));
9777
9845
 
9778
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9846
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9779
9847
  return;
9780
9848
  }
9781
9849
 
@@ -9821,7 +9889,7 @@ static void ggml_compute_forward_norm_f32(
9821
9889
 
9822
9890
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
9823
9891
 
9824
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9892
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9825
9893
  return;
9826
9894
  }
9827
9895
 
@@ -9896,7 +9964,7 @@ static void ggml_compute_forward_rms_norm_f32(
9896
9964
 
9897
9965
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
9898
9966
 
9899
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9967
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9900
9968
  return;
9901
9969
  }
9902
9970
 
@@ -9967,7 +10035,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
9967
10035
 
9968
10036
  GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
9969
10037
 
9970
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
10038
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
9971
10039
  return;
9972
10040
  }
9973
10041
 
@@ -10145,7 +10213,7 @@ static void ggml_compute_forward_group_norm_f32(
10145
10213
 
10146
10214
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
10147
10215
 
10148
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
10216
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
10149
10217
  return;
10150
10218
  }
10151
10219
 
@@ -10312,7 +10380,7 @@ static void ggml_compute_forward_mul_mat(
10312
10380
 
10313
10381
  #if defined(GGML_USE_CLBLAST)
10314
10382
  if (ggml_cl_can_mul_mat(src0, src1, dst)) {
10315
- if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
10383
+ if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
10316
10384
  ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
10317
10385
  }
10318
10386
  return;
@@ -10325,7 +10393,7 @@ static void ggml_compute_forward_mul_mat(
10325
10393
  const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
10326
10394
  UNUSED(desired_wsize);
10327
10395
 
10328
- if (params->type == GGML_TASK_INIT) {
10396
+ if (params->type == GGML_TASK_TYPE_INIT) {
10329
10397
  if (type != GGML_TYPE_F32) {
10330
10398
  assert(params->wsize >= desired_wsize);
10331
10399
  // parallelize by src0 rows
@@ -10348,7 +10416,7 @@ static void ggml_compute_forward_mul_mat(
10348
10416
  return;
10349
10417
  }
10350
10418
 
10351
- if (params->type == GGML_TASK_FINALIZE) {
10419
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
10352
10420
  return;
10353
10421
  }
10354
10422
 
@@ -10386,7 +10454,7 @@ static void ggml_compute_forward_mul_mat(
10386
10454
  }
10387
10455
  #endif
10388
10456
 
10389
- if (params->type == GGML_TASK_INIT) {
10457
+ if (params->type == GGML_TASK_TYPE_INIT) {
10390
10458
  if (ith != 0) {
10391
10459
  return;
10392
10460
  }
@@ -10410,7 +10478,7 @@ static void ggml_compute_forward_mul_mat(
10410
10478
  return;
10411
10479
  }
10412
10480
 
10413
- if (params->type == GGML_TASK_FINALIZE) {
10481
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
10414
10482
  return;
10415
10483
  }
10416
10484
 
@@ -10567,7 +10635,7 @@ static void ggml_compute_forward_mul_mat_id(
10567
10635
 
10568
10636
  #define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
10569
10637
 
10570
- if (params->type == GGML_TASK_INIT) {
10638
+ if (params->type == GGML_TASK_TYPE_INIT) {
10571
10639
  if (ith != 0) {
10572
10640
  return;
10573
10641
  }
@@ -10604,7 +10672,7 @@ static void ggml_compute_forward_mul_mat_id(
10604
10672
  return;
10605
10673
  }
10606
10674
 
10607
- if (params->type == GGML_TASK_FINALIZE) {
10675
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
10608
10676
  return;
10609
10677
  }
10610
10678
 
@@ -10752,7 +10820,7 @@ static void ggml_compute_forward_out_prod_f32(
10752
10820
  (ggml_is_contiguous(src1) || ggml_is_transposed(src1));
10753
10821
  #endif
10754
10822
 
10755
- if (params->type == GGML_TASK_INIT) {
10823
+ if (params->type == GGML_TASK_TYPE_INIT) {
10756
10824
  #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
10757
10825
  if (use_blas) {
10758
10826
  return;
@@ -10765,7 +10833,7 @@ static void ggml_compute_forward_out_prod_f32(
10765
10833
  return;
10766
10834
  }
10767
10835
 
10768
- if (params->type == GGML_TASK_FINALIZE) {
10836
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
10769
10837
  return;
10770
10838
  }
10771
10839
 
@@ -10945,7 +11013,7 @@ static void ggml_compute_forward_out_prod_q_f32(
10945
11013
  // TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
10946
11014
  // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
10947
11015
 
10948
- if (params->type == GGML_TASK_INIT) {
11016
+ if (params->type == GGML_TASK_TYPE_INIT) {
10949
11017
  if (ith != 0) {
10950
11018
  return;
10951
11019
  }
@@ -10953,7 +11021,7 @@ static void ggml_compute_forward_out_prod_q_f32(
10953
11021
  return;
10954
11022
  }
10955
11023
 
10956
- if (params->type == GGML_TASK_FINALIZE) {
11024
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
10957
11025
  return;
10958
11026
  }
10959
11027
 
@@ -11039,6 +11107,9 @@ static void ggml_compute_forward_out_prod(
11039
11107
  case GGML_TYPE_IQ3_XXS:
11040
11108
  case GGML_TYPE_IQ1_S:
11041
11109
  case GGML_TYPE_IQ4_NL:
11110
+ case GGML_TYPE_IQ4_XS:
11111
+ case GGML_TYPE_IQ3_S:
11112
+ case GGML_TYPE_IQ2_S:
11042
11113
  {
11043
11114
  ggml_compute_forward_out_prod_q_f32(params, dst);
11044
11115
  } break;
@@ -11070,7 +11141,7 @@ static void ggml_compute_forward_scale_f32(
11070
11141
  GGML_ASSERT(ggml_is_contiguous(dst));
11071
11142
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
11072
11143
 
11073
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11144
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11074
11145
  return;
11075
11146
  }
11076
11147
 
@@ -11142,7 +11213,7 @@ static void ggml_compute_forward_set_f32(
11142
11213
  size_t offset = ((int32_t *) dst->op_params)[3];
11143
11214
  bool inplace = (bool) ((int32_t *) dst->op_params)[4];
11144
11215
 
11145
- if (!inplace && (params->type == GGML_TASK_INIT)) {
11216
+ if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
11146
11217
  if (params->ith != 0) {
11147
11218
  return;
11148
11219
  }
@@ -11154,7 +11225,7 @@ static void ggml_compute_forward_set_f32(
11154
11225
  ggml_nbytes(dst));
11155
11226
  }
11156
11227
 
11157
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11228
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11158
11229
  return;
11159
11230
  }
11160
11231
 
@@ -11227,6 +11298,9 @@ static void ggml_compute_forward_set(
11227
11298
  case GGML_TYPE_IQ3_XXS:
11228
11299
  case GGML_TYPE_IQ1_S:
11229
11300
  case GGML_TYPE_IQ4_NL:
11301
+ case GGML_TYPE_IQ4_XS:
11302
+ case GGML_TYPE_IQ3_S:
11303
+ case GGML_TYPE_IQ2_S:
11230
11304
  default:
11231
11305
  {
11232
11306
  GGML_ASSERT(false);
@@ -11301,7 +11375,7 @@ static void ggml_compute_forward_get_rows_q(
11301
11375
 
11302
11376
  assert(params->ith == 0);
11303
11377
 
11304
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11378
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11305
11379
  return;
11306
11380
  }
11307
11381
 
@@ -11341,7 +11415,7 @@ static void ggml_compute_forward_get_rows_f16(
11341
11415
 
11342
11416
  assert(params->ith == 0);
11343
11417
 
11344
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11418
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11345
11419
  return;
11346
11420
  }
11347
11421
 
@@ -11378,7 +11452,7 @@ static void ggml_compute_forward_get_rows_f32(
11378
11452
 
11379
11453
  assert(params->ith == 0);
11380
11454
 
11381
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11455
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11382
11456
  return;
11383
11457
  }
11384
11458
 
@@ -11429,6 +11503,9 @@ static void ggml_compute_forward_get_rows(
11429
11503
  case GGML_TYPE_IQ3_XXS:
11430
11504
  case GGML_TYPE_IQ1_S:
11431
11505
  case GGML_TYPE_IQ4_NL:
11506
+ case GGML_TYPE_IQ4_XS:
11507
+ case GGML_TYPE_IQ3_S:
11508
+ case GGML_TYPE_IQ2_S:
11432
11509
  {
11433
11510
  ggml_compute_forward_get_rows_q(params, dst);
11434
11511
  } break;
@@ -11480,14 +11557,14 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
11480
11557
 
11481
11558
  // ggml_compute_forward_dup_same_cont(params, opt0, dst);
11482
11559
 
11483
- if (params->type == GGML_TASK_INIT) {
11560
+ if (params->type == GGML_TASK_TYPE_INIT) {
11484
11561
  if (params->ith != 0) {
11485
11562
  return;
11486
11563
  }
11487
11564
  memset(dst->data, 0, ggml_nbytes(dst));
11488
11565
  }
11489
11566
 
11490
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11567
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11491
11568
  return;
11492
11569
  }
11493
11570
 
@@ -11519,14 +11596,14 @@ static void ggml_compute_forward_get_rows_back_f32(
11519
11596
 
11520
11597
  // ggml_compute_forward_dup_same_cont(params, opt0, dst);
11521
11598
 
11522
- if (params->type == GGML_TASK_INIT) {
11599
+ if (params->type == GGML_TASK_TYPE_INIT) {
11523
11600
  if (params->ith != 0) {
11524
11601
  return;
11525
11602
  }
11526
11603
  memset(dst->data, 0, ggml_nbytes(dst));
11527
11604
  }
11528
11605
 
11529
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11606
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11530
11607
  return;
11531
11608
  }
11532
11609
 
@@ -11596,7 +11673,7 @@ static void ggml_compute_forward_diag_f32(
11596
11673
 
11597
11674
  GGML_ASSERT(params->ith == 0);
11598
11675
 
11599
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11676
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11600
11677
  return;
11601
11678
  }
11602
11679
 
@@ -11665,7 +11742,7 @@ static void ggml_compute_forward_diag_mask_f32(
11665
11742
 
11666
11743
  GGML_ASSERT(n_past >= 0);
11667
11744
 
11668
- if (!inplace && (params->type == GGML_TASK_INIT)) {
11745
+ if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
11669
11746
  if (ith != 0) {
11670
11747
  return;
11671
11748
  }
@@ -11679,7 +11756,7 @@ static void ggml_compute_forward_diag_mask_f32(
11679
11756
  ggml_nbytes(dst));
11680
11757
  }
11681
11758
 
11682
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11759
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11683
11760
  return;
11684
11761
  }
11685
11762
 
@@ -11753,7 +11830,7 @@ static void ggml_compute_forward_soft_max_f32(
11753
11830
  assert(ggml_is_contiguous(dst));
11754
11831
  assert(ggml_are_same_shape(src0, dst));
11755
11832
 
11756
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11833
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11757
11834
  return;
11758
11835
  }
11759
11836
 
@@ -11891,7 +11968,7 @@ static void ggml_compute_forward_soft_max_back_f32(
11891
11968
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
11892
11969
  GGML_ASSERT(ggml_are_same_shape(src1, dst));
11893
11970
 
11894
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
11971
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11895
11972
  return;
11896
11973
  }
11897
11974
 
@@ -11985,7 +12062,7 @@ static void ggml_compute_forward_alibi_f32(
11985
12062
 
11986
12063
  assert(params->ith == 0);
11987
12064
 
11988
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
12065
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
11989
12066
  return;
11990
12067
  }
11991
12068
 
@@ -12044,7 +12121,7 @@ static void ggml_compute_forward_alibi_f16(
12044
12121
 
12045
12122
  assert(params->ith == 0);
12046
12123
 
12047
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
12124
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
12048
12125
  return;
12049
12126
  }
12050
12127
 
@@ -12129,6 +12206,9 @@ static void ggml_compute_forward_alibi(
12129
12206
  case GGML_TYPE_IQ3_XXS:
12130
12207
  case GGML_TYPE_IQ1_S:
12131
12208
  case GGML_TYPE_IQ4_NL:
12209
+ case GGML_TYPE_IQ4_XS:
12210
+ case GGML_TYPE_IQ3_S:
12211
+ case GGML_TYPE_IQ2_S:
12132
12212
  case GGML_TYPE_Q8_K:
12133
12213
  case GGML_TYPE_I8:
12134
12214
  case GGML_TYPE_I16:
@@ -12150,7 +12230,7 @@ static void ggml_compute_forward_clamp_f32(
12150
12230
 
12151
12231
  assert(params->ith == 0);
12152
12232
 
12153
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
12233
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
12154
12234
  return;
12155
12235
  }
12156
12236
 
@@ -12212,6 +12292,9 @@ static void ggml_compute_forward_clamp(
12212
12292
  case GGML_TYPE_IQ3_XXS:
12213
12293
  case GGML_TYPE_IQ1_S:
12214
12294
  case GGML_TYPE_IQ4_NL:
12295
+ case GGML_TYPE_IQ4_XS:
12296
+ case GGML_TYPE_IQ3_S:
12297
+ case GGML_TYPE_IQ2_S:
12215
12298
  case GGML_TYPE_Q8_K:
12216
12299
  case GGML_TYPE_I8:
12217
12300
  case GGML_TYPE_I16:
@@ -12289,7 +12372,7 @@ static void ggml_compute_forward_rope_f32(
12289
12372
  const struct ggml_tensor * src0 = dst->src[0];
12290
12373
  const struct ggml_tensor * src1 = dst->src[1];
12291
12374
 
12292
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
12375
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
12293
12376
  return;
12294
12377
  }
12295
12378
 
@@ -12467,7 +12550,7 @@ static void ggml_compute_forward_rope_f16(
12467
12550
  const struct ggml_tensor * src0 = dst->src[0];
12468
12551
  const struct ggml_tensor * src1 = dst->src[1];
12469
12552
 
12470
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
12553
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
12471
12554
  return;
12472
12555
  }
12473
12556
 
@@ -12698,7 +12781,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
12698
12781
  GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
12699
12782
  GGML_ASSERT(nb10 == sizeof(float));
12700
12783
 
12701
- if (params->type == GGML_TASK_INIT) {
12784
+ if (params->type == GGML_TASK_TYPE_INIT) {
12702
12785
  if (ith != 0) {
12703
12786
  return;
12704
12787
  }
@@ -12738,7 +12821,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
12738
12821
  return;
12739
12822
  }
12740
12823
 
12741
- if (params->type == GGML_TASK_FINALIZE) {
12824
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
12742
12825
  return;
12743
12826
  }
12744
12827
 
@@ -12797,7 +12880,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
12797
12880
  GGML_ASSERT(nb00 == sizeof(float));
12798
12881
  GGML_ASSERT(nb10 == sizeof(float));
12799
12882
 
12800
- if (params->type == GGML_TASK_INIT) {
12883
+ if (params->type == GGML_TASK_TYPE_INIT) {
12801
12884
  if (ith != 0) {
12802
12885
  return;
12803
12886
  }
@@ -12837,7 +12920,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
12837
12920
  return;
12838
12921
  }
12839
12922
 
12840
- if (params->type == GGML_TASK_FINALIZE) {
12923
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
12841
12924
  return;
12842
12925
  }
12843
12926
 
@@ -12941,11 +13024,11 @@ static void ggml_compute_forward_im2col_f32(
12941
13024
  GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
12942
13025
  GGML_ASSERT(nb10 == sizeof(float));
12943
13026
 
12944
- if (params->type == GGML_TASK_INIT) {
13027
+ if (params->type == GGML_TASK_TYPE_INIT) {
12945
13028
  return;
12946
13029
  }
12947
13030
 
12948
- if (params->type == GGML_TASK_FINALIZE) {
13031
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
12949
13032
  return;
12950
13033
  }
12951
13034
 
@@ -13029,11 +13112,11 @@ static void ggml_compute_forward_im2col_f16(
13029
13112
  GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
13030
13113
  GGML_ASSERT(nb10 == sizeof(float));
13031
13114
 
13032
- if (params->type == GGML_TASK_INIT) {
13115
+ if (params->type == GGML_TASK_TYPE_INIT) {
13033
13116
  return;
13034
13117
  }
13035
13118
 
13036
- if (params->type == GGML_TASK_FINALIZE) {
13119
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
13037
13120
  return;
13038
13121
  }
13039
13122
 
@@ -13115,7 +13198,7 @@ static void ggml_compute_forward_conv_transpose_2d(
13115
13198
  GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
13116
13199
  GGML_ASSERT(nb10 == sizeof(float));
13117
13200
 
13118
- if (params->type == GGML_TASK_INIT) {
13201
+ if (params->type == GGML_TASK_TYPE_INIT) {
13119
13202
  if (ith != 0) {
13120
13203
  return;
13121
13204
  }
@@ -13157,7 +13240,7 @@ static void ggml_compute_forward_conv_transpose_2d(
13157
13240
  return;
13158
13241
  }
13159
13242
 
13160
- if (params->type == GGML_TASK_FINALIZE) {
13243
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
13161
13244
  return;
13162
13245
  }
13163
13246
 
@@ -13209,7 +13292,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
13209
13292
  assert(src->type == GGML_TYPE_F32);
13210
13293
  assert(params->ith == 0);
13211
13294
 
13212
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
13295
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
13213
13296
  return;
13214
13297
  }
13215
13298
 
@@ -13278,7 +13361,7 @@ static void ggml_compute_forward_pool_2d(
13278
13361
  GGML_ASSERT(src->type == GGML_TYPE_F32);
13279
13362
  GGML_ASSERT(params->ith == 0);
13280
13363
 
13281
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
13364
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
13282
13365
  return;
13283
13366
  }
13284
13367
 
@@ -13351,7 +13434,7 @@ static void ggml_compute_forward_upscale_f32(
13351
13434
 
13352
13435
  const struct ggml_tensor * src0 = dst->src[0];
13353
13436
 
13354
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
13437
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
13355
13438
  return;
13356
13439
  }
13357
13440
 
@@ -13411,7 +13494,7 @@ static void ggml_compute_forward_pad_f32(
13411
13494
 
13412
13495
  const struct ggml_tensor * src0 = dst->src[0];
13413
13496
 
13414
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
13497
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
13415
13498
  return;
13416
13499
  }
13417
13500
 
@@ -13472,7 +13555,7 @@ static void ggml_compute_forward_argsort_f32(
13472
13555
 
13473
13556
  const struct ggml_tensor * src0 = dst->src[0];
13474
13557
 
13475
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
13558
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
13476
13559
  return;
13477
13560
  }
13478
13561
 
@@ -13498,8 +13581,8 @@ static void ggml_compute_forward_argsort_f32(
13498
13581
  // C doesn't have a functional sort, so we do a bubble sort instead
13499
13582
  for (int64_t j = 0; j < ne0; j++) {
13500
13583
  for (int64_t k = j + 1; k < ne0; k++) {
13501
- if ((order == GGML_SORT_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
13502
- (order == GGML_SORT_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
13584
+ if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
13585
+ (order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
13503
13586
  int32_t tmp = dst_data[j];
13504
13587
  dst_data[j] = dst_data[k];
13505
13588
  dst_data[k] = tmp;
@@ -13582,11 +13665,11 @@ static void ggml_compute_forward_flash_attn_f32(
13582
13665
  GGML_ASSERT(nb1 <= nb2);
13583
13666
  GGML_ASSERT(nb2 <= nb3);
13584
13667
 
13585
- if (params->type == GGML_TASK_INIT) {
13668
+ if (params->type == GGML_TASK_TYPE_INIT) {
13586
13669
  return;
13587
13670
  }
13588
13671
 
13589
- if (params->type == GGML_TASK_FINALIZE) {
13672
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
13590
13673
  return;
13591
13674
  }
13592
13675
 
@@ -13774,11 +13857,11 @@ static void ggml_compute_forward_flash_attn_f16(
13774
13857
  GGML_ASSERT(nb1 <= nb2);
13775
13858
  GGML_ASSERT(nb2 <= nb3);
13776
13859
 
13777
- if (params->type == GGML_TASK_INIT) {
13860
+ if (params->type == GGML_TASK_TYPE_INIT) {
13778
13861
  return;
13779
13862
  }
13780
13863
 
13781
- if (params->type == GGML_TASK_FINALIZE) {
13864
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
13782
13865
  return;
13783
13866
  }
13784
13867
 
@@ -14033,11 +14116,11 @@ static void ggml_compute_forward_flash_ff_f16(
14033
14116
  GGML_ASSERT(nb1 <= nb2);
14034
14117
  GGML_ASSERT(nb2 <= nb3);
14035
14118
 
14036
- if (params->type == GGML_TASK_INIT) {
14119
+ if (params->type == GGML_TASK_TYPE_INIT) {
14037
14120
  return;
14038
14121
  }
14039
14122
 
14040
- if (params->type == GGML_TASK_FINALIZE) {
14123
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
14041
14124
  return;
14042
14125
  }
14043
14126
 
@@ -14192,14 +14275,14 @@ static void ggml_compute_forward_flash_attn_back_f32(
14192
14275
  GGML_ASSERT(nb1 <= nb2);
14193
14276
  GGML_ASSERT(nb2 <= nb3);
14194
14277
 
14195
- if (params->type == GGML_TASK_INIT) {
14278
+ if (params->type == GGML_TASK_TYPE_INIT) {
14196
14279
  if (ith == 0) {
14197
14280
  memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
14198
14281
  }
14199
14282
  return;
14200
14283
  }
14201
14284
 
14202
- if (params->type == GGML_TASK_FINALIZE) {
14285
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
14203
14286
  return;
14204
14287
  }
14205
14288
 
@@ -14515,7 +14598,7 @@ static void ggml_compute_forward_win_part_f32(
14515
14598
 
14516
14599
  const struct ggml_tensor * src0 = dst->src[0];
14517
14600
 
14518
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14601
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14519
14602
  return;
14520
14603
  }
14521
14604
 
@@ -14581,7 +14664,7 @@ static void ggml_compute_forward_win_unpart_f32(
14581
14664
 
14582
14665
  const struct ggml_tensor * src0 = dst->src[0];
14583
14666
 
14584
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14667
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14585
14668
  return;
14586
14669
  }
14587
14670
 
@@ -14709,7 +14792,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
14709
14792
 
14710
14793
  const struct ggml_tensor * src0 = dst->src[0];
14711
14794
 
14712
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14795
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14713
14796
  return;
14714
14797
  }
14715
14798
 
@@ -14761,14 +14844,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
14761
14844
  const struct ggml_tensor * src2 = dst->src[2];
14762
14845
 
14763
14846
  const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
14764
- if (!inplace && params->type == GGML_TASK_INIT) {
14847
+ if (!inplace && params->type == GGML_TASK_TYPE_INIT) {
14765
14848
  if (params->ith != 0) {
14766
14849
  return;
14767
14850
  }
14768
14851
  memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
14769
14852
  return;
14770
14853
  }
14771
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14854
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14772
14855
  return;
14773
14856
  }
14774
14857
 
@@ -14850,7 +14933,7 @@ static void ggml_compute_forward_map_unary_f32(
14850
14933
 
14851
14934
  GGML_ASSERT(ggml_are_same_shape(src0, dst));
14852
14935
 
14853
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14936
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14854
14937
  return;
14855
14938
  }
14856
14939
 
@@ -14899,7 +14982,7 @@ static void ggml_compute_forward_map_binary_f32(
14899
14982
  assert(params->ith == 0);
14900
14983
  assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
14901
14984
 
14902
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
14985
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14903
14986
  return;
14904
14987
  }
14905
14988
 
@@ -14948,7 +15031,7 @@ static void ggml_compute_forward_map_custom1_f32(
14948
15031
 
14949
15032
  assert(params->ith == 0);
14950
15033
 
14951
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
15034
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14952
15035
  return;
14953
15036
  }
14954
15037
 
@@ -14967,7 +15050,7 @@ static void ggml_compute_forward_map_custom2_f32(
14967
15050
 
14968
15051
  assert(params->ith == 0);
14969
15052
 
14970
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
15053
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14971
15054
  return;
14972
15055
  }
14973
15056
 
@@ -14987,7 +15070,7 @@ static void ggml_compute_forward_map_custom3_f32(
14987
15070
 
14988
15071
  assert(params->ith == 0);
14989
15072
 
14990
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
15073
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
14991
15074
  return;
14992
15075
  }
14993
15076
 
@@ -15002,13 +15085,14 @@ static void ggml_compute_forward_map_custom1(
15002
15085
 
15003
15086
  const struct ggml_tensor * a = dst->src[0];
15004
15087
 
15005
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
15088
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
15006
15089
  return;
15007
15090
  }
15008
15091
 
15009
- struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
15092
+ struct ggml_map_custom1_op_params p;
15093
+ memcpy(&p, dst->op_params, sizeof(p));
15010
15094
 
15011
- p->fun(dst, a, params->ith, params->nth, p->userdata);
15095
+ p.fun(dst, a, params->ith, params->nth, p.userdata);
15012
15096
  }
15013
15097
 
15014
15098
  // ggml_compute_forward_map_custom2
@@ -15020,13 +15104,14 @@ static void ggml_compute_forward_map_custom2(
15020
15104
  const struct ggml_tensor * a = dst->src[0];
15021
15105
  const struct ggml_tensor * b = dst->src[1];
15022
15106
 
15023
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
15107
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
15024
15108
  return;
15025
15109
  }
15026
15110
 
15027
- struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
15111
+ struct ggml_map_custom2_op_params p;
15112
+ memcpy(&p, dst->op_params, sizeof(p));
15028
15113
 
15029
- p->fun(dst, a, b, params->ith, params->nth, p->userdata);
15114
+ p.fun(dst, a, b, params->ith, params->nth, p.userdata);
15030
15115
  }
15031
15116
 
15032
15117
  // ggml_compute_forward_map_custom3
@@ -15039,13 +15124,14 @@ static void ggml_compute_forward_map_custom3(
15039
15124
  const struct ggml_tensor * b = dst->src[1];
15040
15125
  const struct ggml_tensor * c = dst->src[2];
15041
15126
 
15042
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
15127
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
15043
15128
  return;
15044
15129
  }
15045
15130
 
15046
- struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
15131
+ struct ggml_map_custom3_op_params p;
15132
+ memcpy(&p, dst->op_params, sizeof(p));
15047
15133
 
15048
- p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
15134
+ p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
15049
15135
  }
15050
15136
 
15051
15137
  // ggml_compute_forward_cross_entropy_loss
@@ -15073,14 +15159,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
15073
15159
 
15074
15160
  GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
15075
15161
 
15076
- if (params->type == GGML_TASK_INIT) {
15162
+ if (params->type == GGML_TASK_TYPE_INIT) {
15077
15163
  if (ith == 0) {
15078
15164
  memset(sums, 0, sizeof(float) * (nth + nth * nc));
15079
15165
  }
15080
15166
  return;
15081
15167
  }
15082
15168
 
15083
- if (params->type == GGML_TASK_FINALIZE) {
15169
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
15084
15170
  if (ith == 0) {
15085
15171
  float * dp = (float *) dst->data;
15086
15172
  ggml_vec_sum_f32(nth, dp, sums);
@@ -15195,7 +15281,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
15195
15281
  const int64_t ith = params->ith;
15196
15282
  const int64_t nth = params->nth;
15197
15283
 
15198
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
15284
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
15199
15285
  return;
15200
15286
  }
15201
15287
 
@@ -15302,8 +15388,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
15302
15388
  if (skip_cpu) {
15303
15389
  return;
15304
15390
  }
15305
- GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
15306
- GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
15391
+ GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
15392
+ GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
15307
15393
  #elif defined(GGML_USE_VULKAN)
15308
15394
  const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
15309
15395
  #ifdef GGML_VULKAN_CHECK_RESULTS
@@ -15314,8 +15400,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
15314
15400
  if (skip_cpu) {
15315
15401
  return;
15316
15402
  }
15317
- GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
15318
- GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
15403
+ GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
15404
+ GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
15319
15405
  #endif // GGML_USE_CUBLAS
15320
15406
 
15321
15407
  #ifdef GGML_USE_SYCL
@@ -16861,7 +16947,7 @@ size_t ggml_graph_overhead(void) {
16861
16947
 
16862
16948
  struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
16863
16949
  const size_t obj_size = ggml_graph_nbytes(size, grads);
16864
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
16950
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
16865
16951
  struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
16866
16952
 
16867
16953
  struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
@@ -17311,29 +17397,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
17311
17397
  } break;
17312
17398
  case GGML_OP_MAP_CUSTOM1:
17313
17399
  {
17314
- struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
17315
- if (p->n_tasks == GGML_N_TASKS_MAX) {
17400
+ struct ggml_map_custom1_op_params p;
17401
+ memcpy(&p, node->op_params, sizeof(p));
17402
+ if (p.n_tasks == GGML_N_TASKS_MAX) {
17316
17403
  n_tasks = n_threads;
17317
17404
  } else {
17318
- n_tasks = MIN(p->n_tasks, n_threads);
17405
+ n_tasks = MIN(p.n_tasks, n_threads);
17319
17406
  }
17320
17407
  } break;
17321
17408
  case GGML_OP_MAP_CUSTOM2:
17322
17409
  {
17323
- struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
17324
- if (p->n_tasks == GGML_N_TASKS_MAX) {
17410
+ struct ggml_map_custom2_op_params p;
17411
+ memcpy(&p, node->op_params, sizeof(p));
17412
+ if (p.n_tasks == GGML_N_TASKS_MAX) {
17325
17413
  n_tasks = n_threads;
17326
17414
  } else {
17327
- n_tasks = MIN(p->n_tasks, n_threads);
17415
+ n_tasks = MIN(p.n_tasks, n_threads);
17328
17416
  }
17329
17417
  } break;
17330
17418
  case GGML_OP_MAP_CUSTOM3:
17331
17419
  {
17332
- struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
17333
- if (p->n_tasks == GGML_N_TASKS_MAX) {
17420
+ struct ggml_map_custom3_op_params p;
17421
+ memcpy(&p, node->op_params, sizeof(p));
17422
+ if (p.n_tasks == GGML_N_TASKS_MAX) {
17334
17423
  n_tasks = n_threads;
17335
17424
  } else {
17336
- n_tasks = MIN(p->n_tasks, n_threads);
17425
+ n_tasks = MIN(p.n_tasks, n_threads);
17337
17426
  }
17338
17427
  } break;
17339
17428
  case GGML_OP_CROSS_ENTROPY_LOSS:
@@ -17408,7 +17497,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17408
17497
  set_numa_thread_affinity(state->ith);
17409
17498
 
17410
17499
  int node_n = -1;
17411
- int task_phase = GGML_TASK_FINALIZE;
17500
+ int task_phase = GGML_TASK_TYPE_FINALIZE;
17412
17501
 
17413
17502
  while (true) {
17414
17503
  if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
@@ -17420,7 +17509,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17420
17509
  // all other threads are finished and spinning
17421
17510
  // do finalize and init here so we don't have synchronize again
17422
17511
  struct ggml_compute_params params = {
17423
- /*.type =*/ GGML_TASK_FINALIZE,
17512
+ /*.type =*/ GGML_TASK_TYPE_FINALIZE,
17424
17513
  /*.ith =*/ 0,
17425
17514
  /*.nth =*/ 0,
17426
17515
  /*.wsize =*/ cplan->work_size,
@@ -17451,17 +17540,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17451
17540
  if (n_tasks == 1) {
17452
17541
  /* INIT */
17453
17542
  if (GGML_OP_HAS_INIT[node->op]) {
17454
- params.type = GGML_TASK_INIT;
17543
+ params.type = GGML_TASK_TYPE_INIT;
17455
17544
  ggml_compute_forward(&params, node);
17456
17545
  }
17457
17546
 
17458
17547
  // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
17459
17548
  // they do something more efficient than spinning (?)
17460
- params.type = GGML_TASK_COMPUTE;
17549
+ params.type = GGML_TASK_TYPE_COMPUTE;
17461
17550
  ggml_compute_forward(&params, node);
17462
17551
 
17463
17552
  if (GGML_OP_HAS_FINALIZE[node->op]) {
17464
- params.type = GGML_TASK_FINALIZE;
17553
+ params.type = GGML_TASK_TYPE_FINALIZE;
17465
17554
  ggml_compute_forward(&params, node);
17466
17555
  }
17467
17556
 
@@ -17475,7 +17564,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17475
17564
  }
17476
17565
  }
17477
17566
 
17478
- task_phase = GGML_TASK_INIT;
17567
+ task_phase = GGML_TASK_TYPE_INIT;
17479
17568
  atomic_store(&state->shared->n_active, n_threads);
17480
17569
  atomic_store(&state->shared->node_n, node_n);
17481
17570
  atomic_store(&state->shared->node_task, task_phase);
@@ -17492,7 +17581,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17492
17581
  const int n_tasks = ggml_get_n_tasks(node, n_threads);
17493
17582
 
17494
17583
  struct ggml_compute_params params = {
17495
- /*.type =*/ GGML_TASK_INIT,
17584
+ /*.type =*/ GGML_TASK_TYPE_INIT,
17496
17585
  /*.ith =*/ state->ith,
17497
17586
  /*.nth =*/ n_tasks,
17498
17587
  /*.wsize =*/ cplan->work_size,
@@ -17506,7 +17595,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17506
17595
  }
17507
17596
 
17508
17597
  if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
17509
- task_phase = GGML_TASK_COMPUTE;
17598
+ task_phase = GGML_TASK_TYPE_COMPUTE;
17510
17599
  atomic_store(&state->shared->n_active, n_threads);
17511
17600
  atomic_store(&state->shared->node_task, task_phase);
17512
17601
  }
@@ -17521,12 +17610,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17521
17610
  }
17522
17611
 
17523
17612
  if (state->ith < n_tasks) {
17524
- params.type = GGML_TASK_COMPUTE;
17613
+ params.type = GGML_TASK_TYPE_COMPUTE;
17525
17614
  ggml_compute_forward(&params, node);
17526
17615
  }
17527
17616
 
17528
17617
  if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
17529
- task_phase = GGML_TASK_FINALIZE;
17618
+ task_phase = GGML_TASK_TYPE_FINALIZE;
17530
17619
  atomic_store(&state->shared->n_active, n_threads);
17531
17620
  atomic_store(&state->shared->node_task, task_phase);
17532
17621
  }
@@ -17762,7 +17851,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
17762
17851
  /*.n_threads =*/ n_threads,
17763
17852
  /*.n_active =*/ n_threads,
17764
17853
  /*.node_n =*/ -1,
17765
- /*.node_task =*/ GGML_TASK_FINALIZE,
17854
+ /*.node_task =*/ GGML_TASK_TYPE_FINALIZE,
17766
17855
  /*.abort_callback =*/ NULL,
17767
17856
  /*.abort_callback_data =*/ NULL,
17768
17857
  };
@@ -17830,7 +17919,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
17830
17919
  void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
17831
17920
  struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
17832
17921
 
17833
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
17922
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
17834
17923
 
17835
17924
  cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
17836
17925
 
@@ -18638,7 +18727,7 @@ static enum ggml_opt_result ggml_opt_adam(
18638
18727
  float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
18639
18728
 
18640
18729
  struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
18641
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
18730
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
18642
18731
  cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
18643
18732
 
18644
18733
  bool cancel = false;
@@ -18650,7 +18739,7 @@ static enum ggml_opt_result ggml_opt_adam(
18650
18739
  if (callback) {
18651
18740
  callback(callback_data, accum_step, &sched, &cancel);
18652
18741
  if (cancel) {
18653
- return GGML_OPT_CANCEL;
18742
+ return GGML_OPT_RESULT_CANCEL;
18654
18743
  }
18655
18744
  }
18656
18745
  // ggml_graph_reset (gf);
@@ -18741,7 +18830,7 @@ static enum ggml_opt_result ggml_opt_adam(
18741
18830
  if (callback) {
18742
18831
  callback(callback_data, accum_step, &sched, &cancel);
18743
18832
  if (cancel) {
18744
- return GGML_OPT_CANCEL;;
18833
+ return GGML_OPT_RESULT_CANCEL;;
18745
18834
  }
18746
18835
  }
18747
18836
  // ggml_graph_reset (gf);
@@ -18758,7 +18847,7 @@ static enum ggml_opt_result ggml_opt_adam(
18758
18847
  if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
18759
18848
  GGML_PRINT_DEBUG("converged\n");
18760
18849
 
18761
- return GGML_OPT_OK;
18850
+ return GGML_OPT_RESULT_OK;
18762
18851
  }
18763
18852
 
18764
18853
  // delta-based convergence test
@@ -18768,7 +18857,7 @@ static enum ggml_opt_result ggml_opt_adam(
18768
18857
  const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;
18769
18858
 
18770
18859
  if (fabsf(rate) < params.delta) {
18771
- return GGML_OPT_OK;
18860
+ return GGML_OPT_RESULT_OK;
18772
18861
  }
18773
18862
  }
18774
18863
 
@@ -18784,7 +18873,7 @@ static enum ggml_opt_result ggml_opt_adam(
18784
18873
  ++n_no_improvement[0];
18785
18874
 
18786
18875
  if (n_no_improvement[0] >= params.max_no_improvement) {
18787
- return GGML_OPT_OK;
18876
+ return GGML_OPT_RESULT_OK;
18788
18877
  }
18789
18878
  }
18790
18879
  }
@@ -18802,7 +18891,7 @@ static enum ggml_opt_result ggml_opt_adam(
18802
18891
  }
18803
18892
  }
18804
18893
 
18805
- return GGML_OPT_DID_NOT_CONVERGE;
18894
+ return GGML_OPT_RESULT_DID_NOT_CONVERGE;
18806
18895
  }
18807
18896
 
18808
18897
  //
@@ -18883,7 +18972,7 @@ static enum ggml_opt_result linesearch_backtracking(
18883
18972
  float sched = 0;
18884
18973
  callback(callback_data, accum_step, &sched, cancel);
18885
18974
  if (*cancel) {
18886
- return GGML_OPT_CANCEL;
18975
+ return GGML_OPT_RESULT_CANCEL;
18887
18976
  }
18888
18977
  }
18889
18978
  // ggml_graph_reset (gf);
@@ -18956,7 +19045,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
18956
19045
  if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
18957
19046
  params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
18958
19047
  if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
18959
- return GGML_OPT_INVALID_WOLFE;
19048
+ return GGML_OPT_RESULT_INVALID_WOLFE;
18960
19049
  }
18961
19050
  }
18962
19051
 
@@ -18985,7 +19074,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
18985
19074
  }
18986
19075
 
18987
19076
  struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
18988
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
19077
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
18989
19078
  cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
18990
19079
 
18991
19080
  float * x = opt->lbfgs.x->data; // current parameters
@@ -19026,7 +19115,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
19026
19115
  float sched = 0;
19027
19116
  callback(callback_data, accum_step, &sched, &cancel);
19028
19117
  if (cancel) {
19029
- return GGML_OPT_CANCEL;
19118
+ return GGML_OPT_RESULT_CANCEL;
19030
19119
  }
19031
19120
  }
19032
19121
  // ggml_graph_reset (gf);
@@ -19054,7 +19143,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
19054
19143
 
19055
19144
  // already optimized
19056
19145
  if (gnorm/xnorm <= params.lbfgs.eps) {
19057
- return GGML_OPT_OK;
19146
+ return GGML_OPT_RESULT_OK;
19058
19147
  }
19059
19148
 
19060
19149
  if (opt->just_initialized) {
@@ -19099,7 +19188,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
19099
19188
  // way to test and don't want to break something with so many changes lined up
19100
19189
  ls = linesearch_backtracking(&params, nx, x, &fx, g, d, step, xp, f, gb, &cplan, np, ps, &cancel, callback, callback_data);
19101
19190
  if (cancel) {
19102
- return GGML_OPT_CANCEL;
19191
+ return GGML_OPT_RESULT_CANCEL;
19103
19192
  }
19104
19193
 
19105
19194
  if (ls < 0) {
@@ -19122,7 +19211,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
19122
19211
  }
19123
19212
  if (gnorm/xnorm <= params.lbfgs.eps) {
19124
19213
  // converged
19125
- return GGML_OPT_OK;
19214
+ return GGML_OPT_RESULT_OK;
19126
19215
  }
19127
19216
 
19128
19217
  // delta-based convergence test
@@ -19132,7 +19221,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
19132
19221
  const float rate = (pf[k[0]%params.past] - fx)/fx;
19133
19222
 
19134
19223
  if (fabsf(rate) < params.delta) {
19135
- return GGML_OPT_OK;
19224
+ return GGML_OPT_RESULT_OK;
19136
19225
  }
19137
19226
  }
19138
19227
 
@@ -19148,14 +19237,14 @@ static enum ggml_opt_result ggml_opt_lbfgs(
19148
19237
  n_no_improvement[0]++;
19149
19238
 
19150
19239
  if (n_no_improvement[0] >= params.max_no_improvement) {
19151
- return GGML_OPT_OK;
19240
+ return GGML_OPT_RESULT_OK;
19152
19241
  }
19153
19242
  }
19154
19243
  }
19155
19244
 
19156
19245
  if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) {
19157
19246
  // reached the maximum number of iterations
19158
- return GGML_OPT_DID_NOT_CONVERGE;
19247
+ return GGML_OPT_RESULT_DID_NOT_CONVERGE;
19159
19248
  }
19160
19249
 
19161
19250
  // update vectors s and y:
@@ -19211,17 +19300,17 @@ static enum ggml_opt_result ggml_opt_lbfgs(
19211
19300
 
19212
19301
  GGML_ASSERT(false && "lbfgs failed");
19213
19302
 
19214
- return GGML_OPT_DID_NOT_CONVERGE;
19303
+ return GGML_OPT_RESULT_DID_NOT_CONVERGE;
19215
19304
  }
19216
19305
 
19217
19306
  struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
19218
19307
  struct ggml_opt_params result;
19219
19308
 
19220
19309
  switch (type) {
19221
- case GGML_OPT_ADAM:
19310
+ case GGML_OPT_TYPE_ADAM:
19222
19311
  {
19223
19312
  result = (struct ggml_opt_params) {
19224
- .type = GGML_OPT_ADAM,
19313
+ .type = GGML_OPT_TYPE_ADAM,
19225
19314
  .graph_size = GGML_DEFAULT_GRAPH_SIZE,
19226
19315
  .n_threads = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
19227
19316
  .past = 0,
@@ -19249,10 +19338,10 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
19249
19338
  },
19250
19339
  };
19251
19340
  } break;
19252
- case GGML_OPT_LBFGS:
19341
+ case GGML_OPT_TYPE_LBFGS:
19253
19342
  {
19254
19343
  result = (struct ggml_opt_params) {
19255
- .type = GGML_OPT_LBFGS,
19344
+ .type = GGML_OPT_TYPE_LBFGS,
19256
19345
  .graph_size = GGML_DEFAULT_GRAPH_SIZE,
19257
19346
  .n_threads = 1,
19258
19347
  .past = 0,
@@ -19297,12 +19386,12 @@ GGML_API void ggml_opt_init(
19297
19386
  opt->just_initialized = true;
19298
19387
  if (opt->ctx == NULL) {
19299
19388
  struct ggml_init_params ctx_opt_params;
19300
- if (opt->params.type == GGML_OPT_ADAM) {
19389
+ if (opt->params.type == GGML_OPT_TYPE_ADAM) {
19301
19390
  ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
19302
19391
  if (opt->params.past > 0) {
19303
19392
  ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
19304
19393
  }
19305
- } else if (opt->params.type == GGML_OPT_LBFGS) {
19394
+ } else if (opt->params.type == GGML_OPT_TYPE_LBFGS) {
19306
19395
  ctx_opt_params.mem_size = GGML_MEM_ALIGN*9 + ggml_tensor_overhead()*9 + ggml_type_size(GGML_TYPE_F32)*(nx*5 + opt->params.lbfgs.m*2 + nx*opt->params.lbfgs.m*2);
19307
19396
  if (opt->params.past > 0) {
19308
19397
  ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
@@ -19314,7 +19403,7 @@ GGML_API void ggml_opt_init(
19314
19403
  opt->ctx = ggml_init(ctx_opt_params);
19315
19404
  }
19316
19405
  switch (opt->params.type) {
19317
- case GGML_OPT_ADAM:
19406
+ case GGML_OPT_TYPE_ADAM:
19318
19407
  {
19319
19408
  opt->adam.g = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
19320
19409
  opt->adam.m = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
@@ -19328,7 +19417,7 @@ GGML_API void ggml_opt_init(
19328
19417
  ggml_set_zero(opt->adam.pf);
19329
19418
  }
19330
19419
  } break;
19331
- case GGML_OPT_LBFGS:
19420
+ case GGML_OPT_TYPE_LBFGS:
19332
19421
  {
19333
19422
  opt->lbfgs.x = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
19334
19423
  opt->lbfgs.xp = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
@@ -19372,13 +19461,13 @@ enum ggml_opt_result ggml_opt(
19372
19461
 
19373
19462
  ctx = ggml_init(params_ctx);
19374
19463
  if (ctx == NULL) {
19375
- return GGML_OPT_NO_CONTEXT;
19464
+ return GGML_OPT_RESULT_NO_CONTEXT;
19376
19465
  }
19377
19466
 
19378
19467
  free_ctx = true;
19379
19468
  }
19380
19469
 
19381
- enum ggml_opt_result result = GGML_OPT_OK;
19470
+ enum ggml_opt_result result = GGML_OPT_RESULT_OK;
19382
19471
 
19383
19472
  struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
19384
19473
 
@@ -19417,14 +19506,14 @@ enum ggml_opt_result ggml_opt_resume_g(
19417
19506
  void * callback_data) {
19418
19507
 
19419
19508
  // build forward + backward compute graphs
19420
- enum ggml_opt_result result = GGML_OPT_OK;
19509
+ enum ggml_opt_result result = GGML_OPT_RESULT_OK;
19421
19510
 
19422
19511
  switch (opt->params.type) {
19423
- case GGML_OPT_ADAM:
19512
+ case GGML_OPT_TYPE_ADAM:
19424
19513
  {
19425
19514
  result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
19426
19515
  } break;
19427
- case GGML_OPT_LBFGS:
19516
+ case GGML_OPT_TYPE_LBFGS:
19428
19517
  {
19429
19518
  result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
19430
19519
  } break;
@@ -19461,8 +19550,10 @@ void ggml_quantize_init(enum ggml_type type) {
19461
19550
  switch (type) {
19462
19551
  case GGML_TYPE_IQ2_XXS:
19463
19552
  case GGML_TYPE_IQ2_XS:
19553
+ case GGML_TYPE_IQ2_S:
19464
19554
  case GGML_TYPE_IQ1_S: iq2xs_init_impl(type); break;
19465
19555
  case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
19556
+ case GGML_TYPE_IQ3_S: iq3xs_init_impl(512); break;
19466
19557
  default: // nothing
19467
19558
  break;
19468
19559
  }
@@ -19737,6 +19828,24 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
19737
19828
  result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
19738
19829
  GGML_ASSERT(result == row_size * nrows);
19739
19830
  } break;
19831
+ case GGML_TYPE_IQ3_S:
19832
+ {
19833
+ GGML_ASSERT(start % QK_K == 0);
19834
+ GGML_ASSERT(start % n_per_row == 0);
19835
+ size_t start_row = start / n_per_row;
19836
+ size_t row_size = ggml_row_size(type, n_per_row);
19837
+ result = quantize_iq3_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
19838
+ GGML_ASSERT(result == row_size * nrows);
19839
+ } break;
19840
+ case GGML_TYPE_IQ2_S:
19841
+ {
19842
+ GGML_ASSERT(start % QK_K == 0);
19843
+ GGML_ASSERT(start % n_per_row == 0);
19844
+ size_t start_row = start / n_per_row;
19845
+ size_t row_size = ggml_row_size(type, n_per_row);
19846
+ result = quantize_iq2_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
19847
+ GGML_ASSERT(result == row_size * nrows);
19848
+ } break;
19740
19849
  case GGML_TYPE_IQ1_S:
19741
19850
  {
19742
19851
  GGML_ASSERT(start % QK_K == 0);
@@ -19747,6 +19856,9 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
19747
19856
  GGML_ASSERT(result == row_size * nrows);
19748
19857
  } break;
19749
19858
  case GGML_TYPE_IQ4_NL:
19859
+ #if QK_K == 64
19860
+ case GGML_TYPE_IQ4_XS:
19861
+ #endif
19750
19862
  {
19751
19863
  GGML_ASSERT(start % QK4_NL == 0);
19752
19864
  GGML_ASSERT(start % n_per_row == 0);
@@ -19755,6 +19867,17 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
19755
19867
  result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
19756
19868
  GGML_ASSERT(result == row_size * nrows);
19757
19869
  } break;
19870
+ #if QK_K != 64
19871
+ case GGML_TYPE_IQ4_XS:
19872
+ {
19873
+ GGML_ASSERT(start % QK_K == 0);
19874
+ GGML_ASSERT(start % n_per_row == 0);
19875
+ size_t start_row = start / n_per_row;
19876
+ size_t row_size = ggml_row_size(type, n_per_row);
19877
+ result = quantize_iq4_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
19878
+ GGML_ASSERT(result == row_size * nrows);
19879
+ } break;
19880
+ #endif
19758
19881
  case GGML_TYPE_F16:
19759
19882
  {
19760
19883
  size_t elemsize = sizeof(ggml_fp16_t);