llama_cpp 0.12.3 → 0.12.4

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only. All hunks below are against the bundled ggml C source.
@@ -218,6 +218,7 @@ inline static void * ggml_aligned_malloc(size_t size) {
                 break;
         }
         GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
+        GGML_ASSERT(false);
         return NULL;
     }
     return aligned_memory;
@@ -230,6 +231,38 @@ inline static void * ggml_aligned_malloc(size_t size) {
 #endif
 #endif
 
+inline static void * ggml_malloc(size_t size) {
+    if (size == 0) {
+        GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
+        return NULL;
+    }
+    void * result = malloc(size);
+    if (result == NULL) {
+        GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
+        GGML_ASSERT(false);
+    }
+    return result;
+}
+
+// calloc
+inline static void * ggml_calloc(size_t num, size_t size) {
+    if (num == 0 || size == 0) {
+        GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
+        return NULL;
+    }
+    void * result = calloc(num, size);
+    if (result == NULL) {
+        GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
+        GGML_ASSERT(false);
+    }
+    return result;
+}
+
+#define GGML_MALLOC(size)      ggml_malloc(size)
+#define GGML_CALLOC(num, size) ggml_calloc(num, size)
+
+#define GGML_FREE(ptr) free(ptr)
+
 #define UNUSED GGML_UNUSED
 #define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)
 
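Note: the new ggml_malloc/ggml_calloc helpers above make every allocation fail fast: a zero-size request prints a warning and returns NULL, and an out-of-memory result prints the requested size and trips GGML_ASSERT instead of letting a NULL pointer propagate into tensor code. The GGML_MALLOC/GGML_CALLOC/GGML_FREE macros give the rest of the file one switchable entry point. A minimal standalone sketch of the same pattern (xmalloc is an illustrative name, not part of ggml):

    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>

    // Fail-fast allocator: warn on zero-size requests, abort on OOM
    // instead of returning NULL into unsuspecting callers.
    static void * xmalloc(size_t size) {
        if (size == 0) {
            fprintf(stderr, "warning: zero-size allocation\n");
            return NULL;
        }
        void * p = malloc(size);
        if (p == NULL) {
            fprintf(stderr, "failed to allocate %6.2f MB\n", size/(1024.0*1024.0));
            assert(!"allocation failed"); // stands in for GGML_ASSERT(false)
        }
        return p;
    }

    int main(void) {
        double * v = xmalloc(16 * sizeof(double));
        v[0] = 42.0;
        printf("%f\n", v[0]);
        free(v);
        return 0;
    }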
@@ -248,6 +281,10 @@ inline static void * ggml_aligned_malloc(size_t size) {
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
+#elif defined(GGML_USE_VULKAN)
+#include "ggml-vulkan.h"
+#elif defined(GGML_USE_SYCL)
+#include "ggml-sycl.h"
 #endif
 
 // floating point type used to accumulate sums
@@ -595,6 +632,17 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot                  = ggml_vec_dot_iq2_xs_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
     },
+    [GGML_TYPE_IQ3_XXS] = {
+        .type_name                = "iq3_xxs",
+        .blck_size                = QK_K,
+        .type_size                = sizeof(block_iq3_xxs),
+        .is_quantized             = true,
+        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
+        .from_float               = quantize_row_iq3_xxs,
+        .from_float_reference     = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
+        .vec_dot                  = ggml_vec_dot_iq3_xxs_q8_K,
+        .vec_dot_type             = GGML_TYPE_Q8_K,
+    },
     [GGML_TYPE_Q8_K] = {
         .type_name                = "q8_K",
         .blck_size                = QK_K,
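Note: the IQ3_XXS entry follows the existing type_traits convention: one designated-initializer row per type, consulted everywhere else for block size, byte size, and (de)quantization kernels. A self-contained sketch of that dispatch style (the enum, fields, and sizes below are simplified stand-ins, not ggml's real definitions):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    enum toy_type { TOY_F32, TOY_Q8, TOY_TYPE_COUNT };

    typedef struct {
        const char * type_name;
        int          blck_size;   // elements per block
        size_t       type_size;   // bytes per block
        bool         is_quantized;
    } toy_type_traits;

    // Designated initializers keep the table readable and make gaps obvious.
    static const toy_type_traits traits[TOY_TYPE_COUNT] = {
        [TOY_F32] = { .type_name = "f32", .blck_size = 1,  .type_size = 4,  .is_quantized = false },
        [TOY_Q8]  = { .type_name = "q8",  .blck_size = 32, .type_size = 34, .is_quantized = true  },
    };

    // Row size in bytes for n elements, in the spirit of ggml_row_size().
    static size_t row_size(enum toy_type t, int n) {
        return (size_t)(n / traits[t].blck_size) * traits[t].type_size;
    }

    int main(void) {
        printf("%s: row of 256 elems = %zu bytes\n",
               traits[TOY_Q8].type_name, row_size(TOY_Q8, 256));
        return 0;
    }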
@@ -2140,6 +2188,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;    break;
         case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS; break;
         case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;  break;
+        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS; break;
         case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT;   break;
         case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT;   break;
     }
@@ -2293,6 +2342,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         ggml_init_cublas();
 #elif defined(GGML_USE_CLBLAST)
         ggml_cl_init();
+#elif defined(GGML_USE_VULKAN)
+        ggml_vk_init();
+#elif defined(GGML_USE_SYCL)
+        ggml_init_sycl();
 #endif
 
         ggml_setup_op_has_task_pass();
@@ -5296,7 +5349,7 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
         int s0,
         int p0,
         int d0) {
-    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false); // [N, OL, IC * K]
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
 
     struct ggml_tensor * result =
         ggml_mul_mat(ctx,
@@ -5374,16 +5427,15 @@ struct ggml_tensor * ggml_conv_depthwise_2d(
         int p1,
         int d0,
         int d1) {
+
     struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
     struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
                                         ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
-                                        s0, s1, p0, p1, d0, d1, true); // [N * IC, OH, OW, KH * KW]
-
-    struct ggml_tensor * result =
-        ggml_mul_mat(ctx,
-                ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1), // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
-                ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3])); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
+                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
+    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
 
+    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
+    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
     result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
 
     return result;
@@ -5404,7 +5456,8 @@ struct ggml_tensor * ggml_im2col(
         int p1,
         int d0,
         int d1,
-        bool is_2D) {
+        bool is_2D,
+        enum ggml_type dst_type) {
 
     if(is_2D) {
         GGML_ASSERT(a->ne[2] == b->ne[2]);
@@ -5428,7 +5481,7 @@ struct ggml_tensor * ggml_im2col(
         is_2D ? b->ne[3] : 1,
     };
 
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
     int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
     ggml_set_op_params(result, params, sizeof(params));
 
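Note: ggml_im2col now takes an explicit destination type instead of hardwiring GGML_TYPE_F16; ggml_conv_1d/ggml_conv_2d keep passing GGML_TYPE_F16, while GGML_TYPE_F32 selects the new f32 kernel added further down. A hedged call sketch, assuming a ggml context and tensors built elsewhere and the matching declaration from this release's ggml.h (build_im2col_f32 is our name):

    #include "ggml.h"

    // Sketch only: builds an F32 im2col node for a 2-D convolution.
    // a = kernel tensor (F16), b = image tensor (F32).
    struct ggml_tensor * build_im2col_f32(struct ggml_context * ctx,
                                          struct ggml_tensor * a,
                                          struct ggml_tensor * b) {
        const int s0 = 1, s1 = 1;  // stride
        const int p0 = 1, p1 = 1;  // padding
        const int d0 = 1, d1 = 1;  // dilation
        // The new trailing argument picks the result type: GGML_TYPE_F16
        // reproduces the pre-0.12.4 behavior, GGML_TYPE_F32 the new path.
        return ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1,
                           /*is_2D=*/true, GGML_TYPE_F32);
    }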
@@ -5453,7 +5506,7 @@ struct ggml_tensor * ggml_conv_2d(
         int p1,
         int d0,
         int d1) {
-    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true); // [N, OH, OW, IC * KH * KW]
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]
 
     struct ggml_tensor * result =
         ggml_mul_mat(ctx,
@@ -5579,12 +5632,13 @@ struct ggml_tensor * ggml_pool_2d(
         is_node = true;
     }
 
+    struct ggml_tensor * result;
     const int64_t ne[3] = {
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
         a->ne[2],
     };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
 
     int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
@@ -5592,7 +5646,6 @@ struct ggml_tensor * ggml_pool_2d(
     result->op = GGML_OP_POOL_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-
     return result;
 }
 
@@ -7207,6 +7260,17 @@ static void ggml_compute_forward_add_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
+#ifdef GGML_USE_CLBLAST
+    if (src1->backend == GGML_BACKEND_GPU) {
+        // TODO: OpenCL kernel support full broadcast
+        GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
+        if (ith == 0) {
+            ggml_cl_add(src0, src1, dst);
+        }
+        return;
+    }
+#endif
+
     const int nr = ggml_nrows(src0);
 
     GGML_TENSOR_BINARY_OP_LOCALS
@@ -7487,7 +7551,12 @@ static void ggml_compute_forward_add(
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_add_f32(params, src0, src1, dst);
+                if (src1->type == GGML_TYPE_F32) {
+                    ggml_compute_forward_add_f32(params, src0, src1, dst);
+                }
+                else {
+                    GGML_ASSERT(false);
+                }
             } break;
         case GGML_TYPE_F16:
             {
@@ -7513,6 +7582,7 @@ static void ggml_compute_forward_add(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
             {
                 ggml_compute_forward_add_q_f32(params, src0, src1, dst);
             } break;
@@ -7779,6 +7849,7 @@ static void ggml_compute_forward_add1(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
             {
                 ggml_compute_forward_add1_q_f32(params, src0, src1, dst);
             } break;
@@ -7898,6 +7969,7 @@ static void ggml_compute_forward_acc(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
         default:
             {
                 GGML_ASSERT(false);
@@ -7999,7 +8071,7 @@ static void ggml_compute_forward_mul_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-#ifdef GGML_USE_CLBLAST
+#if defined(GGML_USE_CLBLAST)
     if (src1->backend == GGML_BACKEND_GPU) {
         // TODO: OpenCL kernel support full broadcast
         GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
@@ -9954,7 +10026,7 @@ static void ggml_compute_forward_mul_mat(
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(dst)) {
         const int64_t ne_plane      = ne01*ne00;
-        const int64_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
+        const size_t  desired_wsize = ne13*ne12*ne_plane*sizeof(float);
         UNUSED(desired_wsize);
 
         if (params->type == GGML_TASK_INIT) {
@@ -10649,6 +10721,7 @@ static void ggml_compute_forward_out_prod(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
             {
                 ggml_compute_forward_out_prod_q_f32(params, src0, src1, dst);
             } break;
@@ -10828,6 +10901,7 @@ static void ggml_compute_forward_set(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
         default:
             {
                 GGML_ASSERT(false);
@@ -11024,6 +11098,7 @@ static void ggml_compute_forward_get_rows(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
             {
                 ggml_compute_forward_get_rows_q(params, src0, src1, dst);
             } break;
@@ -11671,6 +11746,7 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -11747,6 +11823,7 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -12416,6 +12493,92 @@ static void ggml_compute_forward_conv_transpose_1d(
     }
 }
 
+// src0: kernel [OC, IC, KH, KW]
+// src1: image [N, IC, IH, IW]
+// dst: result [N, OH, OW, IC*KH*KW]
+static void ggml_compute_forward_im2col_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+              struct ggml_tensor * dst) {
+    GGML_ASSERT(src0->type == GGML_TYPE_F16);
+    GGML_ASSERT(src1->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    int64_t t0 = ggml_perf_time_us();
+    UNUSED(t0);
+
+    GGML_TENSOR_BINARY_OP_LOCALS;
+
+    const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
+    const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
+    const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
+    const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
+    const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
+    const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
+    const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t N  = is_2D ? ne13 : ne12;
+    const int64_t IC = is_2D ? ne12 : ne11;
+    const int64_t IH = is_2D ? ne11 : 1;
+    const int64_t IW = ne10;
+
+    const int64_t KH = is_2D ? ne01 : 1;
+    const int64_t KW = ne00;
+
+    const int64_t OH = is_2D ? ne2 : 1;
+    const int64_t OW = ne1;
+
+    int ofs0 = is_2D ? nb13 : nb12;
+    int ofs1 = is_2D ? nb12 : nb11;
+
+    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
+    GGML_ASSERT(nb10 == sizeof(float));
+
+    if (params->type == GGML_TASK_INIT) {
+        return;
+    }
+
+    if (params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+    {
+        float * const wdata = (float *) dst->data;
+
+        for (int64_t in = 0; in < N; in++) {
+            for (int64_t ioh = 0; ioh < OH; ioh++) { // 1
+                for (int64_t iow = 0; iow < OW; iow++) {
+                    for (int64_t iic = ith; iic < IC; iic += nth) {
+
+                        // micro kernel
+                        float * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
+                        const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
+
+                        for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
+                            for (int64_t ikw = 0; ikw < KW; ikw++) {
+                                const int64_t iiw = iow*s0 + ikw*d0 - p0;
+                                const int64_t iih = ioh*s1 + ikh*d1 - p1;
+
+                                if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
+                                    dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
+                                } else {
+                                    dst_data[iic*(KH*KW) + ikh*KW + ikw] = (src_data[iih*IW + iiw]);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+
 // src0: kernel [OC, IC, KH, KW]
 // src1: image [N, IC, IH, IW]
 // dst: result [N, OH, OW, IC*KH*KW]
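Note: stripped of the ggml plumbing, the new f32 kernel above is plain im2col: for every output pixel, copy its zero-padded receptive field into one contiguous row, so the convolution becomes a matrix multiply. A self-contained toy version of the same indexing for a single channel (illustrative only, no ggml types):

    #include <stdio.h>

    // im2col for one IHxIW channel with a KHxKW kernel, stride 1, padding 1:
    // out gets OH*OW rows, each holding the KH*KW patch around one pixel.
    int main(void) {
        enum { IH = 3, IW = 3, KH = 3, KW = 3, P = 1, OH = 3, OW = 3 };
        float img[IH][IW] = { {1,2,3}, {4,5,6}, {7,8,9} };
        float out[OH*OW][KH*KW];

        for (int oh = 0; oh < OH; oh++) {
            for (int ow = 0; ow < OW; ow++) {
                for (int kh = 0; kh < KH; kh++) {
                    for (int kw = 0; kw < KW; kw++) {
                        int ih = oh + kh - P;  // input row after padding shift
                        int iw = ow + kw - P;  // input column
                        out[oh*OW + ow][kh*KW + kw] =
                            (ih < 0 || ih >= IH || iw < 0 || iw >= IW)
                                ? 0.0f : img[ih][iw];
                    }
                }
            }
        }

        // first row: the 3x3 patch around pixel (0,0), zero-padded top/left
        for (int k = 0; k < KH*KW; k++) printf("%g ", out[0][k]);
        printf("\n"); // prints: 0 0 0 0 1 2 0 4 5
        return 0;
    }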
@@ -12506,14 +12669,14 @@ static void ggml_compute_forward_im2col(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
               struct ggml_tensor * dst) {
-    switch (src0->type) {
+    switch (dst->type) {
         case GGML_TYPE_F16:
             {
                 ggml_compute_forward_im2col_f16(params, src0, src1, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                GGML_ASSERT(false);
+                ggml_compute_forward_im2col_f32(params, src0, src1, dst);
             } break;
         default:
             {
@@ -12704,8 +12867,8 @@ static void ggml_compute_forward_pool_2d(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src,
         struct ggml_tensor * dst) {
-    assert(src->type == GGML_TYPE_F32);
-    assert(params->ith == 0);
+    GGML_ASSERT(src->type == GGML_TYPE_F32);
+    GGML_ASSERT(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
@@ -14683,8 +14846,26 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
     }
     GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
     GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
+#elif defined(GGML_USE_VULKAN)
+    const bool skip_cpu = ggml_vk_compute_forward(params, tensor);
+#ifdef GGML_VULKAN_CHECK_RESULTS
+    if (skip_cpu) {
+        ggml_vk_check_results_1(params, tensor);
+    }
+#endif
+    if (skip_cpu) {
+        return;
+    }
+    GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
+    GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
 #endif // GGML_USE_CUBLAS
 
+#ifdef GGML_USE_SYCL
+    bool skip_cpu = ggml_sycl_compute_forward(params, tensor);
+    if (skip_cpu) {
+        return;
+    }
+#endif // GGML_USE_SYCL
     switch (tensor->op) {
         case GGML_OP_DUP:
             {
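Note: the Vulkan and SYCL hooks use the same contract as the existing CUDA one: the backend gets first refusal on each graph node and reports whether it produced the result; only when it declines does the big CPU op switch run. A minimal sketch of that dispatch shape (backend_try_compute is a made-up stand-in, not a ggml API):

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { const char * op_name; bool on_gpu; } node_t;

    // Stand-in for ggml_vk_compute_forward / ggml_sycl_compute_forward:
    // returns true if the backend executed the node itself.
    static bool backend_try_compute(const node_t * n) {
        return n->on_gpu;  // a real backend would also check op support
    }

    static void compute_forward(const node_t * n) {
        if (backend_try_compute(n)) {
            return;        // "skip_cpu": the accelerator already wrote dst
        }
        printf("CPU fallback for %s\n", n->op_name);
    }

    int main(void) {
        node_t a = { "mul_mat", true  };
        node_t b = { "rope",    false };
        compute_forward(&a);
        compute_forward(&b);
        return 0;
    }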
@@ -15087,13 +15268,13 @@ struct ggml_hash_set ggml_hash_set_new(size_t size) {
     size = ggml_hash_size(size);
     struct ggml_hash_set result;
     result.size = size;
-    result.keys = malloc(sizeof(struct ggml_tensor *) * size);
+    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
     memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
     return result;
 }
 
 static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
-    free(hash_set.keys);
+    GGML_FREE(hash_set.keys);
 }
 
 struct hash_map {
@@ -15102,17 +15283,17 @@ struct hash_map {
 };
 
 static struct hash_map * ggml_new_hash_map(size_t size) {
-    struct hash_map * result = malloc(sizeof(struct hash_map));
+    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
     result->set = ggml_hash_set_new(size);
-    result->vals = malloc(sizeof(struct ggml_tensor *) * result->set.size);
+    result->vals = GGML_MALLOC(sizeof(struct ggml_tensor *) * result->set.size);
     memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
     return result;
 }
 
 static void ggml_hash_map_free(struct hash_map * map) {
     ggml_hash_set_free(map->set);
-    free(map->vals);
-    free(map);
+    GGML_FREE(map->vals);
+    GGML_FREE(map);
 }
 
 // gradient checkpointing
@@ -16597,7 +16778,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_SOFT_MAX:
             {
-                n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
+                n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
             } break;
        case GGML_OP_CONV_TRANSPOSE_1D:
            {
@@ -16890,12 +17071,16 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     struct ggml_cplan cplan;
     memset(&cplan, 0, sizeof(struct ggml_cplan));
 
+    int max_tasks = 1;
+
     // thread scheduling for the different operations + work buffer size estimation
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
 
         const int n_tasks = ggml_get_n_tasks(node, n_threads);
 
+        max_tasks = MAX(max_tasks, n_tasks);
+
         size_t cur = 0;
 
         switch (node->op) {
@@ -17062,7 +17247,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
         work_size += CACHE_LINE_SIZE*(n_threads - 1);
     }
 
-    cplan.n_threads = n_threads;
+    cplan.n_threads = MIN(max_tasks, n_threads);
     cplan.work_size = work_size;
     cplan.work_data = NULL;
 
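Note: tracking max_tasks while planning lets ggml_graph_compute spawn only as many workers as the widest node can occupy, instead of always launching n_threads. A toy version of the clamp (illustrative numbers):

    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void) {
        const int n_threads    = 16;
        const int node_tasks[] = { 1, 4, 2, 4 };  // per-node parallelism
        int max_tasks = 1;
        for (int i = 0; i < 4; i++) {
            max_tasks = MAX(max_tasks, node_tasks[i]);
        }
        // cplan.n_threads = MIN(max_tasks, n_threads): never start workers
        // that no node in the graph can keep busy.
        printf("planned threads: %d\n", MIN(max_tasks, n_threads));  // 4
        return 0;
    }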
@@ -17079,6 +17264,17 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         }
     }
 
+#ifdef GGML_USE_VULKAN
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        ggml_vk_preallocate_buffers_graph(cgraph->nodes[i]);
+    }
+    ggml_vk_preallocate_buffers();
+
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        ggml_vk_build_graph(cgraph->nodes[i], i == cgraph->n_nodes - 1);
+    }
+#endif
+
     const int n_threads = cplan->n_threads;
 
     struct ggml_compute_state_shared state_shared = {
@@ -17130,6 +17326,10 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         }
     }
 
+#ifdef GGML_USE_VULKAN
+    ggml_vk_graph_cleanup();
+#endif
+
     // performance stats (graph)
     {
         int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles;
@@ -18770,6 +18970,7 @@ void ggml_quantize_init(enum ggml_type type) {
     switch (type) {
         case GGML_TYPE_IQ2_XXS: iq2xs_init_impl(256); break;
         case GGML_TYPE_IQ2_XS:  iq2xs_init_impl(512); break;
+        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
        default: // nothing
            break;
    }
@@ -19032,6 +19233,15 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 result = quantize_iq2_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+        case GGML_TYPE_IQ3_XXS:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
         case GGML_TYPE_F16:
             {
                 size_t elemsize = sizeof(ggml_fp16_t);
@@ -19158,6 +19368,25 @@ struct gguf_context {
     void * data;
 };
 
+static size_t gguf_type_size(enum gguf_type type) {
+    GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
+    return GGUF_TYPE_SIZE[type];
+}
+
+static void gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
+    GGML_ASSERT(info->n_dims <= GGML_MAX_DIMS);
+    GGML_ASSERT(0 <= info->type && info->type < GGML_TYPE_COUNT);
+
+    for (uint32_t i = 0; i < info->n_dims; ++i) {
+        GGML_ASSERT(info->ne[i] > 0);
+    }
+
+    // prevent overflow for total number of elements
+    GGML_ASSERT(INT64_MAX/info->ne[1] > info->ne[0]);
+    GGML_ASSERT(INT64_MAX/info->ne[2] > info->ne[0]*info->ne[1]);
+    GGML_ASSERT(INT64_MAX/info->ne[3] > info->ne[0]*info->ne[1]*info->ne[2]);
+}
+
 static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
     const size_t n = fread(dst, 1, size, file);
     *offset += n;
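Note: gguf_tensor_info_sanitize rejects shapes whose element count would overflow int64 by testing each dimension against INT64_MAX divided by the running product before multiplying. The same guard in isolation (shape_fits_int64 is our name for the sketch):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    // False if ne[0]*ne[1]*ne[2]*ne[3] would overflow int64_t; each factor
    // is checked against INT64_MAX / (product so far) before multiplying,
    // mirroring the asserts in gguf_tensor_info_sanitize.
    static bool shape_fits_int64(const int64_t ne[4]) {
        int64_t prod = 1;
        for (int i = 0; i < 4; i++) {
            if (ne[i] <= 0)               return false;
            if (prod > INT64_MAX / ne[i]) return false;  // would overflow
            prod *= ne[i];
        }
        return true;
    }

    int main(void) {
        const int64_t ok[4]  = { 4096, 4096, 1, 1 };
        const int64_t bad[4] = { INT64_MAX / 2, 4, 1, 1 };
        printf("%d %d\n", shape_fits_int64(ok), shape_fits_int64(bad));  // 1 0
        return 0;
    }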
@@ -19170,8 +19399,17 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
 
     bool ok = true;
 
-    ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
-    ok = ok && gguf_fread_el(file, p->data, p->n, offset);
+    ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
+
+    // early exit if string length is invalid, prevents from integer overflow
+    if (p->n == SIZE_MAX) {
+        fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n);
+        return false;
+    }
+
+    p->data = GGML_CALLOC(p->n + 1, 1);
+
+    ok = ok && gguf_fread_el(file, p->data, p->n, offset);
 
     return ok;
 }
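Note: the rewritten gguf_fread_str validates the length prefix first, because with p->n == SIZE_MAX the expression p->n + 1 wraps to zero and calloc would hand back a 0-byte buffer for an attacker-chosen read size. A standalone sketch of the hardened reader (toy struct, same guard):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct str { uint64_t n; char * data; };

    // Reads a u64 length then that many bytes; rejects a length for which
    // the n + 1 in the calloc below would wrap around.
    static bool fread_str(FILE * f, struct str * p) {
        if (fread(&p->n, sizeof(p->n), 1, f) != 1) return false;
        if (p->n == SIZE_MAX) {                  // n + 1 would wrap to 0
            fprintf(stderr, "invalid string length\n");
            return false;
        }
        p->data = calloc(p->n + 1, 1);           // +1 keeps it NUL-terminated
        if (p->data == NULL) return false;
        return fread(p->data, 1, p->n, f) == p->n;
    }

    int main(void) {
        FILE * f = tmpfile();
        uint64_t n = 5;
        fwrite(&n, sizeof(n), 1, f);
        fwrite("hello", 1, 5, f);
        rewind(f);
        struct str s;
        if (fread_str(f, &s)) { printf("read: %s\n", s.data); free(s.data); }
        fclose(f);
        return 0;
    }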
@@ -19243,6 +19481,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             return NULL;
         }
 
+        // sanity-checks to prevent from integer/buffer overflows
+
+        ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct gguf_tensor_info));
+        ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/ggml_tensor_overhead());
+        ok = ok && (ctx->header.n_kv      < (SIZE_MAX/2)/sizeof(struct gguf_kv));
+
         if (!ok) {
             fprintf(stderr, "%s: failed to read header\n", __func__);
             fclose(file);
@@ -19253,7 +19497,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     // read the kv pairs
     {
-        ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
+        ctx->kv = GGML_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
 
         for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
             struct gguf_kv * kv = &ctx->kv[i];
@@ -19281,7 +19525,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 case GGUF_TYPE_ARRAY:
                     {
                         ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
-                        ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
+                        ok = ok && gguf_fread_el(file, &kv->value.arr.n,    sizeof(kv->value.arr.n),    &offset);
 
                         switch (kv->value.arr.type) {
                             case GGUF_TYPE_UINT8:
@@ -19296,21 +19540,39 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                             case GGUF_TYPE_FLOAT64:
                             case GGUF_TYPE_BOOL:
                                 {
-                                    kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
-                                    ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
+                                    // prevent from integer overflow in the malloc below
+                                    if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
+                                        fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
+                                        fclose(file);
+                                        gguf_free(ctx);
+                                        return NULL;
+                                    }
+
+                                    kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * gguf_type_size(kv->value.arr.type));
+
+                                    ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
                                 } break;
                             case GGUF_TYPE_STRING:
                                 {
-                                    kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
+                                    // prevent from integer overflow in the malloc below
+                                    if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
+                                        fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
+                                        fclose(file);
+                                        gguf_free(ctx);
+                                        return NULL;
+                                    }
+
+                                    kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * sizeof(struct gguf_str));
+
                                     for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
                                         ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
                                     }
                                 } break;
                             case GGUF_TYPE_ARRAY:
-                            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
+                            default: GGML_ASSERT(false && "invalid type"); break;
                         }
                     } break;
-                case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
+                default: GGML_ASSERT(false && "invalid type");
             }
 
             if (!ok) {
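Note: both array branches now apply the classic pre-multiplication check, n >= SIZE_MAX / elem_size, so the byte count passed to the allocator cannot wrap and silently under-allocate. The guard extracted into a hedged helper (checked_array_malloc is our name, not ggml's):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    // malloc(n * elem_size) with the same wraparound guard the gguf loader
    // uses: if n >= SIZE_MAX / elem_size, the product overflows size_t.
    static void * checked_array_malloc(size_t n, size_t elem_size) {
        if (elem_size != 0 && n >= SIZE_MAX / elem_size) {
            fprintf(stderr, "array size is too large (%zu)\n", n);
            return NULL;
        }
        return malloc(n * elem_size);
    }

    int main(void) {
        void * ok  = checked_array_malloc(1024, 8);
        void * bad = checked_array_malloc(SIZE_MAX / 2, 8);  // rejected
        printf("ok=%p bad=%p\n", ok, bad);
        free(ok);
        return 0;
    }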
@@ -19328,7 +19590,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     // read the tensor infos
     {
-        ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
+        ctx->infos = GGML_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
 
         for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
             struct gguf_tensor_info * info = &ctx->infos[i];
@@ -19339,12 +19601,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
             ok = ok && gguf_fread_str(file, &info->name, &offset);
             ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
+
+            ok = ok && (info->n_dims <= GGML_MAX_DIMS);
+
             for (uint32_t j = 0; j < info->n_dims; ++j) {
                 ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
             }
+
             ok = ok && gguf_fread_el (file, &info->type,   sizeof(info->type),   &offset);
             ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
 
+            gguf_tensor_info_sanitize(info);
+
             if (!ok) {
                 fprintf(stderr, "%s: failed to read tensor info\n", __func__);
                 fclose(file);
@@ -19498,12 +19766,12 @@ void gguf_free(struct gguf_context * ctx) {
             struct gguf_kv * kv = &ctx->kv[i];
 
             if (kv->key.data) {
-                free(kv->key.data);
+                GGML_FREE(kv->key.data);
             }
 
             if (kv->type == GGUF_TYPE_STRING) {
                 if (kv->value.str.data) {
-                    free(kv->value.str.data);
+                    GGML_FREE(kv->value.str.data);
                 }
             }
 
@@ -19513,16 +19781,16 @@ void gguf_free(struct gguf_context * ctx) {
                     for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
                         struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
                         if (str->data) {
-                            free(str->data);
+                            GGML_FREE(str->data);
                         }
                     }
                 }
-                free(kv->value.arr.data);
+                GGML_FREE(kv->value.arr.data);
             }
         }
     }
 
-    free(ctx->kv);
+    GGML_FREE(ctx->kv);
 }
 
 if (ctx->infos) {
@@ -19530,11 +19798,11 @@ void gguf_free(struct gguf_context * ctx) {
             struct gguf_tensor_info * info = &ctx->infos[i];
 
             if (info->name.data) {
-                free(info->name.data);
+                GGML_FREE(info->name.data);
             }
         }
 
-        free(ctx->infos);
+        GGML_FREE(ctx->infos);
     }
 
     GGML_ALIGNED_FREE(ctx);
@@ -19835,8 +20103,8 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
     ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
     ctx->kv[idx].value.arr.type = type;
     ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]);
-    memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]);
+    ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
+    memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
 }
 
 void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
@@ -19845,7 +20113,7 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
     ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
     ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
     ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
+    ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
     for (int i = 0; i < n; i++) {
         struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
         str->n = strlen(data[i]);
@@ -19872,19 +20140,19 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
             case GGUF_TYPE_ARRAY:
                 {
                     if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
-                        const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
+                        const char ** data = GGML_MALLOC(src->kv[i].value.arr.n*sizeof(char *));
                         for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
                             data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
                         }
                         gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
-                        free((void *)data);
+                        GGML_FREE((void *)data);
                     } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
                         GGML_ASSERT(false && "nested arrays not supported");
                     } else {
                         gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
                     }
                 } break;
-            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
+            default: GGML_ASSERT(false && "invalid type"); break;
         }
     }
 }
@@ -19960,7 +20228,7 @@ struct gguf_buf {
 
 static struct gguf_buf gguf_buf_init(size_t size) {
     struct gguf_buf buf = {
-        /*buf.data =*/ size == 0 ? NULL : malloc(size),
+        /*buf.data =*/ size == 0 ? NULL : GGML_MALLOC(size),
         /*buf.size =*/ size,
         /*buf.offset =*/ 0,
     };
@@ -19970,7 +20238,7 @@ static struct gguf_buf gguf_buf_init(size_t size) {
 
 static void gguf_buf_free(struct gguf_buf buf) {
     if (buf.data) {
-        free(buf.data);
+        GGML_FREE(buf.data);
     }
 }
 
@@ -20051,7 +20319,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
                             case GGUF_TYPE_FLOAT64:
                             case GGUF_TYPE_BOOL:
                                 {
-                                    gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
+                                    gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type));
                                 } break;
                             case GGUF_TYPE_STRING:
                                 {
@@ -20060,10 +20328,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
                                     }
                                 } break;
                             case GGUF_TYPE_ARRAY:
-                            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
+                            default: GGML_ASSERT(false && "invalid type"); break;
                         }
                     } break;
-                case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
+                default: GGML_ASSERT(false && "invalid type");
             }
         }
 
@@ -20264,7 +20532,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }
 
 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_SYCL)
     return 1;
 #else
     return 0;
@@ -20287,8 +20555,33 @@ int ggml_cpu_has_clblast(void) {
 #endif
 }
 
+int ggml_cpu_has_vulkan(void) {
+#if defined(GGML_USE_VULKAN)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+int ggml_cpu_has_kompute(void) {
+#if defined(GGML_USE_KOMPUTE)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+int ggml_cpu_has_sycl(void) {
+#if defined(GGML_USE_SYCL)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_gpublas(void) {
-    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast();
+    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
+           ggml_cpu_has_sycl();
 }
 
 int ggml_cpu_has_sse3(void) {
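Note: the new ggml_cpu_has_vulkan/ggml_cpu_has_kompute/ggml_cpu_has_sycl reporters follow the established compile-time flag pattern, and ggml_cpu_has_gpublas now ORs them in. A hedged usage sketch, assuming this release's ggml is linked:

    #include <stdio.h>
    #include "ggml.h"

    // Prints which GPU code paths this ggml build was compiled with.
    int main(void) {
        printf("cublas:  %d\n", ggml_cpu_has_cublas());
        printf("clblast: %d\n", ggml_cpu_has_clblast());
        printf("vulkan:  %d\n", ggml_cpu_has_vulkan());
        printf("kompute: %d\n", ggml_cpu_has_kompute());
        printf("sycl:    %d\n", ggml_cpu_has_sycl());
        printf("any gpu: %d\n", ggml_cpu_has_gpublas());
        return 0;
    }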