llama_cpp 0.11.1 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -87,7 +87,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
87
87
 
88
88
  // same as ggml_graph_compute but uses Metal
89
89
  // creates gf->n_threads command buffers in parallel
90
- void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
90
+ bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
91
91
 
92
92
  //
93
93
  // backend API
@@ -88,6 +88,8 @@ struct ggml_metal_context {
88
88
  GGML_METAL_DECL_KERNEL(get_rows_q5_K);
89
89
  GGML_METAL_DECL_KERNEL(get_rows_q6_K);
90
90
  GGML_METAL_DECL_KERNEL(get_rows_i32);
91
+ GGML_METAL_DECL_KERNEL(get_rows_iq2_xxs);
92
+ GGML_METAL_DECL_KERNEL(get_rows_iq2_xs);
91
93
  GGML_METAL_DECL_KERNEL(rms_norm);
92
94
  GGML_METAL_DECL_KERNEL(group_norm);
93
95
  GGML_METAL_DECL_KERNEL(norm);
@@ -106,6 +108,8 @@ struct ggml_metal_context {
106
108
  GGML_METAL_DECL_KERNEL(mul_mv_q4_K_f32);
107
109
  GGML_METAL_DECL_KERNEL(mul_mv_q5_K_f32);
108
110
  GGML_METAL_DECL_KERNEL(mul_mv_q6_K_f32);
111
+ GGML_METAL_DECL_KERNEL(mul_mv_iq2_xxs_f32);
112
+ GGML_METAL_DECL_KERNEL(mul_mv_iq2_xs_f32);
109
113
  GGML_METAL_DECL_KERNEL(mul_mv_id_f32_f32);
110
114
  //GGML_METAL_DECL_KERNEL(mul_mv_id_f16_f16);
111
115
  GGML_METAL_DECL_KERNEL(mul_mv_id_f16_f32);
@@ -121,6 +125,8 @@ struct ggml_metal_context {
121
125
  GGML_METAL_DECL_KERNEL(mul_mv_id_q4_K_f32);
122
126
  GGML_METAL_DECL_KERNEL(mul_mv_id_q5_K_f32);
123
127
  GGML_METAL_DECL_KERNEL(mul_mv_id_q6_K_f32);
128
+ GGML_METAL_DECL_KERNEL(mul_mv_id_iq2_xxs_f32);
129
+ GGML_METAL_DECL_KERNEL(mul_mv_id_iq2_xs_f32);
124
130
  GGML_METAL_DECL_KERNEL(mul_mm_f32_f32);
125
131
  GGML_METAL_DECL_KERNEL(mul_mm_f16_f32);
126
132
  GGML_METAL_DECL_KERNEL(mul_mm_q4_0_f32);
@@ -133,6 +139,8 @@ struct ggml_metal_context {
133
139
  GGML_METAL_DECL_KERNEL(mul_mm_q4_K_f32);
134
140
  GGML_METAL_DECL_KERNEL(mul_mm_q5_K_f32);
135
141
  GGML_METAL_DECL_KERNEL(mul_mm_q6_K_f32);
142
+ GGML_METAL_DECL_KERNEL(mul_mm_iq2_xxs_f32);
143
+ GGML_METAL_DECL_KERNEL(mul_mm_iq2_xs_f32);
136
144
  GGML_METAL_DECL_KERNEL(mul_mm_id_f32_f32);
137
145
  GGML_METAL_DECL_KERNEL(mul_mm_id_f16_f32);
138
146
  GGML_METAL_DECL_KERNEL(mul_mm_id_q4_0_f32);
@@ -145,6 +153,8 @@ struct ggml_metal_context {
145
153
  GGML_METAL_DECL_KERNEL(mul_mm_id_q4_K_f32);
146
154
  GGML_METAL_DECL_KERNEL(mul_mm_id_q5_K_f32);
147
155
  GGML_METAL_DECL_KERNEL(mul_mm_id_q6_K_f32);
156
+ GGML_METAL_DECL_KERNEL(mul_mm_id_iq2_xxs_f32);
157
+ GGML_METAL_DECL_KERNEL(mul_mm_id_iq2_xs_f32);
148
158
  GGML_METAL_DECL_KERNEL(rope_f32);
149
159
  GGML_METAL_DECL_KERNEL(rope_f16);
150
160
  GGML_METAL_DECL_KERNEL(alibi_f32);
@@ -258,14 +268,14 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
258
268
  bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
259
269
  #endif
260
270
  NSError * error = nil;
261
- NSString * libPath = [bundle pathForResource:@"ggml" ofType:@"metallib"];
271
+ NSString * libPath = [bundle pathForResource:@"default" ofType:@"metallib"];
262
272
  if (libPath != nil) {
263
273
  // pre-compiled library found
264
274
  NSURL * libURL = [NSURL fileURLWithPath:libPath];
265
275
  GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
266
276
  ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
267
277
  } else {
268
- GGML_METAL_LOG_INFO("%s: ggml.metallib not found, loading from source\n", __func__);
278
+ GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
269
279
 
270
280
  NSString * sourcePath;
271
281
  NSString * ggmlMetalPathResources = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
@@ -295,7 +305,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
295
305
  #endif
296
306
  // try to disable fast-math
297
307
  // NOTE: this seems to have no effect whatsoever
298
- // instead, in order to disable fast-math, we have to build ggml.metallib from the command line
308
+ // instead, in order to disable fast-math, we have to build default.metallib from the command line
299
309
  // using xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
300
310
  // and go through the "pre-compiled library found" path above
301
311
  //[options setFastMathEnabled:false];
@@ -379,6 +389,8 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
379
389
  GGML_METAL_ADD_KERNEL(get_rows_q5_K);
380
390
  GGML_METAL_ADD_KERNEL(get_rows_q6_K);
381
391
  GGML_METAL_ADD_KERNEL(get_rows_i32);
392
+ GGML_METAL_ADD_KERNEL(get_rows_iq2_xxs);
393
+ GGML_METAL_ADD_KERNEL(get_rows_iq2_xs);
382
394
  GGML_METAL_ADD_KERNEL(rms_norm);
383
395
  GGML_METAL_ADD_KERNEL(group_norm);
384
396
  GGML_METAL_ADD_KERNEL(norm);
@@ -397,6 +409,8 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
397
409
  GGML_METAL_ADD_KERNEL(mul_mv_q4_K_f32);
398
410
  GGML_METAL_ADD_KERNEL(mul_mv_q5_K_f32);
399
411
  GGML_METAL_ADD_KERNEL(mul_mv_q6_K_f32);
412
+ GGML_METAL_ADD_KERNEL(mul_mv_iq2_xxs_f32);
413
+ GGML_METAL_ADD_KERNEL(mul_mv_iq2_xs_f32);
400
414
  GGML_METAL_ADD_KERNEL(mul_mv_id_f32_f32);
401
415
  //GGML_METAL_ADD_KERNEL(mul_mv_id_f16_f16);
402
416
  GGML_METAL_ADD_KERNEL(mul_mv_id_f16_f32);
@@ -412,6 +426,8 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
412
426
  GGML_METAL_ADD_KERNEL(mul_mv_id_q4_K_f32);
413
427
  GGML_METAL_ADD_KERNEL(mul_mv_id_q5_K_f32);
414
428
  GGML_METAL_ADD_KERNEL(mul_mv_id_q6_K_f32);
429
+ GGML_METAL_ADD_KERNEL(mul_mv_id_iq2_xxs_f32);
430
+ GGML_METAL_ADD_KERNEL(mul_mv_id_iq2_xs_f32);
415
431
  if ([ctx->device supportsFamily:MTLGPUFamilyApple7]) {
416
432
  GGML_METAL_ADD_KERNEL(mul_mm_f32_f32);
417
433
  GGML_METAL_ADD_KERNEL(mul_mm_f16_f32);
@@ -425,6 +441,8 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
425
441
  GGML_METAL_ADD_KERNEL(mul_mm_q4_K_f32);
426
442
  GGML_METAL_ADD_KERNEL(mul_mm_q5_K_f32);
427
443
  GGML_METAL_ADD_KERNEL(mul_mm_q6_K_f32);
444
+ GGML_METAL_ADD_KERNEL(mul_mm_iq2_xxs_f32);
445
+ GGML_METAL_ADD_KERNEL(mul_mm_iq2_xs_f32);
428
446
  GGML_METAL_ADD_KERNEL(mul_mm_id_f32_f32);
429
447
  GGML_METAL_ADD_KERNEL(mul_mm_id_f16_f32);
430
448
  GGML_METAL_ADD_KERNEL(mul_mm_id_q4_0_f32);
@@ -437,6 +455,8 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
437
455
  GGML_METAL_ADD_KERNEL(mul_mm_id_q4_K_f32);
438
456
  GGML_METAL_ADD_KERNEL(mul_mm_id_q5_K_f32);
439
457
  GGML_METAL_ADD_KERNEL(mul_mm_id_q6_K_f32);
458
+ GGML_METAL_ADD_KERNEL(mul_mm_id_iq2_xxs_f32);
459
+ GGML_METAL_ADD_KERNEL(mul_mm_id_iq2_xs_f32);
440
460
  }
441
461
  GGML_METAL_ADD_KERNEL(rope_f32);
442
462
  GGML_METAL_ADD_KERNEL(rope_f16);
@@ -502,6 +522,8 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
502
522
  GGML_METAL_DEL_KERNEL(get_rows_q5_K);
503
523
  GGML_METAL_DEL_KERNEL(get_rows_q6_K);
504
524
  GGML_METAL_DEL_KERNEL(get_rows_i32);
525
+ GGML_METAL_DEL_KERNEL(get_rows_iq2_xxs);
526
+ GGML_METAL_DEL_KERNEL(get_rows_iq2_xs);
505
527
  GGML_METAL_DEL_KERNEL(rms_norm);
506
528
  GGML_METAL_DEL_KERNEL(group_norm);
507
529
  GGML_METAL_DEL_KERNEL(norm);
@@ -520,6 +542,8 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
520
542
  GGML_METAL_DEL_KERNEL(mul_mv_q4_K_f32);
521
543
  GGML_METAL_DEL_KERNEL(mul_mv_q5_K_f32);
522
544
  GGML_METAL_DEL_KERNEL(mul_mv_q6_K_f32);
545
+ GGML_METAL_DEL_KERNEL(mul_mv_iq2_xxs_f32);
546
+ GGML_METAL_DEL_KERNEL(mul_mv_iq2_xs_f32);
523
547
  GGML_METAL_DEL_KERNEL(mul_mv_id_f32_f32);
524
548
  //GGML_METAL_DEL_KERNEL(mul_mv_id_f16_f16);
525
549
  GGML_METAL_DEL_KERNEL(mul_mv_id_f16_f32);
@@ -535,6 +559,8 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
535
559
  GGML_METAL_DEL_KERNEL(mul_mv_id_q4_K_f32);
536
560
  GGML_METAL_DEL_KERNEL(mul_mv_id_q5_K_f32);
537
561
  GGML_METAL_DEL_KERNEL(mul_mv_id_q6_K_f32);
562
+ GGML_METAL_DEL_KERNEL(mul_mv_id_iq2_xxs_f32);
563
+ GGML_METAL_DEL_KERNEL(mul_mv_id_iq2_xs_f32);
538
564
  if ([ctx->device supportsFamily:MTLGPUFamilyApple7]) {
539
565
  GGML_METAL_DEL_KERNEL(mul_mm_f32_f32);
540
566
  GGML_METAL_DEL_KERNEL(mul_mm_f16_f32);
@@ -548,6 +574,8 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
548
574
  GGML_METAL_DEL_KERNEL(mul_mm_q4_K_f32);
549
575
  GGML_METAL_DEL_KERNEL(mul_mm_q5_K_f32);
550
576
  GGML_METAL_DEL_KERNEL(mul_mm_q6_K_f32);
577
+ GGML_METAL_DEL_KERNEL(mul_mm_iq2_xxs_f32);
578
+ GGML_METAL_DEL_KERNEL(mul_mm_iq2_xs_f32);
551
579
  GGML_METAL_DEL_KERNEL(mul_mm_id_f32_f32);
552
580
  GGML_METAL_DEL_KERNEL(mul_mm_id_f16_f32);
553
581
  GGML_METAL_DEL_KERNEL(mul_mm_id_q4_0_f32);
@@ -560,6 +588,8 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
560
588
  GGML_METAL_DEL_KERNEL(mul_mm_id_q4_K_f32);
561
589
  GGML_METAL_DEL_KERNEL(mul_mm_id_q5_K_f32);
562
590
  GGML_METAL_DEL_KERNEL(mul_mm_id_q6_K_f32);
591
+ GGML_METAL_DEL_KERNEL(mul_mm_id_iq2_xxs_f32);
592
+ GGML_METAL_DEL_KERNEL(mul_mm_id_iq2_xs_f32);
563
593
  }
564
594
  GGML_METAL_DEL_KERNEL(rope_f32);
565
595
  GGML_METAL_DEL_KERNEL(rope_f16);
@@ -977,7 +1007,7 @@ static bool ggml_metal_supports_op(const struct ggml_tensor * op) {
977
1007
  return false;
978
1008
  }
979
1009
  }
980
- void ggml_metal_graph_compute(
1010
+ bool ggml_metal_graph_compute(
981
1011
  struct ggml_metal_context * ctx,
982
1012
  struct ggml_cgraph * gf) {
983
1013
  @autoreleasepool {
@@ -1052,6 +1082,10 @@ void ggml_metal_graph_compute(
1052
1082
  GGML_ASSERT(!"unsupported op");
1053
1083
  }
1054
1084
 
1085
+ #ifndef GGML_METAL_NDEBUG
1086
+ [encoder pushDebugGroup:[NSString stringWithCString:ggml_op_desc(dst) encoding:NSUTF8StringEncoding]];
1087
+ #endif
1088
+
1055
1089
  const int64_t ne00 = src0 ? src0->ne[0] : 0;
1056
1090
  const int64_t ne01 = src0 ? src0->ne[1] : 0;
1057
1091
  const int64_t ne02 = src0 ? src0->ne[2] : 0;
@@ -1541,6 +1575,8 @@ void ggml_metal_graph_compute(
1541
1575
  case GGML_TYPE_Q4_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_K_f32]; break;
1542
1576
  case GGML_TYPE_Q5_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q5_K_f32]; break;
1543
1577
  case GGML_TYPE_Q6_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q6_K_f32]; break;
1578
+ case GGML_TYPE_IQ2_XXS: [encoder setComputePipelineState:ctx->pipeline_mul_mm_iq2_xxs_f32]; break;
1579
+ case GGML_TYPE_IQ2_XS : [encoder setComputePipelineState:ctx->pipeline_mul_mm_iq2_xs_f32]; break;
1544
1580
  default: GGML_ASSERT(false && "MUL MAT-MAT not implemented");
1545
1581
  }
1546
1582
  [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
@@ -1653,6 +1689,18 @@ void ggml_metal_graph_compute(
1653
1689
  nth1 = 32;
1654
1690
  [encoder setComputePipelineState:ctx->pipeline_mul_mv_q6_K_f32];
1655
1691
  } break;
1692
+ case GGML_TYPE_IQ2_XXS:
1693
+ {
1694
+ nth0 = 4;
1695
+ nth1 = 16;
1696
+ [encoder setComputePipelineState:ctx->pipeline_mul_mv_iq2_xxs_f32];
1697
+ } break;
1698
+ case GGML_TYPE_IQ2_XS:
1699
+ {
1700
+ nth0 = 4;
1701
+ nth1 = 16;
1702
+ [encoder setComputePipelineState:ctx->pipeline_mul_mv_iq2_xs_f32];
1703
+ } break;
1656
1704
  default:
1657
1705
  {
1658
1706
  GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
@@ -1689,6 +1737,11 @@ void ggml_metal_graph_compute(
1689
1737
  src0t == GGML_TYPE_Q2_K) { // || src0t == GGML_TYPE_Q4_K) {
1690
1738
  [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7)/8, ne11, ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
1691
1739
  }
1740
+ else if (src0t == GGML_TYPE_IQ2_XXS || src0t == GGML_TYPE_IQ2_XS) {
1741
+ const int mem_size = src0t == GGML_TYPE_IQ2_XXS ? 256*8+128 : 512*8+128;
1742
+ [encoder setThreadgroupMemoryLength:mem_size atIndex:0];
1743
+ [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7)/8, ne11, ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
1744
+ }
1692
1745
  else if (src0t == GGML_TYPE_Q4_K) {
1693
1746
  [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3)/4, ne11, ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
1694
1747
  }
@@ -1778,6 +1831,8 @@ void ggml_metal_graph_compute(
1778
1831
  case GGML_TYPE_Q4_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_id_q4_K_f32]; break;
1779
1832
  case GGML_TYPE_Q5_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_id_q5_K_f32]; break;
1780
1833
  case GGML_TYPE_Q6_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_id_q6_K_f32]; break;
1834
+ case GGML_TYPE_IQ2_XXS: [encoder setComputePipelineState:ctx->pipeline_mul_mm_id_iq2_xxs_f32]; break;
1835
+ case GGML_TYPE_IQ2_XS : [encoder setComputePipelineState:ctx->pipeline_mul_mm_id_iq2_xs_f32]; break;
1781
1836
  default: GGML_ASSERT(false && "MUL_MAT_ID not implemented");
1782
1837
  }
1783
1838
  [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
@@ -1893,6 +1948,18 @@ void ggml_metal_graph_compute(
1893
1948
  nth1 = 32;
1894
1949
  [encoder setComputePipelineState:ctx->pipeline_mul_mv_id_q6_K_f32];
1895
1950
  } break;
1951
+ case GGML_TYPE_IQ2_XXS:
1952
+ {
1953
+ nth0 = 4;
1954
+ nth1 = 16;
1955
+ [encoder setComputePipelineState:ctx->pipeline_mul_mv_id_iq2_xxs_f32];
1956
+ } break;
1957
+ case GGML_TYPE_IQ2_XS:
1958
+ {
1959
+ nth0 = 4;
1960
+ nth1 = 16;
1961
+ [encoder setComputePipelineState:ctx->pipeline_mul_mv_id_iq2_xs_f32];
1962
+ } break;
1896
1963
  default:
1897
1964
  {
1898
1965
  GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src2t);
@@ -1945,6 +2012,11 @@ void ggml_metal_graph_compute(
1945
2012
  src2t == GGML_TYPE_Q2_K) { // || src2t == GGML_TYPE_Q4_K) {
1946
2013
  [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 7)/8, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
1947
2014
  }
2015
+ else if (src2t == GGML_TYPE_IQ2_XXS || src2t == GGML_TYPE_IQ2_XS) {
2016
+ const int mem_size = src2t == GGML_TYPE_IQ2_XXS ? 256*8+128 : 512*8+128;
2017
+ [encoder setThreadgroupMemoryLength:mem_size atIndex:0];
2018
+ [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 7)/8, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
2019
+ }
1948
2020
  else if (src2t == GGML_TYPE_Q4_K) {
1949
2021
  [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 3)/4, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
1950
2022
  }
@@ -1982,6 +2054,8 @@ void ggml_metal_graph_compute(
1982
2054
  case GGML_TYPE_Q5_K: [encoder setComputePipelineState:ctx->pipeline_get_rows_q5_K]; break;
1983
2055
  case GGML_TYPE_Q6_K: [encoder setComputePipelineState:ctx->pipeline_get_rows_q6_K]; break;
1984
2056
  case GGML_TYPE_I32: [encoder setComputePipelineState:ctx->pipeline_get_rows_i32]; break;
2057
+ case GGML_TYPE_IQ2_XXS: [encoder setComputePipelineState:ctx->pipeline_get_rows_iq2_xxs]; break;
2058
+ case GGML_TYPE_IQ2_XS : [encoder setComputePipelineState:ctx->pipeline_get_rows_iq2_xs]; break;
1985
2059
  default: GGML_ASSERT(false && "not implemented");
1986
2060
  }
1987
2061
 
@@ -2383,6 +2457,10 @@ void ggml_metal_graph_compute(
2383
2457
  GGML_ASSERT(false);
2384
2458
  }
2385
2459
  }
2460
+
2461
+ #ifndef GGML_METAL_NDEBUG
2462
+ [encoder popDebugGroup];
2463
+ #endif
2386
2464
  }
2387
2465
 
2388
2466
  if (encoder != nil) {
@@ -2405,10 +2483,11 @@ void ggml_metal_graph_compute(
2405
2483
  MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
2406
2484
  if (status != MTLCommandBufferStatusCompleted) {
2407
2485
  GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
2408
- GGML_ASSERT(false);
2486
+ return false;
2409
2487
  }
2410
2488
  }
2411
2489
 
2490
+ return true;
2412
2491
  }
2413
2492
  }
2414
2493
 
@@ -2688,10 +2767,10 @@ static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffer_type(ggm
2688
2767
  UNUSED(backend);
2689
2768
  }
2690
2769
 
2691
- static void ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
2770
+ static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
2692
2771
  struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
2693
2772
 
2694
- ggml_metal_graph_compute(metal_ctx, cgraph);
2773
+ return ggml_metal_graph_compute(metal_ctx, cgraph);
2695
2774
  }
2696
2775
 
2697
2776
  static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {