llama_cpp 0.14.7 → 0.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +2 -2
- data/ext/llama_cpp/extconf.rb +2 -1
- data/ext/llama_cpp/llama_cpp.cpp +59 -9
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +24 -3
- data/vendor/tmp/llama.cpp/Makefile +42 -18
- data/vendor/tmp/llama.cpp/ggml-backend.c +7 -5
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +295 -17
- data/vendor/tmp/llama.cpp/ggml-impl.h +78 -1
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +7 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +399 -184
- data/vendor/tmp/llama.cpp/ggml-metal.metal +654 -18
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +1 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +302 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +28 -16
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +46843 -39205
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +951 -263
- data/vendor/tmp/llama.cpp/ggml.c +1457 -92
- data/vendor/tmp/llama.cpp/ggml.h +37 -7
- data/vendor/tmp/llama.cpp/llama.cpp +671 -403
- data/vendor/tmp/llama.cpp/llama.h +34 -10
- data/vendor/tmp/llama.cpp/sgemm.cpp +134 -103
- data/vendor/tmp/llama.cpp/sgemm.h +4 -2
- data/vendor/tmp/llama.cpp/unicode-data.cpp +1188 -656
- data/vendor/tmp/llama.cpp/unicode-data.h +4 -3
- data/vendor/tmp/llama.cpp/unicode.cpp +590 -49
- data/vendor/tmp/llama.cpp/unicode.h +6 -3
- metadata +3 -3
data/vendor/tmp/llama.cpp/ggml-metal.m

@@ -46,8 +46,10 @@ enum ggml_metal_kernel_type {
     GGML_METAL_KERNEL_TYPE_GELU_QUICK_4,
     GGML_METAL_KERNEL_TYPE_SILU,
     GGML_METAL_KERNEL_TYPE_SILU_4,
-    …
-    …
+    GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16,
+    GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4,
+    GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32,
+    GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32_4,
     GGML_METAL_KERNEL_TYPE_DIAG_MASK_INF,
     GGML_METAL_KERNEL_TYPE_DIAG_MASK_INF_8,
     GGML_METAL_KERNEL_TYPE_GET_ROWS_F32,
@@ -177,6 +179,14 @@ enum ggml_metal_kernel_type {
     GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC,
     GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC,
     GGML_METAL_KERNEL_TYPE_LEAKY_RELU_F32,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H64,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H80,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H96,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H112,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H128,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H256,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H128,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H256,
     GGML_METAL_KERNEL_TYPE_CPY_F32_F16,
     GGML_METAL_KERNEL_TYPE_CPY_F32_F32,
     GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0,
@@ -255,11 +265,20 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){

 static void * ggml_metal_host_malloc(size_t n) {
     void * data = NULL;
+
+#if TARGET_OS_OSX
+    kern_return_t err = vm_allocate((vm_map_t) mach_task_self(), (void *) &data, n, VM_FLAGS_ANYWHERE);
+    if (err != KERN_SUCCESS) {
+        GGML_METAL_LOG_ERROR("%s: error: vm_allocate failed\n", __func__);
+        return NULL;
+    }
+#else
     const int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
     if (result != 0) {
         GGML_METAL_LOG_ERROR("%s: error: posix_memalign failed\n", __func__);
         return NULL;
     }
+#endif

     return data;
 }
@@ -443,7 +462,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
         }

         /*
-        GGML_METAL_LOG_INFO("%s: loaded %-
+        GGML_METAL_LOG_INFO("%s: loaded %-40s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) kernel->pipeline, \
                 (int) kernel->pipeline.maxTotalThreadsPerThreadgroup, \
                 (int) kernel->pipeline.threadExecutionWidth); \
         */
@@ -459,172 +478,182 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
                 return NULL; \
             } \
         } else { \
-            GGML_METAL_LOG_WARN("%s: skipping %-
+            GGML_METAL_LOG_WARN("%s: skipping %-40s (not supported)\n", __func__, "kernel_"#name); \
         }

         // simd_sum and simd_max requires MTLGPUFamilyApple7

-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ADD,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ADD_ROW,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_ROW,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_DIV,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_DIV_ROW,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SCALE,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SCALE_4,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CLAMP,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_TANH,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RELU,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_4,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_QUICK,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_QUICK_4,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU,
-        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU_4,
-        … (old lines 484-627: the remaining GGML_METAL_ADD_KERNEL registrations, truncated) …
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ADD, add, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ADD_ROW, add_row, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL, mul, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_ROW, mul_row, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_DIV, div, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_DIV_ROW, div_row, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SCALE, scale, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SCALE_4, scale_4, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CLAMP, clamp, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_TANH, tanh, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RELU, relu, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU, gelu, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_4, gelu_4, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_QUICK, gelu_quick, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_QUICK_4, gelu_quick_4, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU, silu, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU_4, silu_4, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16, soft_max_f16, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4, soft_max_f16_4, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32, soft_max_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32_4, soft_max_f32_4, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_DIAG_MASK_INF, diag_mask_inf, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_DIAG_MASK_INF_8, diag_mask_inf_8, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_F32, get_rows_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_F16, get_rows_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q4_0, get_rows_q4_0, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q4_1, get_rows_q4_1, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q5_0, get_rows_q5_0, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q5_1, get_rows_q5_1, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q8_0, get_rows_q8_0, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q2_K, get_rows_q2_K, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q3_K, get_rows_q3_K, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q4_K, get_rows_q4_K, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q5_K, get_rows_q5_K, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_Q6_K, get_rows_q6_K, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XXS, get_rows_iq2_xxs, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_XS, get_rows_iq2_xs, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_XXS, get_rows_iq3_xxs, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ3_S, get_rows_iq3_s, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ2_S, get_rows_iq2_s, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_S, get_rows_iq1_s, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ1_M, get_rows_iq1_m, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL, get_rows_iq4_nl, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_XS, get_rows_iq4_xs, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_I32, get_rows_i32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RMS_NORM, rms_norm, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GROUP_NORM, group_norm, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NORM, norm, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32, mul_mv_f32_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F16, mul_mv_f16_f16, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32, mul_mv_f16_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_1ROW, mul_mv_f16_f32_1row, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_L4, mul_mv_f16_f32_l4, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q4_0_F32, mul_mv_q4_0_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q4_1_F32, mul_mv_q4_1_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q5_0_F32, mul_mv_q5_0_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q5_1_F32, mul_mv_q5_1_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q8_0_F32, mul_mv_q8_0_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q2_K_F32, mul_mv_q2_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q3_K_F32, mul_mv_q3_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q4_K_F32, mul_mv_q4_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q5_K_F32, mul_mv_q5_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_Q6_K_F32, mul_mv_q6_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XXS_F32, mul_mv_iq2_xxs_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_XS_F32, mul_mv_iq2_xs_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_XXS_F32, mul_mv_iq3_xxs_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ3_S_F32, mul_mv_iq3_s_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ2_S_F32, mul_mv_iq2_s_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_S_F32, mul_mv_iq1_s_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ1_M_F32, mul_mv_iq1_m_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ4_NL_F32, mul_mv_iq4_nl_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_IQ4_XS_F32, mul_mv_iq4_xs_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F32_F32, mul_mv_id_f32_f32, ctx->support_simdgroup_reduction);
+        //GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F16, mul_mv_id_f16_f16, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32, mul_mv_id_f16_f32, ctx->support_simdgroup_reduction);
+        //GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32_1ROW, mul_mv_id_f16_f32_1row, ctx->support_simdgroup_reduction);
+        //GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_F16_F32_L4, mul_mv_id_f16_f32_l4, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q4_0_F32, mul_mv_id_q4_0_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q4_1_F32, mul_mv_id_q4_1_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q5_0_F32, mul_mv_id_q5_0_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q5_1_F32, mul_mv_id_q5_1_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q8_0_F32, mul_mv_id_q8_0_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q2_K_F32, mul_mv_id_q2_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q3_K_F32, mul_mv_id_q3_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q4_K_F32, mul_mv_id_q4_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q5_K_F32, mul_mv_id_q5_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_Q6_K_F32, mul_mv_id_q6_K_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XXS_F32, mul_mv_id_iq2_xxs_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_XS_F32, mul_mv_id_iq2_xs_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_XXS_F32, mul_mv_id_iq3_xxs_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ3_S_F32, mul_mv_id_iq3_s_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ2_S_F32, mul_mv_id_iq2_s_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_S_F32, mul_mv_id_iq1_s_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ1_M_F32, mul_mv_id_iq1_m_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_NL_F32, mul_mv_id_iq4_nl_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_XS_F32, mul_mv_id_iq4_xs_f32, ctx->support_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_F32_F32, mul_mm_f32_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_F16_F32, mul_mm_f16_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_0_F32, mul_mm_q4_0_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_1_F32, mul_mm_q4_1_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q5_0_F32, mul_mm_q5_0_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q5_1_F32, mul_mm_q5_1_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q8_0_F32, mul_mm_q8_0_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q2_K_F32, mul_mm_q2_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q3_K_F32, mul_mm_q3_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q4_K_F32, mul_mm_q4_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q5_K_F32, mul_mm_q5_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_Q6_K_F32, mul_mm_q6_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XXS_F32, mul_mm_iq2_xxs_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_XS_F32, mul_mm_iq2_xs_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_XXS_F32, mul_mm_iq3_xxs_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ3_S_F32, mul_mm_iq3_s_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ2_S_F32, mul_mm_iq2_s_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_S_F32, mul_mm_iq1_s_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_M_F32, mul_mm_iq1_m_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_NL_F32, mul_mm_iq4_nl_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_XS_F32, mul_mm_iq4_xs_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F32, mul_mm_id_f32_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F16_F32, mul_mm_id_f16_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_0_F32, mul_mm_id_q4_0_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_1_F32, mul_mm_id_q4_1_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q5_0_F32, mul_mm_id_q5_0_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q5_1_F32, mul_mm_id_q5_1_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q8_0_F32, mul_mm_id_q8_0_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q2_K_F32, mul_mm_id_q2_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q3_K_F32, mul_mm_id_q3_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_K_F32, mul_mm_id_q4_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q5_K_F32, mul_mm_id_q5_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q6_K_F32, mul_mm_id_q6_K_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XXS_F32, mul_mm_id_iq2_xxs_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_XS_F32, mul_mm_id_iq2_xs_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_XXS_F32, mul_mm_id_iq3_xxs_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ3_S_F32, mul_mm_id_iq3_s_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ2_S_F32, mul_mm_id_iq2_s_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_S_F32, mul_mm_id_iq1_s_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_M_F32, mul_mm_id_iq1_m_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_NL_F32, mul_mm_id_iq4_nl_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_XS_F32, mul_mm_id_iq4_xs_f32, ctx->support_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ROPE_F32, rope_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ROPE_F16, rope_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ALIBI_F32, alibi_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_IM2COL_F16, im2col_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_IM2COL_F32, im2col_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_UPSCALE_F32, upscale_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_F32, pad_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_TIMESTEP_EMBEDDING_F32, timestep_embedding_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARANGE_F32, arange_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC, argsort_f32_i32_asc, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC, argsort_f32_i32_desc, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_LEAKY_RELU_F32, leaky_relu_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H64, flash_attn_ext_f16_h64, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H80, flash_attn_ext_f16_h80, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H96, flash_attn_ext_f16_h96, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H112, flash_attn_ext_f16_h112, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H128, flash_attn_ext_f16_h128, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H256, flash_attn_ext_f16_h256, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H128, flash_attn_ext_vec_f16_h128, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H256, flash_attn_ext_vec_f16_h256, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_F16, cpy_f32_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_F32, cpy_f32_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0, cpy_f32_q8_0, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0, cpy_f32_q4_0, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1, cpy_f32_q4_1, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0, cpy_f32_q5_0, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1, cpy_f32_q5_1, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL, cpy_f32_iq4_nl, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F16_F16, cpy_f16_f16, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F16_F32, cpy_f16_f32, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CONCAT, concat, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SQR, sqr, true);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
     }

     [metal_library release];
@@ -743,6 +772,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
         case GGML_OP_TIMESTEP_EMBEDDING:
         case GGML_OP_ARGSORT:
         case GGML_OP_LEAKY_RELU:
+        case GGML_OP_FLASH_ATTN_EXT:
             return true;
         case GGML_OP_MUL_MAT:
         case GGML_OP_MUL_MAT_ID:
@@ -782,7 +812,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_GET_ROWS:
             {
-                return op->ne[3] == 1;
+                return op->src[0]->type != GGML_TYPE_BF16 && op->ne[3] == 1;
             }
         default:
             return false;
@@ -1326,20 +1356,33 @@ static enum ggml_status ggml_metal_graph_compute(
                 } break;
             case GGML_OP_SOFT_MAX:
                 {
+                    GGML_ASSERT(!src1 || src1->type == GGML_TYPE_F16 || src1->type == GGML_TYPE_F32);
+                    GGML_ASSERT(!src2 || src2->type == GGML_TYPE_F16 || src2->type == GGML_TYPE_F32);
+
                     int nth = 32; // SIMD width

                     id<MTLComputePipelineState> pipeline = nil;

+                    const bool use_f16 = (src1 && src1->type == GGML_TYPE_F16) || (src2 && src2->type == GGML_TYPE_F16);
+
                     if (ne00%4 == 0) {
                         while (nth < ne00/4 && nth < 256) {
                             nth *= 2;
                         }
-                        …
+                        if (use_f16) {
+                            pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4].pipeline;
+                        } else {
+                            pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32_4].pipeline;
+                        }
                     } else {
                         while (nth < ne00 && nth < 1024) {
                             nth *= 2;
                         }
-                        …
+                        if (use_f16) {
+                            pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16].pipeline;
+                        } else {
+                            pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32].pipeline;
+                        }
                     }

                     float scale;
@@ -2503,6 +2546,161 @@ static enum ggml_status ggml_metal_graph_compute(

                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                 } break;
+            case GGML_OP_FLASH_ATTN_EXT:
+                {
+                    GGML_ASSERT(ne00 % 4 == 0);
+                    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+
+                    struct ggml_tensor * src3 = gf->nodes[i]->src[3];
+
+                    GGML_ASSERT(ggml_are_same_shape(src1, src2));
+                    GGML_ASSERT(src3);
+
+                    size_t offs_src3 = 0;
+
+                    id<MTLBuffer> id_src3 = src3 ? ggml_metal_get_buffer(src3, &offs_src3) : nil;
+
+                    GGML_ASSERT(!src3 || src3->type == GGML_TYPE_F16);
+                    GGML_ASSERT(!src3 || src3->ne[1] >= GGML_PAD(src0->ne[1], 8) &&
+                            "the Flash-Attention Metal kernel requires the mask to be padded to 8 and at least n_queries big");
+
+                    const int64_t ne30 = src3 ? src3->ne[0] : 0; GGML_UNUSED(ne30);
+                    const int64_t ne31 = src3 ? src3->ne[1] : 0;
+                    const int64_t ne32 = src3 ? src3->ne[2] : 0; GGML_UNUSED(ne32);
+                    const int64_t ne33 = src3 ? src3->ne[3] : 0; GGML_UNUSED(ne33);
+
+                    const uint64_t nb30 = src3 ? src3->nb[0] : 0; GGML_UNUSED(nb30);
+                    const uint64_t nb31 = src3 ? src3->nb[1] : 0;
+                    const uint64_t nb32 = src3 ? src3->nb[2] : 0; GGML_UNUSED(nb32);
+                    const uint64_t nb33 = src3 ? src3->nb[3] : 0; GGML_UNUSED(nb33);
+
+                    const enum ggml_type src2t = src2 ? src2->type : GGML_TYPE_COUNT; GGML_UNUSED(src2t);
+
+                    float scale;
+                    memcpy(&scale, dst->op_params, sizeof(float));
+
+                    id<MTLComputePipelineState> pipeline = nil;
+
+                    bool use_vec_kernel = false;
+
+                    if (ne01 >= 4 || (ne00%128 != 0)) {
+                        switch (ne00) {
+                            case 64: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H64 ].pipeline; break;
+                            case 80: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H80 ].pipeline; break;
+                            case 96: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H96 ].pipeline; break;
+                            case 112: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H112].pipeline; break;
+                            case 128: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H128].pipeline; break;
+                            case 256: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H256].pipeline; break;
+                            default:
+                                {
+                                    GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
+                                    GGML_METAL_LOG_ERROR("add template specialization for this size\n");
+                                    GGML_ASSERT(false && "add template specialization for this size");
+                                }
+                        }
+                    } else {
+                        use_vec_kernel = true;
+
+                        switch (ne00) {
+                            case 128: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H128].pipeline; break;
+                            case 256: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H256].pipeline; break;
+                            default:
+                                {
+                                    GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
+                                    GGML_METAL_LOG_ERROR("add template specialization for this size\n");
+                                    GGML_ASSERT(false && "add template specialization for this size");
+                                }
+                        }
+                    }
+
+                    [encoder setComputePipelineState:pipeline];
+                    [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
+                    [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
+                    [encoder setBuffer:id_src2 offset:offs_src2 atIndex:2];
+                    [encoder setBuffer:id_src3 offset:offs_src3 atIndex:3];
+                    [encoder setBuffer:id_dst offset:offs_dst atIndex:4];
+                    [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:5];
+                    [encoder setBytes:&ne01 length:sizeof( int64_t) atIndex:6];
+                    [encoder setBytes:&ne02 length:sizeof( int64_t) atIndex:7];
+                    [encoder setBytes:&ne03 length:sizeof( int64_t) atIndex:8];
+                    [encoder setBytes:&nb00 length:sizeof(uint64_t) atIndex:9];
+                    [encoder setBytes:&nb01 length:sizeof(uint64_t) atIndex:10];
+                    [encoder setBytes:&nb02 length:sizeof(uint64_t) atIndex:11];
+                    [encoder setBytes:&nb03 length:sizeof(uint64_t) atIndex:12];
+                    [encoder setBytes:&ne10 length:sizeof( int64_t) atIndex:13];
+                    [encoder setBytes:&ne11 length:sizeof( int64_t) atIndex:14];
+                    [encoder setBytes:&ne12 length:sizeof( int64_t) atIndex:15];
+                    [encoder setBytes:&ne13 length:sizeof( int64_t) atIndex:16];
+                    [encoder setBytes:&nb10 length:sizeof(uint64_t) atIndex:17];
+                    [encoder setBytes:&nb11 length:sizeof(uint64_t) atIndex:18];
+                    [encoder setBytes:&nb12 length:sizeof(uint64_t) atIndex:19];
+                    [encoder setBytes:&nb13 length:sizeof(uint64_t) atIndex:20];
+                    [encoder setBytes:&ne31 length:sizeof( int64_t) atIndex:21];
+                    [encoder setBytes:&nb31 length:sizeof(uint64_t) atIndex:22];
+                    [encoder setBytes:&ne0 length:sizeof( int64_t) atIndex:23];
+                    [encoder setBytes:&ne1 length:sizeof( int64_t) atIndex:24];
+                    [encoder setBytes:&ne2 length:sizeof( int64_t) atIndex:25];
+                    [encoder setBytes:&ne3 length:sizeof( int64_t) atIndex:26];
+                    [encoder setBytes:&scale length:sizeof( float) atIndex:27];
+
+                    if (!use_vec_kernel) {
+                        // half8x8 kernel
+                        const int64_t nqptg = 8; // queries per threadgroup !! sync with kernel template arguments !!
+                        const int64_t ncpsg = 32; // cache values per simdgroup !! sync with kernel template arguments !!
+
+                        GGML_ASSERT(nqptg <= 32);
+                        GGML_ASSERT(nqptg % 8 == 0);
+                        GGML_ASSERT(ncpsg % 32 == 0);
+
+                        int64_t nsgmax = 2;
+
+                        while (true) {
+                            const size_t smem = nqptg*(ne00 + 2*nsgmax*(ncpsg + nqptg))*(sizeof(float)/2);
+                            if (smem > ctx->device.maxThreadgroupMemoryLength) {
+                                break;
+                            }
+                            nsgmax *= 2;
+                        }
+                        nsgmax /= 2;
+
+                        // simdgroups per threadgroup (a.k.a. warps)
+                        const int64_t nsg = ne01 <= nqptg ? MAX(4, MIN(nsgmax, MIN(ne11/ncpsg, (int64_t) pipeline.maxTotalThreadsPerThreadgroup/32))) : 4;
+
+                        const size_t smem = nqptg*(ne00 + 2*nsg*(ncpsg + nqptg))*(sizeof(float)/2);
+
+                        //printf("smem: %zu, max: %zu\n", smem, ctx->device.maxThreadgroupMemoryLength);
+                        GGML_ASSERT(smem <= ctx->device.maxThreadgroupMemoryLength);
+
+                        [encoder setThreadgroupMemoryLength:GGML_PAD(smem, 16) atIndex:0];
+
+                        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + nqptg - 1)/nqptg, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(32, nsg, 1)];
+                    } else {
+                        // half1x4 kernel
+                        const int64_t nqptg = 1; // queries per threadgroup !! sync with kernel template arguments !!
+                        const int64_t ncpsg = 32; // cache values per simdgroup !! sync with kernel template arguments !!
+
+                        GGML_ASSERT(nqptg <= 32);
+                        GGML_ASSERT(nqptg % 1 == 0);
+                        GGML_ASSERT(ncpsg % 32 == 0);
+
+                        // simdgroups per threadgroup (a.k.a. warps)
+                        const int64_t nsgt = MAX(2, MIN(ne11/ncpsg, (int64_t) pipeline.maxTotalThreadsPerThreadgroup/32));
+
+                        int64_t nsg = 1;
+                        while (nsg <= nsgt) {
+                            nsg *= 2;
+                        }
+                        nsg /= 2;
+
+                        const size_t smem = (nqptg*(ne00 + 2*nsg*(ncpsg + nqptg)) + nsg*ne00)*(sizeof(float)/2);
+
+                        //printf("smem: %zu, max: %zu\n", smem, ctx->device.maxThreadgroupMemoryLength);
+                        GGML_ASSERT(smem <= ctx->device.maxThreadgroupMemoryLength);
+                        [encoder setThreadgroupMemoryLength:GGML_PAD(smem, 16) atIndex:0];
+
+                        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + nqptg - 1)/nqptg, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(32, nsg, 1)];
+                    }
+                } break;
             case GGML_OP_DUP:
             case GGML_OP_CPY:
             case GGML_OP_CONT:
@@ -2590,6 +2788,11 @@ static enum ggml_status ggml_metal_graph_compute(
         MTLCommandBufferStatus status = [command_buffer status];
         if (status != MTLCommandBufferStatusCompleted) {
             GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
+            if (status == MTLCommandBufferStatusError) {
+                NSString * error_code = [command_buffer error].localizedDescription;
+                GGML_METAL_LOG_INFO("error: %s\n", [error_code UTF8String]);
+            }
+
             return GGML_STATUS_FAILED;
         }
     }
@@ -2646,7 +2849,11 @@ GGML_CALL static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_
     ggml_backend_metal_free_device();

     if (ctx->owned) {
+#if TARGET_OS_OSX
+        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ctx->all_data, ctx->all_size);
+#else
         free(ctx->all_data);
+#endif
     }

     free(ctx);
@@ -2706,10 +2913,13 @@ GGML_CALL static const char * ggml_backend_metal_buffer_type_get_name(ggml_backe
     UNUSED(buft);
 }

-static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device) {
+static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t size_aligned) {
+#ifndef GGML_METAL_NDEBUG
 #if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
     if (@available(macOS 10.12, iOS 16.0, *)) {
-        GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
+        GGML_METAL_LOG_INFO("%s: allocated buffer, size = %8.2f MiB, (%8.2f / %8.2f)",
+                __func__,
+                size_aligned / 1024.0 / 1024.0,
                 device.currentAllocatedSize / 1024.0 / 1024.0,
                 device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);

@@ -2719,10 +2929,15 @@ static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device) {
             GGML_METAL_LOG_INFO("\n");
         }
     } else {
-        GGML_METAL_LOG_INFO(", (%8.2f)\n",
+        GGML_METAL_LOG_INFO("%s: allocated buffer, size = %8.2f MiB, (%8.2f)\n",
+                __func__,
+                size_aligned / 1024.0 / 1024.0,
+                device.currentAllocatedSize / 1024.0 / 1024.0);
     }
+#endif
 #endif
     UNUSED(device);
+    UNUSED(size_aligned);
 }

 GGML_CALL static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
@@ -2742,22 +2957,23 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buff
     ctx->owned = true;
     ctx->n_buffers = 1;

-    ctx->
-    …
-    …
-    …
-    …
-    …
+    if (ctx->all_data != NULL) {
+        ctx->buffers[0].data = ctx->all_data;
+        ctx->buffers[0].size = size;
+        ctx->buffers[0].metal = [device newBufferWithBytesNoCopy:ctx->all_data
+                    length:size_aligned
+                    options:MTLResourceStorageModeShared
+                    deallocator:nil];
+    }

-    if (ctx->buffers[0].metal == nil) {
+    if (ctx->all_data == NULL || ctx->buffers[0].metal == nil) {
         GGML_METAL_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
         free(ctx);
         ggml_backend_metal_free_device();
         return NULL;
     }

-    …
-    ggml_backend_metal_log_allocated_size(device);
+    //ggml_backend_metal_log_allocated_size(device, size_aligned);

     return ggml_backend_buffer_init(buft, ggml_backend_metal_buffer_i, ctx, size);
 }
@@ -2844,7 +3060,7 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data,
             return false;
         }

-        …
+        ggml_backend_metal_log_allocated_size(device, size_aligned);

         ++ctx->n_buffers;
     } else {
@@ -2867,7 +3083,8 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data,
                 return false;
             }

-            …
+            ggml_backend_metal_log_allocated_size(device, size_step_aligned);
+
             if (i + size_step < size) {
                 GGML_METAL_LOG_INFO("\n");
             }
@@ -2876,8 +3093,6 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data,
         }
     }

-    ggml_backend_metal_log_allocated_size(device);
-
     return ggml_backend_buffer_init(ggml_backend_metal_buffer_type(), ggml_backend_metal_buffer_i, ctx, size);
 }
