llama_cpp 0.12.4 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +146 -53
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +688 -270
- data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +386 -134
- data/vendor/tmp/llama.cpp/ggml-quants.h +68 -59
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +139 -145
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +1516 -10656
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1777 -1238
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +14 -9
- data/vendor/tmp/llama.cpp/ggml.c +147 -70
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +920 -173
- data/vendor/tmp/llama.cpp/llama.h +7 -1
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -428,8 +428,8 @@ int64_t ggml_cycles_per_ms(void) {
|
|
428
428
|
|
429
429
|
static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
|
430
430
|
|
431
|
-
static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y);
|
432
|
-
static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y);
|
431
|
+
static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
|
432
|
+
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
|
433
433
|
|
434
434
|
static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
435
435
|
[GGML_TYPE_I8] = {
|
@@ -457,6 +457,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
457
457
|
.is_quantized = false,
|
458
458
|
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
|
459
459
|
.vec_dot_type = GGML_TYPE_F32,
|
460
|
+
.nrows = 1,
|
460
461
|
},
|
461
462
|
[GGML_TYPE_F16] = {
|
462
463
|
.type_name = "f16",
|
@@ -468,6 +469,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
468
469
|
.from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row,
|
469
470
|
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
|
470
471
|
.vec_dot_type = GGML_TYPE_F16,
|
472
|
+
.nrows = 1,
|
471
473
|
},
|
472
474
|
[GGML_TYPE_Q4_0] = {
|
473
475
|
.type_name = "q4_0",
|
@@ -479,6 +481,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
479
481
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
|
480
482
|
.vec_dot = ggml_vec_dot_q4_0_q8_0,
|
481
483
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
484
|
+
#if defined (__ARM_FEATURE_MATMUL_INT8)
|
485
|
+
.nrows = 2,
|
486
|
+
#else
|
487
|
+
.nrows = 1,
|
488
|
+
#endif
|
482
489
|
},
|
483
490
|
[GGML_TYPE_Q4_1] = {
|
484
491
|
.type_name = "q4_1",
|
@@ -490,6 +497,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
490
497
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
|
491
498
|
.vec_dot = ggml_vec_dot_q4_1_q8_1,
|
492
499
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
500
|
+
#if defined (__ARM_FEATURE_MATMUL_INT8)
|
501
|
+
.nrows = 2,
|
502
|
+
#else
|
503
|
+
.nrows = 1,
|
504
|
+
#endif
|
493
505
|
},
|
494
506
|
[4] = { // GGML_TYPE_Q4_2
|
495
507
|
.type_name = "DEPRECATED",
|
@@ -501,6 +513,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
501
513
|
.from_float_reference = NULL,
|
502
514
|
.vec_dot = NULL,
|
503
515
|
.vec_dot_type = GGML_TYPE_COUNT,
|
516
|
+
.nrows = 1,
|
504
517
|
},
|
505
518
|
[5] = { // GGML_TYPE_Q4_3
|
506
519
|
.type_name = "DEPRECATED",
|
@@ -512,6 +525,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
512
525
|
.from_float_reference = NULL,
|
513
526
|
.vec_dot = NULL,
|
514
527
|
.vec_dot_type = GGML_TYPE_COUNT,
|
528
|
+
.nrows = 1,
|
515
529
|
},
|
516
530
|
[GGML_TYPE_Q5_0] = {
|
517
531
|
.type_name = "q5_0",
|
@@ -523,6 +537,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
523
537
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
|
524
538
|
.vec_dot = ggml_vec_dot_q5_0_q8_0,
|
525
539
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
540
|
+
.nrows = 1,
|
526
541
|
},
|
527
542
|
[GGML_TYPE_Q5_1] = {
|
528
543
|
.type_name = "q5_1",
|
@@ -534,6 +549,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
534
549
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
|
535
550
|
.vec_dot = ggml_vec_dot_q5_1_q8_1,
|
536
551
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
552
|
+
.nrows = 1,
|
537
553
|
},
|
538
554
|
[GGML_TYPE_Q8_0] = {
|
539
555
|
.type_name = "q8_0",
|
@@ -545,6 +561,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
545
561
|
.from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
|
546
562
|
.vec_dot = ggml_vec_dot_q8_0_q8_0,
|
547
563
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
564
|
+
#if defined (__ARM_FEATURE_MATMUL_INT8)
|
565
|
+
.nrows = 2,
|
566
|
+
#else
|
567
|
+
.nrows = 1,
|
568
|
+
#endif
|
548
569
|
},
|
549
570
|
[GGML_TYPE_Q8_1] = {
|
550
571
|
.type_name = "q8_1",
|
@@ -554,6 +575,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
554
575
|
.from_float = quantize_row_q8_1,
|
555
576
|
.from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
|
556
577
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
578
|
+
.nrows = 1,
|
557
579
|
},
|
558
580
|
[GGML_TYPE_Q2_K] = {
|
559
581
|
.type_name = "q2_K",
|
@@ -565,6 +587,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
565
587
|
.from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
|
566
588
|
.vec_dot = ggml_vec_dot_q2_K_q8_K,
|
567
589
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
590
|
+
.nrows = 1,
|
568
591
|
},
|
569
592
|
[GGML_TYPE_Q3_K] = {
|
570
593
|
.type_name = "q3_K",
|
@@ -576,6 +599,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
576
599
|
.from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
|
577
600
|
.vec_dot = ggml_vec_dot_q3_K_q8_K,
|
578
601
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
602
|
+
.nrows = 1,
|
579
603
|
},
|
580
604
|
[GGML_TYPE_Q4_K] = {
|
581
605
|
.type_name = "q4_K",
|
@@ -587,6 +611,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
587
611
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
|
588
612
|
.vec_dot = ggml_vec_dot_q4_K_q8_K,
|
589
613
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
614
|
+
.nrows = 1,
|
590
615
|
},
|
591
616
|
[GGML_TYPE_Q5_K] = {
|
592
617
|
.type_name = "q5_K",
|
@@ -598,6 +623,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
598
623
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
|
599
624
|
.vec_dot = ggml_vec_dot_q5_K_q8_K,
|
600
625
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
626
|
+
.nrows = 1,
|
601
627
|
},
|
602
628
|
[GGML_TYPE_Q6_K] = {
|
603
629
|
.type_name = "q6_K",
|
@@ -609,6 +635,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
609
635
|
.from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
|
610
636
|
.vec_dot = ggml_vec_dot_q6_K_q8_K,
|
611
637
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
638
|
+
.nrows = 1,
|
612
639
|
},
|
613
640
|
[GGML_TYPE_IQ2_XXS] = {
|
614
641
|
.type_name = "iq2_xxs",
|
@@ -620,6 +647,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
620
647
|
.from_float_reference = NULL,
|
621
648
|
.vec_dot = ggml_vec_dot_iq2_xxs_q8_K,
|
622
649
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
650
|
+
.nrows = 1,
|
623
651
|
},
|
624
652
|
[GGML_TYPE_IQ2_XS] = {
|
625
653
|
.type_name = "iq2_xs",
|
@@ -631,6 +659,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
631
659
|
.from_float_reference = NULL,
|
632
660
|
.vec_dot = ggml_vec_dot_iq2_xs_q8_K,
|
633
661
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
662
|
+
.nrows = 1,
|
634
663
|
},
|
635
664
|
[GGML_TYPE_IQ3_XXS] = {
|
636
665
|
.type_name = "iq3_xxs",
|
@@ -642,6 +671,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
642
671
|
.from_float_reference = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
|
643
672
|
.vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
|
644
673
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
674
|
+
.nrows = 1,
|
645
675
|
},
|
646
676
|
[GGML_TYPE_Q8_K] = {
|
647
677
|
.type_name = "q8_K",
|
@@ -1212,7 +1242,13 @@ inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x)
|
|
1212
1242
|
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
|
1213
1243
|
inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; }
|
1214
1244
|
|
1215
|
-
static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y) {
|
1245
|
+
static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) {
|
1246
|
+
assert(nrc == 1);
|
1247
|
+
UNUSED(nrc);
|
1248
|
+
UNUSED(bx);
|
1249
|
+
UNUSED(by);
|
1250
|
+
UNUSED(bs);
|
1251
|
+
|
1216
1252
|
#ifdef GGML_SIMD
|
1217
1253
|
float sumf = 0.0f;
|
1218
1254
|
const int np = (n & ~(GGML_F32_STEP - 1));
|
@@ -1249,7 +1285,13 @@ static void ggml_vec_dot_f32(const int n, float * restrict s, const float * rest
|
|
1249
1285
|
*s = sumf;
|
1250
1286
|
}
|
1251
1287
|
|
1252
|
-
static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y) {
|
1288
|
+
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc) {
|
1289
|
+
assert(nrc == 1);
|
1290
|
+
UNUSED(nrc);
|
1291
|
+
UNUSED(bx);
|
1292
|
+
UNUSED(by);
|
1293
|
+
UNUSED(bs);
|
1294
|
+
|
1253
1295
|
ggml_float sumf = 0.0;
|
1254
1296
|
|
1255
1297
|
#if defined(GGML_SIMD)
|
@@ -1455,7 +1497,7 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
|
|
1455
1497
|
#endif
|
1456
1498
|
}
|
1457
1499
|
|
1458
|
-
inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s); }
|
1500
|
+
inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, 0, x, 0, x, 0, 1); *s = sqrtf(*s); }
|
1459
1501
|
inline static void ggml_vec_sqr_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; }
|
1460
1502
|
inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
|
1461
1503
|
inline static void ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); }
|
@@ -2343,7 +2385,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
2343
2385
|
#elif defined(GGML_USE_CLBLAST)
|
2344
2386
|
ggml_cl_init();
|
2345
2387
|
#elif defined(GGML_USE_VULKAN)
|
2346
|
-
|
2388
|
+
ggml_vk_init_cpu_assist();
|
2347
2389
|
#elif defined(GGML_USE_SYCL)
|
2348
2390
|
ggml_init_sycl();
|
2349
2391
|
#endif
|
@@ -2470,7 +2512,8 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
|
|
2470
2512
|
size_t max_size = 0;
|
2471
2513
|
|
2472
2514
|
for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
|
2473
|
-
|
2515
|
+
size_t bytes = ggml_nbytes(tensor);
|
2516
|
+
max_size = MAX(max_size, bytes);
|
2474
2517
|
}
|
2475
2518
|
|
2476
2519
|
return max_size;
|
@@ -2606,7 +2649,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|
2606
2649
|
/*.nb =*/ { 0, 0, 0, 0 },
|
2607
2650
|
/*.op =*/ GGML_OP_NONE,
|
2608
2651
|
/*.op_params =*/ { 0 },
|
2609
|
-
/*.
|
2652
|
+
/*.flags =*/ 0,
|
2610
2653
|
/*.grad =*/ NULL,
|
2611
2654
|
/*.src =*/ { NULL },
|
2612
2655
|
/*.perf_runs =*/ 0,
|
@@ -6508,7 +6551,7 @@ struct ggml_tensor * ggml_cross_entropy_loss_back(
|
|
6508
6551
|
void ggml_set_param(
|
6509
6552
|
struct ggml_context * ctx,
|
6510
6553
|
struct ggml_tensor * tensor) {
|
6511
|
-
tensor->
|
6554
|
+
tensor->flags |= GGML_TENSOR_FLAG_PARAM;
|
6512
6555
|
|
6513
6556
|
GGML_ASSERT(tensor->grad == NULL);
|
6514
6557
|
tensor->grad = ggml_dup_tensor(ctx, tensor);
|
@@ -9991,6 +10034,7 @@ static void ggml_compute_forward_mul_mat(
|
|
9991
10034
|
ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
|
9992
10035
|
enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
|
9993
10036
|
ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
|
10037
|
+
int64_t const vec_dot_num_rows = type_traits[type].nrows;
|
9994
10038
|
|
9995
10039
|
GGML_ASSERT(ne0 == ne01);
|
9996
10040
|
GGML_ASSERT(ne1 == ne11);
|
@@ -10158,12 +10202,23 @@ static void ggml_compute_forward_mul_mat(
|
|
10158
10202
|
const int64_t blck_0 = 16;
|
10159
10203
|
const int64_t blck_1 = 16;
|
10160
10204
|
|
10205
|
+
// dot kernels can handle 1 row and col at a time, but mmla kernels can process 2 rows and cols
|
10206
|
+
int64_t nrc = vec_dot_num_rows;
|
10207
|
+
// TODO: currently the mmla kernels support only even numbered rows/cols.
|
10208
|
+
// this check can be removed once they are extended to support odd numbered rows/cols too
|
10209
|
+
if ((nr0 % 2 != 0) || (ne11 % 2 != 0)) {
|
10210
|
+
nrc = 1;
|
10211
|
+
}
|
10212
|
+
|
10213
|
+
const size_t src1_col_stride = src1_cont || src1->type != vec_dot_type ? row_size : nb11;
|
10214
|
+
|
10161
10215
|
// attempt to reduce false-sharing (does not seem to make a difference)
|
10162
|
-
|
10216
|
+
// 16 * 2, accounting for mmla kernels
|
10217
|
+
float tmp[32];
|
10163
10218
|
|
10164
10219
|
for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
|
10165
10220
|
for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
|
10166
|
-
for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
|
10221
|
+
for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ir1 += nrc) {
|
10167
10222
|
const int64_t i13 = (ir1/(ne12*ne1));
|
10168
10223
|
const int64_t i12 = (ir1 - i13*ne12*ne1)/ne1;
|
10169
10224
|
const int64_t i11 = (ir1 - i13*ne12*ne1 - i12*ne1);
|
@@ -10186,17 +10241,19 @@ static void ggml_compute_forward_mul_mat(
|
|
10186
10241
|
(src1_cont || src1->type != vec_dot_type
|
10187
10242
|
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
|
10188
10243
|
: (i11*nb11 + i12*nb12 + i13*nb13));
|
10189
|
-
|
10190
10244
|
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
|
10191
10245
|
|
10192
10246
|
//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
10193
10247
|
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
|
10194
10248
|
//}
|
10195
10249
|
|
10196
|
-
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
10197
|
-
vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
|
10250
|
+
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ir0 += nrc) {
|
10251
|
+
vec_dot(ne00, &tmp[ir0 - iir0], (nrc>1 ? 16 : 0), src0_row + ir0*nb01, (nrc>1 ? nb01 : 0), src1_col, (nrc>1 ? src1_col_stride : 0), nrc);
|
10252
|
+
}
|
10253
|
+
|
10254
|
+
for (int cn = 0; cn < nrc; ++cn) {
|
10255
|
+
memcpy(&dst_col[iir0 + cn*nb1/nb0], tmp + (cn*16), (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
|
10198
10256
|
}
|
10199
|
-
memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
|
10200
10257
|
}
|
10201
10258
|
}
|
10202
10259
|
}
|
@@ -10385,7 +10442,7 @@ static void ggml_compute_forward_mul_mat_id(
|
|
10385
10442
|
//}
|
10386
10443
|
|
10387
10444
|
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
10388
|
-
vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
|
10445
|
+
vec_dot(ne00, &tmp[ir0 - iir0], 0, src0_row + ir0*nb01, 0, src1_col, 0, 1);
|
10389
10446
|
}
|
10390
10447
|
memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
|
10391
10448
|
}
|
@@ -11567,7 +11624,7 @@ static void ggml_compute_forward_soft_max_back_f32(
|
|
11567
11624
|
|
11568
11625
|
// linear runtime, no additional memory
|
11569
11626
|
float dot_y_dy = 0;
|
11570
|
-
ggml_vec_dot_f32 (nc, &dot_y_dy, y, dy);
|
11627
|
+
ggml_vec_dot_f32 (nc, &dot_y_dy, 0, y, 0, dy, 0, 1);
|
11571
11628
|
ggml_vec_cpy_f32 (nc, dx, dy);
|
11572
11629
|
ggml_vec_acc1_f32(nc, dx, -dot_y_dy);
|
11573
11630
|
ggml_vec_mul_f32 (nc, dx, dx, y);
|
@@ -11887,8 +11944,10 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
|
|
11887
11944
|
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
|
11888
11945
|
) {
|
11889
11946
|
// start and end correction dims
|
11890
|
-
|
11891
|
-
|
11947
|
+
float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base));
|
11948
|
+
float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base));
|
11949
|
+
dims[0] = MAX(0, start);
|
11950
|
+
dims[1] = MIN(n_dims - 1, end);
|
11892
11951
|
}
|
11893
11952
|
|
11894
11953
|
static void ggml_compute_forward_rope_f32(
|
@@ -12366,9 +12425,9 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
12366
12425
|
const int i1n = i10*ne11;
|
12367
12426
|
for (int i00 = 0; i00 < ne00; i00++) {
|
12368
12427
|
float v = 0;
|
12369
|
-
ggml_vec_dot_f16(ne02, &v,
|
12370
|
-
(ggml_fp16_t *) wdata_src + i1n,
|
12371
|
-
(ggml_fp16_t *) wdata_kernel + i00*ne02);
|
12428
|
+
ggml_vec_dot_f16(ne02, &v, 0,
|
12429
|
+
(ggml_fp16_t *) wdata_src + i1n, 0,
|
12430
|
+
(ggml_fp16_t *) wdata_kernel + i00*ne02, 0, 1);
|
12372
12431
|
dst_data[i10*s0 + i00] += v;
|
12373
12432
|
}
|
12374
12433
|
}
|
@@ -12463,9 +12522,9 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
12463
12522
|
const int i1n = i10*ne11;
|
12464
12523
|
for (int i00 = 0; i00 < ne00; i00++) {
|
12465
12524
|
float v = 0;
|
12466
|
-
ggml_vec_dot_f32(ne02, &v,
|
12467
|
-
wdata_src + i1n,
|
12468
|
-
wdata_kernel + i00*ne02);
|
12525
|
+
ggml_vec_dot_f32(ne02, &v, 0,
|
12526
|
+
wdata_src + i1n, 0,
|
12527
|
+
wdata_kernel + i00*ne02, 0, 1);
|
12469
12528
|
dst_data[i10*s0 + i00] += v;
|
12470
12529
|
}
|
12471
12530
|
}
|
@@ -12780,9 +12839,9 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
12780
12839
|
for (int i01 = 0; i01 < ne01; i01++) {
|
12781
12840
|
for (int i00 = 0; i00 < ne00; i00++) {
|
12782
12841
|
float v = 0;
|
12783
|
-
ggml_vec_dot_f16(ne03, &v,
|
12784
|
-
wdata_src + i1n,
|
12785
|
-
wdata_kernel + i01*ne00*ne03 + i00*ne03);
|
12842
|
+
ggml_vec_dot_f16(ne03, &v, 0,
|
12843
|
+
wdata_src + i1n, 0,
|
12844
|
+
wdata_kernel + i01*ne00*ne03 + i00*ne03, 0, 1);
|
12786
12845
|
dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
|
12787
12846
|
}
|
12788
12847
|
}
|
@@ -13211,9 +13270,9 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13211
13270
|
const int i1 = ik1;
|
13212
13271
|
|
13213
13272
|
ggml_vec_dot_f32(neq0,
|
13214
|
-
S + i1,
|
13215
|
-
(float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
|
13216
|
-
(float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
|
13273
|
+
S + i1, 0,
|
13274
|
+
(float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
|
13275
|
+
(float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
|
13217
13276
|
}
|
13218
13277
|
|
13219
13278
|
// scale
|
@@ -13296,9 +13355,9 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13296
13355
|
const int iv3 = iq3;
|
13297
13356
|
|
13298
13357
|
ggml_vec_dot_f32(masked_begin,
|
13299
|
-
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
|
13300
|
-
(float *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)),
|
13301
|
-
S);
|
13358
|
+
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
|
13359
|
+
(float *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), 0,
|
13360
|
+
S, 0, 1);
|
13302
13361
|
}
|
13303
13362
|
}
|
13304
13363
|
}
|
@@ -13401,9 +13460,9 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13401
13460
|
const int i1 = ik1;
|
13402
13461
|
|
13403
13462
|
ggml_vec_dot_f16(neq0,
|
13404
|
-
S + i1,
|
13405
|
-
(ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
|
13406
|
-
(ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
|
13463
|
+
S + i1, 0,
|
13464
|
+
(ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
|
13465
|
+
(ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
|
13407
13466
|
}
|
13408
13467
|
} else {
|
13409
13468
|
for (int64_t ic = 0; ic < nek1; ic += GGML_VEC_DOT_UNROLL) {
|
@@ -13505,9 +13564,9 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13505
13564
|
const int iv3 = iq3;
|
13506
13565
|
|
13507
13566
|
ggml_vec_dot_f16(nev0,
|
13508
|
-
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
|
13509
|
-
(ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)),
|
13510
|
-
S16);
|
13567
|
+
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
|
13568
|
+
(ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), 0,
|
13569
|
+
S16, 0, 1);
|
13511
13570
|
}
|
13512
13571
|
} else {
|
13513
13572
|
for (int64_t ic = 0; ic < nev1; ic += GGML_VEC_DOT_UNROLL) {
|
@@ -13649,9 +13708,9 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
13649
13708
|
const int i1 = ib01;
|
13650
13709
|
|
13651
13710
|
ggml_vec_dot_f16(nea0,
|
13652
|
-
S + i1,
|
13653
|
-
(ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)),
|
13654
|
-
(ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3)));
|
13711
|
+
S + i1, 0,
|
13712
|
+
(ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)), 0,
|
13713
|
+
(ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3)), 0, 1);
|
13655
13714
|
}
|
13656
13715
|
|
13657
13716
|
ggml_vec_add_f32(neb01, S, S, (float *) b1->data);
|
@@ -13674,9 +13733,9 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
13674
13733
|
for (int64_t ic = 0; ic < nec01; ++ic) {
|
13675
13734
|
|
13676
13735
|
ggml_vec_dot_f16(neb01,
|
13677
|
-
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
|
13678
|
-
(ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)),
|
13679
|
-
S16);
|
13736
|
+
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
|
13737
|
+
(ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)), 0,
|
13738
|
+
S16, 0, 1);
|
13680
13739
|
}
|
13681
13740
|
|
13682
13741
|
ggml_vec_add_f32(nec01,
|
@@ -13863,9 +13922,9 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
13863
13922
|
const int i1 = ik1;
|
13864
13923
|
|
13865
13924
|
ggml_vec_dot_f32(neq0,
|
13866
|
-
S + i1,
|
13867
|
-
(float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
|
13868
|
-
(float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
|
13925
|
+
S + i1, 0,
|
13926
|
+
(float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
|
13927
|
+
(float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
|
13869
13928
|
}
|
13870
13929
|
|
13871
13930
|
// scale
|
@@ -14010,7 +14069,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
14010
14069
|
|
14011
14070
|
// S = SM * (S - dot(SM, S))
|
14012
14071
|
float dot_SM_gradSM = 0;
|
14013
|
-
ggml_vec_dot_f32 (masked_begin, &dot_SM_gradSM, SM, S);
|
14072
|
+
ggml_vec_dot_f32 (masked_begin, &dot_SM_gradSM, 0, SM, 0, S, 0, 1);
|
14014
14073
|
ggml_vec_acc1_f32(M, S, -dot_SM_gradSM);
|
14015
14074
|
ggml_vec_mul_f32 (masked_begin, S, S, SM);
|
14016
14075
|
|
@@ -14847,10 +14906,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14847
14906
|
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
|
14848
14907
|
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
|
14849
14908
|
#elif defined(GGML_USE_VULKAN)
|
14850
|
-
const bool skip_cpu =
|
14909
|
+
const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
|
14851
14910
|
#ifdef GGML_VULKAN_CHECK_RESULTS
|
14852
14911
|
if (skip_cpu) {
|
14853
|
-
|
14912
|
+
ggml_vk_check_results_1_cpu_assist(params, tensor);
|
14854
14913
|
}
|
14855
14914
|
#endif
|
14856
14915
|
if (skip_cpu) {
|
@@ -15308,7 +15367,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
|
|
15308
15367
|
return NULL;
|
15309
15368
|
}
|
15310
15369
|
|
15311
|
-
if (node->
|
15370
|
+
if (node->flags & GGML_TENSOR_FLAG_PARAM) {
|
15312
15371
|
return node;
|
15313
15372
|
}
|
15314
15373
|
|
@@ -15342,7 +15401,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
|
|
15342
15401
|
|
15343
15402
|
clone->op = node->op;
|
15344
15403
|
clone->grad = node->grad;
|
15345
|
-
clone->
|
15404
|
+
clone->flags = node->flags;
|
15346
15405
|
clone->extra = node->extra;
|
15347
15406
|
for (int k = 0; k < GGML_MAX_DIMS; ++k) {
|
15348
15407
|
clone->nb[k] = node->nb[k];
|
@@ -16374,7 +16433,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
|
|
16374
16433
|
for (int i = 0; i < gf->n_nodes; i++) {
|
16375
16434
|
struct ggml_tensor * node = gf->nodes[i];
|
16376
16435
|
|
16377
|
-
if (node->
|
16436
|
+
if (node->flags & GGML_TENSOR_FLAG_PARAM) {
|
16378
16437
|
GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
|
16379
16438
|
ggml_build_forward_expand(gb, node->grad);
|
16380
16439
|
}
|
@@ -16646,7 +16705,7 @@ struct ggml_compute_state_shared {
|
|
16646
16705
|
atomic_int node_n; // active graph node
|
16647
16706
|
atomic_int node_task; // active graph node task phase
|
16648
16707
|
|
16649
|
-
|
16708
|
+
ggml_abort_callback abort_callback; // abort ggml_graph_compute when true
|
16650
16709
|
void * abort_callback_data;
|
16651
16710
|
};
|
16652
16711
|
|
@@ -17266,12 +17325,12 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
17266
17325
|
|
17267
17326
|
#ifdef GGML_USE_VULKAN
|
17268
17327
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
17269
|
-
|
17328
|
+
ggml_vk_preallocate_buffers_graph_cpu_assist(cgraph->nodes[i]);
|
17270
17329
|
}
|
17271
|
-
|
17330
|
+
ggml_vk_preallocate_buffers_cpu_assist();
|
17272
17331
|
|
17273
17332
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
17274
|
-
|
17333
|
+
ggml_vk_build_graph_cpu_assist(cgraph->nodes[i], i == cgraph->n_nodes - 1);
|
17275
17334
|
}
|
17276
17335
|
#endif
|
17277
17336
|
|
@@ -17327,7 +17386,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
17327
17386
|
}
|
17328
17387
|
|
17329
17388
|
#ifdef GGML_USE_VULKAN
|
17330
|
-
|
17389
|
+
ggml_vk_graph_cleanup_cpu_assist();
|
17331
17390
|
#endif
|
17332
17391
|
|
17333
17392
|
// performance stats (graph)
|
@@ -17859,7 +17918,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17859
17918
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
17860
17919
|
i,
|
17861
17920
|
node->ne[0], node->ne[1], node->ne[2],
|
17862
|
-
ggml_op_name(node->op), node->
|
17921
|
+
ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
17863
17922
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
17864
17923
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
17865
17924
|
(double) node->perf_time_us / 1000.0,
|
@@ -17952,7 +18011,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
|
17952
18011
|
continue;
|
17953
18012
|
}
|
17954
18013
|
|
17955
|
-
if (node->
|
18014
|
+
if (node->flags & GGML_TENSOR_FLAG_PARAM) {
|
17956
18015
|
snprintf(color, sizeof(color), "yellow");
|
17957
18016
|
} else if (node->grad) {
|
17958
18017
|
if (ggml_graph_find(gf, node)) {
|
@@ -18126,7 +18185,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18126
18185
|
int np = 0;
|
18127
18186
|
int64_t nx = 0;
|
18128
18187
|
for (int i = 0; i < gf->n_nodes; ++i) {
|
18129
|
-
if (gf->nodes[i]->
|
18188
|
+
if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) {
|
18130
18189
|
GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
|
18131
18190
|
|
18132
18191
|
GGML_ASSERT(np < GGML_MAX_PARAMS);
|
@@ -18379,7 +18438,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
18379
18438
|
}
|
18380
18439
|
|
18381
18440
|
// compute the initial gradient in the search direction
|
18382
|
-
ggml_vec_dot_f32(nx, &dginit, g, d);
|
18441
|
+
ggml_vec_dot_f32(nx, &dginit, 0, g, 0, d, 0, 1);
|
18383
18442
|
|
18384
18443
|
// make sure that d points to a descent direction
|
18385
18444
|
if (0 < dginit) {
|
@@ -18429,7 +18488,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
18429
18488
|
return count;
|
18430
18489
|
}
|
18431
18490
|
|
18432
|
-
ggml_vec_dot_f32(nx, &dg, g, d);
|
18491
|
+
ggml_vec_dot_f32(nx, &dg, 0, g, 0, d, 0, 1);
|
18433
18492
|
|
18434
18493
|
// check the Wolfe condition
|
18435
18494
|
if (dg < params->lbfgs.wolfe * dginit) {
|
@@ -18489,7 +18548,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18489
18548
|
int np = 0;
|
18490
18549
|
int nx = 0;
|
18491
18550
|
for (int i = 0; i < gf->n_nodes; ++i) {
|
18492
|
-
if (gf->nodes[i]->
|
18551
|
+
if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) {
|
18493
18552
|
GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
|
18494
18553
|
|
18495
18554
|
GGML_ASSERT(np < GGML_MAX_PARAMS);
|
@@ -18690,8 +18749,8 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18690
18749
|
// ys = y^t \cdot s -> 1 / \rho.
|
18691
18750
|
// yy = y^t \cdot y.
|
18692
18751
|
//
|
18693
|
-
ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]*nx]);
|
18694
|
-
ggml_vec_dot_f32(nx, &yy, &lm_y[end[0]*nx], &lm_y[end[0]*nx]);
|
18752
|
+
ggml_vec_dot_f32(nx, &ys, 0, &lm_y[end[0]*nx], 0, &lm_s[end[0]*nx], 0, 1);
|
18753
|
+
ggml_vec_dot_f32(nx, &yy, 0, &lm_y[end[0]*nx], 0, &lm_y[end[0]*nx], 0, 1);
|
18695
18754
|
|
18696
18755
|
lm_ys[end[0]] = ys;
|
18697
18756
|
|
@@ -18710,7 +18769,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18710
18769
|
for (int i = 0; i < bound; ++i) {
|
18711
18770
|
j[0] = (j[0] + m - 1) % m;
|
18712
18771
|
// \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}
|
18713
|
-
ggml_vec_dot_f32(nx, &lm_alpha[j[0]], &lm_s[j[0]*nx], d);
|
18772
|
+
ggml_vec_dot_f32(nx, &lm_alpha[j[0]], 0, &lm_s[j[0]*nx], 0, d, 0, 1);
|
18714
18773
|
lm_alpha[j[0]] /= lm_ys[j[0]];
|
18715
18774
|
// q_{i} = q_{i+1} - \alpha_{i} y_{i}
|
18716
18775
|
ggml_vec_mad_f32(nx, d, &lm_y[j[0]*nx], -lm_alpha[j[0]]);
|
@@ -18720,7 +18779,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18720
18779
|
|
18721
18780
|
for (int i = 0; i < bound; ++i) {
|
18722
18781
|
// \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}
|
18723
|
-
ggml_vec_dot_f32(nx, &beta, &lm_y[j[0]*nx], d);
|
18782
|
+
ggml_vec_dot_f32(nx, &beta, 0, &lm_y[j[0]*nx], 0, d, 0, 1);
|
18724
18783
|
beta /= lm_ys[j[0]];
|
18725
18784
|
// \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}
|
18726
18785
|
ggml_vec_mad_f32(nx, d, &lm_s[j[0]*nx], lm_alpha[j[0]] - beta);
|
@@ -18964,6 +19023,16 @@ enum ggml_opt_result ggml_opt_resume_g(
|
|
18964
19023
|
|
18965
19024
|
////////////////////////////////////////////////////////////////////////////////
|
18966
19025
|
|
19026
|
+
void ggml_set_input(struct ggml_tensor * tensor) {
|
19027
|
+
tensor->flags |= GGML_TENSOR_FLAG_INPUT;
|
19028
|
+
}
|
19029
|
+
|
19030
|
+
void ggml_set_output(struct ggml_tensor * tensor) {
|
19031
|
+
tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
|
19032
|
+
}
|
19033
|
+
|
19034
|
+
////////////////////////////////////////////////////////////////////////////////
|
19035
|
+
|
18967
19036
|
void ggml_quantize_init(enum ggml_type type) {
|
18968
19037
|
ggml_critical_section_start();
|
18969
19038
|
|
@@ -20608,4 +20677,12 @@ int ggml_cpu_has_vsx(void) {
|
|
20608
20677
|
#endif
|
20609
20678
|
}
|
20610
20679
|
|
20680
|
+
int ggml_cpu_has_matmul_int8(void) {
|
20681
|
+
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
20682
|
+
return 1;
|
20683
|
+
#else
|
20684
|
+
return 0;
|
20685
|
+
#endif
|
20686
|
+
}
|
20687
|
+
|
20611
20688
|
////////////////////////////////////////////////////////////////////////////////
|