llama_cpp 0.12.5 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +9 -1
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
- data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
- data/vendor/tmp/llama.cpp/ggml.c +134 -60
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +654 -130
- data/vendor/tmp/llama.cpp/llama.h +6 -0
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -428,8 +428,8 @@ int64_t ggml_cycles_per_ms(void) {
 
 static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
 
-static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y);
-static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y);
+static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
+static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
 
 static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
     [GGML_TYPE_I8] = {
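This signature change drives the rest of the ggml.c diff: every dot kernel now receives result and operand strides (`bs`, `bx`, `by`) and a row count (`nrc`), so one call can emit several dot products. As a standalone sketch of just the calling convention (toy code, not from llama.cpp; treating `bs` as a float-element stride mirrors how `ggml_compute_forward_mul_mat` passes 16 for its two-row path later in this diff):

```c
#include <stddef.h>
#include <stdio.h>

// Sketch of the new kernel shape: compute nrc dot products in one call.
// x advances by bx bytes per row, y by by bytes per row, and the results
// land in s at a stride of bs floats.
static void vec_dot_f32_sketch(int n, float *s, size_t bs,
                               const float *x, size_t bx,
                               const float *y, size_t by, int nrc) {
    for (int r = 0; r < nrc; ++r) {
        const float *xr = (const float *)((const char *)x + r*bx);
        const float *yr = (const float *)((const char *)y + r*by);
        float sum = 0.0f;
        for (int i = 0; i < n; ++i) sum += xr[i]*yr[i];
        s[r*bs] = sum;
    }
}

int main(void) {
    float x[8] = {1,2,3,4,5,6,7,8};   // two rows of 4 floats
    float y[8] = {1,1,1,1,2,2,2,2};   // two rows of 4 floats
    float out[32] = {0};
    vec_dot_f32_sketch(4, out, 16, x, 4*sizeof(float), y, 4*sizeof(float), 2);
    printf("%f %f\n", out[0], out[16]); // 10.000000 52.000000
    return 0;
}
```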
@@ -457,6 +457,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .is_quantized = false,
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
         .vec_dot_type = GGML_TYPE_F32,
+        .nrows = 1,
     },
     [GGML_TYPE_F16] = {
         .type_name = "f16",
@@ -468,6 +469,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row,
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
         .vec_dot_type = GGML_TYPE_F16,
+        .nrows = 1,
     },
     [GGML_TYPE_Q4_0] = {
         .type_name = "q4_0",
@@ -479,6 +481,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
         .vec_dot = ggml_vec_dot_q4_0_q8_0,
         .vec_dot_type = GGML_TYPE_Q8_0,
+#if defined (__ARM_FEATURE_MATMUL_INT8)
+        .nrows = 2,
+#else
+        .nrows = 1,
+#endif
     },
     [GGML_TYPE_Q4_1] = {
         .type_name = "q4_1",
@@ -490,6 +497,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
         .vec_dot = ggml_vec_dot_q4_1_q8_1,
         .vec_dot_type = GGML_TYPE_Q8_1,
+#if defined (__ARM_FEATURE_MATMUL_INT8)
+        .nrows = 2,
+#else
+        .nrows = 1,
+#endif
     },
     [4] = { // GGML_TYPE_Q4_2
         .type_name = "DEPRECATED",
@@ -501,6 +513,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = NULL,
         .vec_dot = NULL,
         .vec_dot_type = GGML_TYPE_COUNT,
+        .nrows = 1,
     },
     [5] = { // GGML_TYPE_Q4_3
         .type_name = "DEPRECATED",
@@ -512,6 +525,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = NULL,
         .vec_dot = NULL,
         .vec_dot_type = GGML_TYPE_COUNT,
+        .nrows = 1,
     },
     [GGML_TYPE_Q5_0] = {
         .type_name = "q5_0",
@@ -523,6 +537,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
         .vec_dot = ggml_vec_dot_q5_0_q8_0,
         .vec_dot_type = GGML_TYPE_Q8_0,
+        .nrows = 1,
     },
     [GGML_TYPE_Q5_1] = {
         .type_name = "q5_1",
@@ -534,6 +549,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
         .vec_dot = ggml_vec_dot_q5_1_q8_1,
         .vec_dot_type = GGML_TYPE_Q8_1,
+        .nrows = 1,
     },
     [GGML_TYPE_Q8_0] = {
         .type_name = "q8_0",
@@ -545,6 +561,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
         .vec_dot = ggml_vec_dot_q8_0_q8_0,
         .vec_dot_type = GGML_TYPE_Q8_0,
+#if defined (__ARM_FEATURE_MATMUL_INT8)
+        .nrows = 2,
+#else
+        .nrows = 1,
+#endif
     },
     [GGML_TYPE_Q8_1] = {
         .type_name = "q8_1",
@@ -554,6 +575,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float = quantize_row_q8_1,
         .from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
         .vec_dot_type = GGML_TYPE_Q8_1,
+        .nrows = 1,
     },
     [GGML_TYPE_Q2_K] = {
         .type_name = "q2_K",
@@ -565,6 +587,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
         .vec_dot = ggml_vec_dot_q2_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q3_K] = {
         .type_name = "q3_K",
@@ -576,6 +599,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
         .vec_dot = ggml_vec_dot_q3_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q4_K] = {
         .type_name = "q4_K",
@@ -587,6 +611,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
         .vec_dot = ggml_vec_dot_q4_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q5_K] = {
         .type_name = "q5_K",
@@ -598,6 +623,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
         .vec_dot = ggml_vec_dot_q5_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q6_K] = {
         .type_name = "q6_K",
@@ -609,6 +635,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
         .vec_dot = ggml_vec_dot_q6_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_IQ2_XXS] = {
         .type_name = "iq2_xxs",
@@ -620,6 +647,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = NULL,
         .vec_dot = ggml_vec_dot_iq2_xxs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_IQ2_XS] = {
         .type_name = "iq2_xs",
@@ -631,6 +659,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = NULL,
         .vec_dot = ggml_vec_dot_iq2_xs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_IQ3_XXS] = {
         .type_name = "iq3_xxs",
@@ -642,6 +671,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
         .vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q8_K] = {
         .type_name = "q8_K",
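Each `type_traits` entry now records how many rows its dot kernel can produce per call; `q4_0`, `q4_1`, and `q8_0` advertise 2 when the compiler defines `__ARM_FEATURE_MATMUL_INT8` (ARM's i8mm extension), and every other entry stays at 1. A standalone sketch of the same compile-time selection pattern (struct and names invented):

```c
#include <stdint.h>
#include <stdio.h>

// Minimal sketch of the nrows selection used above: a per-type table whose
// row count is fixed at compile time by the __ARM_FEATURE_MATMUL_INT8
// predefine (set when building with an i8mm-capable -march).
typedef struct {
    const char *type_name;
    int64_t     nrows; // rows a single vec_dot call may process
} type_traits_sketch_t;

static const type_traits_sketch_t traits_sketch[] = {
    { "f32", 1 },
    { "q4_0",
#if defined(__ARM_FEATURE_MATMUL_INT8)
      2,  // int8 mmla kernels consume two rows per call
#else
      1,  // scalar/NEON dot kernels stay row-at-a-time
#endif
    },
};

int main(void) {
    for (unsigned i = 0; i < sizeof(traits_sketch)/sizeof(traits_sketch[0]); ++i)
        printf("%-5s nrows=%lld\n", traits_sketch[i].type_name,
               (long long) traits_sketch[i].nrows);
    return 0;
}
```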
@@ -1212,7 +1242,13 @@ inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x)
 inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
 inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; }
 
-static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y) {
+static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) {
+    assert(nrc == 1);
+    UNUSED(nrc);
+    UNUSED(bx);
+    UNUSED(by);
+    UNUSED(bs);
+
 #ifdef GGML_SIMD
     float sumf = 0.0f;
     const int np = (n & ~(GGML_F32_STEP - 1));
@@ -1249,7 +1285,13 @@ static void ggml_vec_dot_f32(const int n, float * restrict s, const float * rest
     *s = sumf;
 }
 
-static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y) {
+static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc) {
+    assert(nrc == 1);
+    UNUSED(nrc);
+    UNUSED(bx);
+    UNUSED(by);
+    UNUSED(bs);
+
     ggml_float sumf = 0.0;
 
 #if defined(GGML_SIMD)
@@ -1455,7 +1497,7 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
 #endif
 }
 
-inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s); }
+inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, 0, x, 0, x, 0, 1); *s = sqrtf(*s); }
 inline static void ggml_vec_sqr_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; }
 inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
 inline static void ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); }
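From here on, most ggml.c changes are the mechanical follow-up to the new signature: each single-row call site gains three zero strides and a trailing `nrc` of 1, which the scalar kernels, as just shown, assert and then discard. A compilable toy version of that pattern (names invented):

```c
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define UNUSED(x) (void)(x)

// Scalar kernel: accepts the extended signature but only supports one row,
// mirroring the assert/UNUSED pattern in the hunks above.
static void dot_one_row(int n, float * s, size_t bs, const float * x, size_t bx,
                        const float * y, size_t by, int nrc) {
    assert(nrc == 1);
    UNUSED(bs); UNUSED(bx); UNUSED(by); UNUSED(nrc);
    float sum = 0.0f;
    for (int i = 0; i < n; ++i) sum += x[i]*y[i];
    *s = sum;
}

int main(void) {
    float x[3] = {3, 4, 0};
    float s;
    // old call shape: dot_one_row(3, &s, x, x);
    // new call shape: zero strides, one row, exactly the rewrite in the diff
    dot_one_row(3, &s, 0, x, 0, x, 0, 1);
    printf("|x|^2 = %g\n", s); // 25
    return 0;
}
```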
@@ -2607,7 +2649,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.nb =*/ { 0, 0, 0, 0 },
         /*.op =*/ GGML_OP_NONE,
         /*.op_params =*/ { 0 },
-        /*.is_param =*/ false,
+        /*.flags =*/ 0,
         /*.grad =*/ NULL,
         /*.src =*/ { NULL },
         /*.perf_runs =*/ 0,
@@ -6509,7 +6551,7 @@ struct ggml_tensor * ggml_cross_entropy_loss_back(
 void ggml_set_param(
         struct ggml_context * ctx,
         struct ggml_tensor * tensor) {
-    tensor->is_param = true;
+    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
 
     GGML_ASSERT(tensor->grad == NULL);
     tensor->grad = ggml_dup_tensor(ctx, tensor);
@@ -9992,6 +10034,7 @@ static void ggml_compute_forward_mul_mat(
     ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
     enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
     ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
+    int64_t const vec_dot_num_rows = type_traits[type].nrows;
 
     GGML_ASSERT(ne0 == ne01);
     GGML_ASSERT(ne1 == ne11);
@@ -10159,12 +10202,23 @@ static void ggml_compute_forward_mul_mat(
     const int64_t blck_0 = 16;
     const int64_t blck_1 = 16;
 
+    // dot kernels can handle 1 row and col at a time, but mmla kernels can process 2 rows and cols
+    int64_t nrc = vec_dot_num_rows;
+    // TODO: currently the mmla kernels support only even numbered rows/cols.
+    // this check can be removed once they are extended to support odd numbered rows/cols too
+    if ((nr0 % 2 != 0) || (ne11 % 2 != 0)) {
+        nrc = 1;
+    }
+
+    const size_t src1_col_stride = src1_cont || src1->type != vec_dot_type ? row_size : nb11;
+
     // attempt to reduce false-sharing (does not seem to make a difference)
-    float tmp[16];
+    // 16 * 2, accounting for mmla kernels
+    float tmp[32];
 
     for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
         for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
-            for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
+            for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ir1 += nrc) {
                 const int64_t i13 = (ir1/(ne12*ne1));
                 const int64_t i12 = (ir1 - i13*ne12*ne1)/ne1;
                 const int64_t i11 = (ir1 - i13*ne12*ne1 - i12*ne1);
@@ -10187,17 +10241,19 @@ static void ggml_compute_forward_mul_mat(
                 (src1_cont || src1->type != vec_dot_type
                  ? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
                  : (i11*nb11 + i12*nb12 + i13*nb13));
-
             float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
 
             //for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
             //    vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
             //}
 
-            for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
-                vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
+            for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ir0 += nrc) {
+                vec_dot(ne00, &tmp[ir0 - iir0], (nrc>1 ? 16 : 0), src0_row + ir0*nb01, (nrc>1 ? nb01 : 0), src1_col, (nrc>1 ? src1_col_stride : 0), nrc);
+            }
+
+            for (int cn = 0; cn < nrc; ++cn) {
+                memcpy(&dst_col[iir0 + cn*nb1/nb0], tmp + (cn*16), (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
             }
-            memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
         }
     }
 }
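The three hunks above are the heart of the mmla integration: `nrc` is 2 only when the type's kernel advertises two rows and both dimensions are even, the scratch buffer doubles to `tmp[32]`, and each `vec_dot` call then fills two groups of results (group stride 16) that a per-group `memcpy` scatters into two result rows of the destination. A toy model of just that data movement, with an ordinary C 2x2 micro-kernel standing in for the mmla kernel:

```c
#include <stdio.h>
#include <string.h>

// Toy model of the new inner loop: a "2x2" kernel fills a 32-float scratch
// (two groups of up to 16 results, stride 16), then one memcpy per group
// scatters it into the destination. Data movement only, not the real kernel.
enum { BLCK = 16 };

static void dot2x2(int n, float *s, size_t bs,
                   const float *x, size_t bx_floats,
                   const float *y, size_t by_floats) {
    for (int c = 0; c < 2; ++c)          // two src1 rows (dst result rows)
        for (int r = 0; r < 2; ++r) {    // two src0 rows
            float sum = 0.0f;
            for (int i = 0; i < n; ++i)
                sum += x[r*bx_floats + i] * y[c*by_floats + i];
            s[c*bs + r] = sum;
        }
}

int main(void) {
    enum { N = 4, ROWS = 4, COLS = 2 };
    float a[ROWS][N], b[COLS][N], dst[COLS][ROWS], tmp[32];
    for (int r = 0; r < ROWS; ++r) for (int i = 0; i < N; ++i) a[r][i] = (float)(r + 1);
    for (int c = 0; c < COLS; ++c) for (int i = 0; i < N; ++i) b[c][i] = (float)(c + 1);

    for (int ir0 = 0; ir0 < ROWS; ir0 += 2)   // ir0 += nrc, as in the hunk
        dot2x2(N, &tmp[ir0], BLCK, a[ir0], N, b[0], N);
    for (int cn = 0; cn < COLS; ++cn)         // per-group scatter
        memcpy(&dst[cn][0], tmp + cn*BLCK, ROWS*sizeof(float));

    for (int cn = 0; cn < COLS; ++cn)
        printf("row %d: %g %g %g %g\n", cn, dst[cn][0], dst[cn][1], dst[cn][2], dst[cn][3]);
    return 0;
}
```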
@@ -10386,7 +10442,7 @@ static void ggml_compute_forward_mul_mat_id(
             //}
 
             for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
-                vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
+                vec_dot(ne00, &tmp[ir0 - iir0], 0, src0_row + ir0*nb01, 0, src1_col, 0, 1);
             }
             memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
         }
@@ -11568,7 +11624,7 @@ static void ggml_compute_forward_soft_max_back_f32(
 
         // linear runtime, no additional memory
         float dot_y_dy = 0;
-        ggml_vec_dot_f32 (nc, &dot_y_dy, y, dy);
+        ggml_vec_dot_f32 (nc, &dot_y_dy, 0, y, 0, dy, 0, 1);
         ggml_vec_cpy_f32 (nc, dx, dy);
         ggml_vec_acc1_f32(nc, dx, -dot_y_dy);
         ggml_vec_mul_f32 (nc, dx, dx, y);
@@ -12369,9 +12425,9 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
                 const int i1n = i10*ne11;
                 for (int i00 = 0; i00 < ne00; i00++) {
                     float v = 0;
-                    ggml_vec_dot_f16(ne02, &v,
-                            (ggml_fp16_t *) wdata_src + i1n,
-                            (ggml_fp16_t *) wdata_kernel + i00*ne02);
+                    ggml_vec_dot_f16(ne02, &v, 0,
+                            (ggml_fp16_t *) wdata_src + i1n, 0,
+                            (ggml_fp16_t *) wdata_kernel + i00*ne02, 0, 1);
                     dst_data[i10*s0 + i00] += v;
                 }
             }
@@ -12466,9 +12522,9 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
                 const int i1n = i10*ne11;
                 for (int i00 = 0; i00 < ne00; i00++) {
                     float v = 0;
-                    ggml_vec_dot_f32(ne02, &v,
-                            wdata_src + i1n,
-                            wdata_kernel + i00*ne02);
+                    ggml_vec_dot_f32(ne02, &v, 0,
+                            wdata_src + i1n, 0,
+                            wdata_kernel + i00*ne02, 0, 1);
                     dst_data[i10*s0 + i00] += v;
                 }
             }
@@ -12783,9 +12839,9 @@ static void ggml_compute_forward_conv_transpose_2d(
                 for (int i01 = 0; i01 < ne01; i01++) {
                     for (int i00 = 0; i00 < ne00; i00++) {
                         float v = 0;
-                        ggml_vec_dot_f16(ne03, &v,
-                                wdata_src + i1n,
-                                wdata_kernel + i01*ne00*ne03 + i00*ne03);
+                        ggml_vec_dot_f16(ne03, &v, 0,
+                                wdata_src + i1n, 0,
+                                wdata_kernel + i01*ne00*ne03 + i00*ne03, 0, 1);
                         dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
                     }
                 }
@@ -13214,9 +13270,9 @@ static void ggml_compute_forward_flash_attn_f32(
                 const int i1 = ik1;
 
                 ggml_vec_dot_f32(neq0,
-                        S + i1,
-                        (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
-                        (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
+                        S + i1, 0,
+                        (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
+                        (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
             }
 
             // scale
@@ -13299,9 +13355,9 @@ static void ggml_compute_forward_flash_attn_f32(
                 const int iv3 = iq3;
 
                 ggml_vec_dot_f32(masked_begin,
-                        (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
-                        (float *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)),
-                        S);
+                        (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
+                        (float *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), 0,
+                        S, 0, 1);
             }
         }
     }
@@ -13404,9 +13460,9 @@ static void ggml_compute_forward_flash_attn_f16(
                 const int i1 = ik1;
 
                 ggml_vec_dot_f16(neq0,
-                        S + i1,
-                        (ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
-                        (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
+                        S + i1, 0,
+                        (ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
+                        (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
             }
         } else {
             for (int64_t ic = 0; ic < nek1; ic += GGML_VEC_DOT_UNROLL) {
@@ -13508,9 +13564,9 @@ static void ggml_compute_forward_flash_attn_f16(
                 const int iv3 = iq3;
 
                 ggml_vec_dot_f16(nev0,
-                        (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
-                        (ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)),
-                        S16);
+                        (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
+                        (ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), 0,
+                        S16, 0, 1);
             }
         } else {
             for (int64_t ic = 0; ic < nev1; ic += GGML_VEC_DOT_UNROLL) {
@@ -13652,9 +13708,9 @@ static void ggml_compute_forward_flash_ff_f16(
                 const int i1 = ib01;
 
                 ggml_vec_dot_f16(nea0,
-                        S + i1,
-                        (ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)),
-                        (ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3)));
+                        S + i1, 0,
+                        (ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)), 0,
+                        (ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3)), 0, 1);
             }
 
             ggml_vec_add_f32(neb01, S, S, (float *) b1->data);
@@ -13677,9 +13733,9 @@ static void ggml_compute_forward_flash_ff_f16(
             for (int64_t ic = 0; ic < nec01; ++ic) {
 
                 ggml_vec_dot_f16(neb01,
-                        (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
-                        (ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)),
-                        S16);
+                        (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
+                        (ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)), 0,
+                        S16, 0, 1);
             }
 
             ggml_vec_add_f32(nec01,
@@ -13866,9 +13922,9 @@ static void ggml_compute_forward_flash_attn_back_f32(
                 const int i1 = ik1;
 
                 ggml_vec_dot_f32(neq0,
-                        S + i1,
-                        (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
-                        (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
+                        S + i1, 0,
+                        (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
+                        (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
             }
 
             // scale
@@ -14013,7 +14069,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
 
                 // S = SM * (S - dot(SM, S))
                 float dot_SM_gradSM = 0;
-                ggml_vec_dot_f32 (masked_begin, &dot_SM_gradSM, SM, S);
+                ggml_vec_dot_f32 (masked_begin, &dot_SM_gradSM, 0, SM, 0, S, 0, 1);
                 ggml_vec_acc1_f32(M, S, -dot_SM_gradSM);
                 ggml_vec_mul_f32 (masked_begin, S, S, SM);
 
@@ -15311,7 +15367,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
         return NULL;
     }
 
-    if (node->is_param) {
+    if (node->flags & GGML_TENSOR_FLAG_PARAM) {
         return node;
     }
 
@@ -15345,7 +15401,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
 
     clone->op = node->op;
     clone->grad = node->grad;
-    clone->is_param = node->is_param;
+    clone->flags = node->flags;
     clone->extra = node->extra;
     for (int k = 0; k < GGML_MAX_DIMS; ++k) {
         clone->nb[k] = node->nb[k];
@@ -16377,7 +16433,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
     for (int i = 0; i < gf->n_nodes; i++) {
         struct ggml_tensor * node = gf->nodes[i];
 
-        if (node->is_param) {
+        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
            GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
            ggml_build_forward_expand(gb, node->grad);
        }
@@ -16649,7 +16705,7 @@ struct ggml_compute_state_shared {
     atomic_int node_n;    // active graph node
     atomic_int node_task; // active graph node task phase
 
-    bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
+    ggml_abort_callback abort_callback;  // abort ggml_graph_compute when true
     void * abort_callback_data;
 };
 
@@ -17862,7 +17918,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                 i,
                 node->ne[0], node->ne[1], node->ne[2],
-                ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
+                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ", node->perf_runs,
                 (double) node->perf_cycles / (double) ggml_cycles_per_ms(),
                 (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
                 (double) node->perf_time_us / 1000.0,
@@ -17955,7 +18011,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
             continue;
         }
 
-        if (node->is_param) {
+        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
             snprintf(color, sizeof(color), "yellow");
         } else if (node->grad) {
             if (ggml_graph_find(gf, node)) {
@@ -18129,7 +18185,7 @@ static enum ggml_opt_result ggml_opt_adam(
     int np = 0;
     int64_t nx = 0;
     for (int i = 0; i < gf->n_nodes; ++i) {
-        if (gf->nodes[i]->is_param) {
+        if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) {
             GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
 
             GGML_ASSERT(np < GGML_MAX_PARAMS);
@@ -18382,7 +18438,7 @@ static enum ggml_opt_result linesearch_backtracking(
     }
 
     // compute the initial gradient in the search direction
-    ggml_vec_dot_f32(nx, &dginit, g, d);
+    ggml_vec_dot_f32(nx, &dginit, 0, g, 0, d, 0, 1);
 
     // make sure that d points to a descent direction
     if (0 < dginit) {
@@ -18432,7 +18488,7 @@ static enum ggml_opt_result linesearch_backtracking(
             return count;
         }
 
-        ggml_vec_dot_f32(nx, &dg, g, d);
+        ggml_vec_dot_f32(nx, &dg, 0, g, 0, d, 0, 1);
 
         // check the Wolfe condition
         if (dg < params->lbfgs.wolfe * dginit) {
@@ -18492,7 +18548,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     int np = 0;
     int nx = 0;
     for (int i = 0; i < gf->n_nodes; ++i) {
-        if (gf->nodes[i]->is_param) {
+        if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) {
             GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
 
             GGML_ASSERT(np < GGML_MAX_PARAMS);
@@ -18693,8 +18749,8 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         // ys = y^t \cdot s -> 1 / \rho.
         // yy = y^t \cdot y.
         //
-        ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]*nx]);
-        ggml_vec_dot_f32(nx, &yy, &lm_y[end[0]*nx], &lm_y[end[0]*nx]);
+        ggml_vec_dot_f32(nx, &ys, 0, &lm_y[end[0]*nx], 0, &lm_s[end[0]*nx], 0, 1);
+        ggml_vec_dot_f32(nx, &yy, 0, &lm_y[end[0]*nx], 0, &lm_y[end[0]*nx], 0, 1);
 
         lm_ys[end[0]] = ys;
 
@@ -18713,7 +18769,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         for (int i = 0; i < bound; ++i) {
             j[0] = (j[0] + m - 1) % m;
             // \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}
-            ggml_vec_dot_f32(nx, &lm_alpha[j[0]], &lm_s[j[0]*nx], d);
+            ggml_vec_dot_f32(nx, &lm_alpha[j[0]], 0, &lm_s[j[0]*nx], 0, d, 0, 1);
             lm_alpha[j[0]] /= lm_ys[j[0]];
             // q_{i} = q_{i+1} - \alpha_{i} y_{i}
             ggml_vec_mad_f32(nx, d, &lm_y[j[0]*nx], -lm_alpha[j[0]]);
@@ -18723,7 +18779,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
 
         for (int i = 0; i < bound; ++i) {
             // \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}
-            ggml_vec_dot_f32(nx, &beta, &lm_y[j[0]*nx], d);
+            ggml_vec_dot_f32(nx, &beta, 0, &lm_y[j[0]*nx], 0, d, 0, 1);
             beta /= lm_ys[j[0]];
             // \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}
             ggml_vec_mad_f32(nx, d, &lm_s[j[0]*nx], lm_alpha[j[0]] - beta);
@@ -18967,6 +19023,16 @@ enum ggml_opt_result ggml_opt_resume_g(
 
 ////////////////////////////////////////////////////////////////////////////////
 
+void ggml_set_input(struct ggml_tensor * tensor) {
+    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
+}
+
+void ggml_set_output(struct ggml_tensor * tensor) {
+    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
 void ggml_quantize_init(enum ggml_type type) {
     ggml_critical_section_start();
 
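`ggml_set_input` and `ggml_set_output` are the new public way to tag graph-boundary tensors; the flags do not change any computation by themselves, they only mark tensors so that later passes (the reworked `ggml-alloc` in this same release, for example) can treat them specially. A hypothetical call sequence against this API (the helper name and tensor shapes are invented; the `ggml_*` calls are real):

```c
#include "ggml.h"

// Hypothetical sketch: tag the boundary tensors of a tiny graph.
static struct ggml_tensor * build_marked_graph(struct ggml_context * ctx) {
    struct ggml_tensor * inp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
    ggml_set_input(inp);   // inp->flags |= GGML_TENSOR_FLAG_INPUT

    struct ggml_tensor * out = ggml_sqr(ctx, inp);
    ggml_set_output(out);  // out->flags |= GGML_TENSOR_FLAG_OUTPUT
    return out;
}
```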
@@ -20611,4 +20677,12 @@ int ggml_cpu_has_vsx(void) {
 #endif
 }
 
+int ggml_cpu_has_matmul_int8(void) {
+#if defined(__ARM_FEATURE_MATMUL_INT8)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 ////////////////////////////////////////////////////////////////////////////////
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -505,11 +505,17 @@ extern "C" {
 
     enum ggml_log_level {
         GGML_LOG_LEVEL_ERROR = 2,
-        GGML_LOG_LEVEL_WARN = 3,
-        GGML_LOG_LEVEL_INFO = 4,
+        GGML_LOG_LEVEL_WARN  = 3,
+        GGML_LOG_LEVEL_INFO  = 4,
         GGML_LOG_LEVEL_DEBUG = 5
     };
 
+    enum ggml_tensor_flag {
+        GGML_TENSOR_FLAG_INPUT  = 1,
+        GGML_TENSOR_FLAG_OUTPUT = 2,
+        GGML_TENSOR_FLAG_PARAM  = 4,
+    };
+
     // ggml object
     struct ggml_object {
         size_t offs;
@@ -543,7 +549,7 @@ extern "C" {
         // op params - allocated as int32_t for alignment
         int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
 
-        bool is_param;
+        int32_t flags;
 
         struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];
@@ -567,6 +573,11 @@ extern "C" {
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
 
+    // Abort callback
+    // If not NULL, called before ggml computation
+    // If it returns true, the computation is aborted
+    typedef bool (*ggml_abort_callback)(void * data);
+
     // the compute plan that needs to be prepared for ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct ggml_cplan {
@@ -576,8 +587,8 @@ extern "C" {
         int n_threads;
 
         // abort ggml_graph_compute when true
-        bool (*abort_callback)(void * data);
-        void * abort_callback_data;
+        ggml_abort_callback abort_callback;
+        void * abort_callback_data;
     };
 
     enum ggml_cgraph_eval_order {
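`ggml_cplan` keeps the same two members but retypes the function pointer through the new `ggml_abort_callback` typedef. A hedged usage sketch (the deadline logic is invented for illustration; `ggml_graph_plan` and `ggml_graph_compute` are the existing entry points, and a real caller must still provide `work_data` when `work_size` is non-zero):

```c
#include <stdbool.h>
#include <time.h>
#include "ggml.h"

// Invented example callback: ask ggml to stop once a deadline has passed.
static bool past_deadline(void * data) {
    const time_t * deadline = (const time_t *) data;
    return time(NULL) > *deadline; // returning true aborts the computation
}

static void compute_with_timeout(struct ggml_cgraph * graph, int n_threads) {
    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
    // NOTE: if plan.work_size > 0, plan.work_data must be set before computing.

    time_t deadline = time(NULL) + 5; // five-second budget
    plan.abort_callback      = past_deadline;
    plan.abort_callback_data = &deadline;

    ggml_graph_compute(graph, &plan);
}
```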
@@ -2087,6 +2098,12 @@ extern "C" {
             ggml_opt_callback callback,
             void * callback_data);
 
+    //
+    // tensor flags
+    //
+    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+
     //
     // quantization
     //
@@ -2273,6 +2290,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_ssse3 (void);
     GGML_API int ggml_cpu_has_sycl (void);
     GGML_API int ggml_cpu_has_vsx (void);
+    GGML_API int ggml_cpu_has_matmul_int8(void);
 
     //
     // Internal types and functions exposed for tests and benchmarks
@@ -2286,7 +2304,8 @@ extern "C" {
 #endif
     typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
-    typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+    typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
+                                    const void * GGML_RESTRICT y, size_t by, int nrc);
 
     typedef struct {
         const char * type_name;
@@ -2298,6 +2317,7 @@ extern "C" {
         ggml_from_float_t from_float_reference;
         ggml_vec_dot_t vec_dot;
         enum ggml_type vec_dot_type;
+        int64_t nrows; // number of rows to process simultaneously;
     } ggml_type_traits_t;
 
     GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);