llama_cpp 0.12.5 → 0.12.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/ext/llama_cpp/llama_cpp.cpp +67 -10
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +15 -1
- data/vendor/tmp/llama.cpp/Makefile +51 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +595 -492
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +268 -271
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +560 -346
- data/vendor/tmp/llama.cpp/ggml-impl.h +20 -7
- data/vendor/tmp/llama.cpp/ggml-metal.m +101 -11
- data/vendor/tmp/llama.cpp/ggml-metal.metal +608 -9
- data/vendor/tmp/llama.cpp/ggml-quants.c +1255 -94
- data/vendor/tmp/llama.cpp/ggml-quants.h +39 -16
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +95 -264
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +213 -58
- data/vendor/tmp/llama.cpp/ggml.c +1082 -564
- data/vendor/tmp/llama.cpp/ggml.h +50 -17
- data/vendor/tmp/llama.cpp/llama.cpp +1329 -280
- data/vendor/tmp/llama.cpp/llama.h +43 -1
- data/vendor/tmp/llama.cpp/scripts/get-flags.mk +1 -1
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -23,6 +23,9 @@
 #include <limits.h>
 #include <stdarg.h>
 #include <signal.h>
+#if defined(__gnu_linux__)
+#include <syscall.h>
+#endif

 #ifdef GGML_USE_METAL
 #include <unistd.h>
@@ -270,6 +273,8 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #include <Accelerate/Accelerate.h>
 #if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
 #include "ggml-opencl.h"
+#elif defined(GGML_USE_VULKAN)
+#include "ggml-vulkan.h"
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #if defined(GGML_BLAS_USE_MKL)
@@ -318,7 +323,7 @@ float ggml_table_f32_f16[1 << 16];
 // note: do not use these inside ggml.c
 // these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
-    return
+    return GGML_FP16_TO_FP32(x);
 }

 ggml_fp16_t ggml_fp32_to_fp16(float x) {
@@ -428,8 +433,8 @@ int64_t ggml_cycles_per_ms(void) {

 static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);

-static void ggml_vec_dot_f32(
-static void ggml_vec_dot_f16(
+static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
+static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);

 static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
     [GGML_TYPE_I8] = {
@@ -457,6 +462,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .is_quantized = false,
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
         .vec_dot_type = GGML_TYPE_F32,
+        .nrows = 1,
     },
     [GGML_TYPE_F16] = {
         .type_name = "f16",
@@ -468,6 +474,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row,
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
         .vec_dot_type = GGML_TYPE_F16,
+        .nrows = 1,
     },
     [GGML_TYPE_Q4_0] = {
         .type_name = "q4_0",
@@ -479,6 +486,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
         .vec_dot = ggml_vec_dot_q4_0_q8_0,
         .vec_dot_type = GGML_TYPE_Q8_0,
+#if defined (__ARM_FEATURE_MATMUL_INT8)
+        .nrows = 2,
+#else
+        .nrows = 1,
+#endif
     },
     [GGML_TYPE_Q4_1] = {
         .type_name = "q4_1",
@@ -490,6 +502,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
         .vec_dot = ggml_vec_dot_q4_1_q8_1,
         .vec_dot_type = GGML_TYPE_Q8_1,
+#if defined (__ARM_FEATURE_MATMUL_INT8)
+        .nrows = 2,
+#else
+        .nrows = 1,
+#endif
     },
     [4] = { // GGML_TYPE_Q4_2
         .type_name = "DEPRECATED",
@@ -501,6 +518,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = NULL,
         .vec_dot = NULL,
         .vec_dot_type = GGML_TYPE_COUNT,
+        .nrows = 1,
     },
     [5] = { // GGML_TYPE_Q4_3
         .type_name = "DEPRECATED",
@@ -512,6 +530,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = NULL,
         .vec_dot = NULL,
         .vec_dot_type = GGML_TYPE_COUNT,
+        .nrows = 1,
     },
     [GGML_TYPE_Q5_0] = {
         .type_name = "q5_0",
@@ -523,6 +542,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
         .vec_dot = ggml_vec_dot_q5_0_q8_0,
         .vec_dot_type = GGML_TYPE_Q8_0,
+        .nrows = 1,
     },
     [GGML_TYPE_Q5_1] = {
         .type_name = "q5_1",
@@ -534,6 +554,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
         .vec_dot = ggml_vec_dot_q5_1_q8_1,
         .vec_dot_type = GGML_TYPE_Q8_1,
+        .nrows = 1,
     },
     [GGML_TYPE_Q8_0] = {
         .type_name = "q8_0",
@@ -545,6 +566,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
         .vec_dot = ggml_vec_dot_q8_0_q8_0,
         .vec_dot_type = GGML_TYPE_Q8_0,
+#if defined (__ARM_FEATURE_MATMUL_INT8)
+        .nrows = 2,
+#else
+        .nrows = 1,
+#endif
     },
     [GGML_TYPE_Q8_1] = {
         .type_name = "q8_1",
@@ -554,6 +580,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float = quantize_row_q8_1,
         .from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
         .vec_dot_type = GGML_TYPE_Q8_1,
+        .nrows = 1,
     },
     [GGML_TYPE_Q2_K] = {
         .type_name = "q2_K",
@@ -565,6 +592,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
         .vec_dot = ggml_vec_dot_q2_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q3_K] = {
         .type_name = "q3_K",
@@ -576,6 +604,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
         .vec_dot = ggml_vec_dot_q3_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q4_K] = {
         .type_name = "q4_K",
@@ -587,6 +616,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
         .vec_dot = ggml_vec_dot_q4_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q5_K] = {
         .type_name = "q5_K",
@@ -598,6 +628,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
         .vec_dot = ggml_vec_dot_q5_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_Q6_K] = {
         .type_name = "q6_K",
@@ -609,6 +640,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
         .vec_dot = ggml_vec_dot_q6_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_IQ2_XXS] = {
         .type_name = "iq2_xxs",
@@ -620,6 +652,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = NULL,
         .vec_dot = ggml_vec_dot_iq2_xxs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_IQ2_XS] = {
         .type_name = "iq2_xs",
@@ -631,6 +664,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = NULL,
         .vec_dot = ggml_vec_dot_iq2_xs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
     },
     [GGML_TYPE_IQ3_XXS] = {
         .type_name = "iq3_xxs",
@@ -642,6 +676,31 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_reference = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
         .vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
+    },
+    [GGML_TYPE_IQ1_S] = {
+        .type_name = "iq1_s",
+        .blck_size = QK_K,
+        .type_size = sizeof(block_iq1_s),
+        .is_quantized = true,
+        .to_float = (ggml_to_float_t) dequantize_row_iq1_s,
+        .from_float = NULL,
+        .from_float_reference = NULL,
+        .vec_dot = ggml_vec_dot_iq1_s_q8_K,
+        .vec_dot_type = GGML_TYPE_Q8_K,
+        .nrows = 1,
+    },
+    [GGML_TYPE_IQ4_NL] = {
+        .type_name = "iq4_nl",
+        .blck_size = QK4_NL,
+        .type_size = sizeof(block_iq4_nl),
+        .is_quantized = true,
+        .to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
+        .from_float = quantize_row_iq4_nl,
+        .from_float_reference = (ggml_from_float_t)quantize_row_iq4_nl_reference,
+        .vec_dot = ggml_vec_dot_iq4_nl_q8_0,
+        .vec_dot_type = GGML_TYPE_Q8_0,
+        .nrows = 1,
     },
     [GGML_TYPE_Q8_K] = {
         .type_name = "q8_K",
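The net effect of the hunks above: every ggml_type_traits_t entry now declares an .nrows, with Q4_0, Q4_1, and Q8_0 advertising 2 rows when __ARM_FEATURE_MATMUL_INT8 is available, and two new quantization types (iq1_s, iq4_nl) join the table. The consumer of .nrows is not part of this hunk; a plausible sketch of how a mat-vec loop might use it, with every name below ours rather than the diff's:

    #include <stddef.h>

    /* Hypothetical consumer sketch -- not the upstream mul-mat code. */
    typedef void (*vec_dot_t)(int n, float * s, size_t bs,
                              const void * x, size_t bx,
                              const void * y, size_t by, int nrc);

    /* Step the output row index by `nrows` (as read from
       type_traits[type].nrows) and let one vec_dot call produce that many
       dot products at once. */
    static void mat_vec_sketch(vec_dot_t vec_dot, int nrows, int n, int nr,
                               float * dst, const char * x, size_t row_size,
                               const void * y) {
        for (int ir = 0; ir < nr; ir += nrows) {
            vec_dot(n, dst + ir, sizeof(float),
                    x + (size_t) ir * row_size, row_size, /* byte stride between rows */
                    y, 0, nrows);                         /* nrc = rows per call */
        }
    }

(The sketch assumes nr is a multiple of nrows; a real loop would handle the remainder.)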
@@ -739,7 +798,7 @@ inline static float vaddvq_f32(float32x4_t v) {
 #define GGML_F16x8              float16x8_t
 #define GGML_F16x8_ZERO         vdupq_n_f16(0.0f)
 #define GGML_F16x8_SET1(x)      vdupq_n_f16(x)
-#define GGML_F16x8_LOAD
+#define GGML_F16x8_LOAD(x)      vld1q_f16((const __fp16 *)(x))
 #define GGML_F16x8_STORE        vst1q_f16
 #define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
 #define GGML_F16x8_ADD          vaddq_f16
@@ -782,7 +841,7 @@ inline static float vaddvq_f32(float32x4_t v) {
 #define GGML_F32Cx4              float32x4_t
 #define GGML_F32Cx4_ZERO         vdupq_n_f32(0.0f)
 #define GGML_F32Cx4_SET1(x)      vdupq_n_f32(x)
-#define GGML_F32Cx4_LOAD(x)      vcvt_f32_f16(vld1_f16(x))
+#define GGML_F32Cx4_LOAD(x)      vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
 #define GGML_F32Cx4_STORE(x, y)  vst1_f16(x, vcvt_f16_f32(y))
 #define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
 #define GGML_F32Cx4_ADD          vaddq_f32
@@ -838,7 +897,7 @@ do { \
     const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), \
                                  _mm256_extractf128_ps(x[0], 1)); \
     const __m128 t1 = _mm_hadd_ps(t0, t0); \
-    res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1));
+    res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
 } while (0)
 // TODO: is this optimal ?

@@ -1119,7 +1178,7 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) {
         x[i] = _mm_add_ps(x[i], x[offset+i]); \
     } \
     const __m128 t0 = _mm_hadd_ps(x[0], x[0]); \
-    res = _mm_cvtss_f32(_mm_hadd_ps(t0, t0));
+    res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t0, t0)); \
 }
 // TODO: is this optimal ?

@@ -1212,7 +1271,13 @@ inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x)
 inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
 inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; }

-static void ggml_vec_dot_f32(
+static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) {
+    assert(nrc == 1);
+    UNUSED(nrc);
+    UNUSED(bx);
+    UNUSED(by);
+    UNUSED(bs);
+
 #ifdef GGML_SIMD
     float sumf = 0.0f;
     const int np = (n & ~(GGML_F32_STEP - 1));
@@ -1249,7 +1314,13 @@ static void ggml_vec_dot_f32(const int n, float * restrict s, const float * rest
     *s = sumf;
 }

-static void ggml_vec_dot_f16(
+static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc) {
+    assert(nrc == 1);
+    UNUSED(nrc);
+    UNUSED(bx);
+    UNUSED(by);
+    UNUSED(bs);
+
     ggml_float sumf = 0.0;

 #if defined(GGML_SIMD)
@@ -1455,7 +1526,7 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
 #endif
 }

-inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s); }
+inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, 0, x, 0, x, 0, 1); *s = sqrtf(*s); }
 inline static void ggml_vec_sqr_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; }
 inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
 inline static void ggml_vec_log_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); }
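Both ggml_vec_dot_f32 and ggml_vec_dot_f16 gain byte strides (bs, bx, by) and a result count nrc; plain scalar callers such as the updated ggml_vec_norm_f32 just pass zeros and nrc == 1. A minimal stand-in with the same eight-argument shape, ours for illustration only:

    #include <assert.h>
    #include <stddef.h>

    #define UNUSED(x) (void)(x)

    static void vec_dot_f32_sketch(int n, float * s, size_t bs,
                                   const float * x, size_t bx,
                                   const float * y, size_t by, int nrc) {
        assert(nrc == 1); /* only the single-result path, like the scalar body above */
        UNUSED(bs); UNUSED(bx); UNUSED(by);
        float sumf = 0.0f;
        for (int i = 0; i < n; ++i) {
            sumf += x[i]*y[i];
        }
        *s = sumf;
    }

    /* e.g. a norm in the style of ggml_vec_norm_f32:
           vec_dot_f32_sketch(n, &s, 0, x, 0, x, 0, 1); */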
@@ -1912,9 +1983,16 @@ struct ggml_numa_node {
 };

 struct ggml_numa_nodes {
+    enum ggml_numa_strategy numa_strategy;
     struct ggml_numa_node nodes[GGML_NUMA_MAX_NODES];
     uint32_t n_nodes;
     uint32_t total_cpus; // hardware threads on system
+    uint32_t current_node; // node on which main process is execting
+#if defined(__gnu_linux__)
+    cpu_set_t cpuset; // cpuset from numactl
+#else
+    uint32_t cpuset; // no NUMA support outside of Linux at this time. Use a portable datatype
+#endif
 };

 //
@@ -1948,18 +2026,40 @@ inline static void ggml_critical_section_end(void) {
     atomic_fetch_sub(&g_state_barrier, 1);
 }

-void ggml_numa_init(void) {
+#if defined(__gnu_linux__)
+static cpu_set_t ggml_get_numa_affinity(void) {
+    cpu_set_t cpuset;
+    pthread_t thread;
+    thread = pthread_self();
+    CPU_ZERO(&cpuset);
+    pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+    return cpuset;
+}
+#else
+static uint32_t ggml_get_numa_affinity(void) {
+    return 0; // no NUMA support
+}
+#endif
+
+void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
     if (g_state.numa.n_nodes > 0) {
         fprintf(stderr, "ggml_numa_init: NUMA already initialized\n");

         return;
     }

-#
+#if defined(__gnu_linux__)
     struct stat st;
     char path[256];
     int rv;

+    // set numa scheme
+    g_state.numa.numa_strategy = numa_flag;
+
+    GGML_PRINT_DEBUG("numa strategy %u\n",g_state.numa.numa_strategy);
+
+    g_state.numa.cpuset = ggml_get_numa_affinity();
+
     // enumerate nodes
     while (g_state.numa.n_nodes < GGML_NUMA_MAX_NODES) {
         rv = snprintf(path, sizeof(path), "/sys/devices/system/node/node%u", g_state.numa.n_nodes);
@@ -1978,11 +2078,23 @@ void ggml_numa_init(void) {

     GGML_PRINT_DEBUG("found %u numa nodes, %u CPUs\n", g_state.numa.n_nodes, g_state.numa.total_cpus);

-
+    // figure out which node we're on
+    uint current_cpu;
+    int getcpu_ret = 0;
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 28)
+    getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
+#else
+    // old glibc doesn't have a wrapper for this call. Fall back on direct syscall
+    getcpu_ret = syscall(SYS_getcpu,&current_cpu,&g_state.numa.current_node);
+#endif
+
+    if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
         g_state.numa.n_nodes = 0;
         return;
     }

+    GGML_PRINT_DEBUG("found our process on numa node %u, CPU %u\n", g_state.numa.current_node, current_cpu);
+
     for (uint32_t n = 0; n < g_state.numa.n_nodes; ++n) {
         struct ggml_numa_node * node = &g_state.numa.nodes[n];
         GGML_PRINT_DEBUG("CPUs on node %u:", n);
@@ -2009,6 +2121,7 @@ void ggml_numa_init(void) {
         }
     }
 #else
+    GGML_UNUSED(numa_flag);
     // TODO
 #endif
 }
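The getcpu() call used here only has a glibc wrapper from glibc 2.29 onward, which is why the code gates on __GLIBC__/__GLIBC_MINOR__ and falls back to the raw SYS_getcpu syscall. The same pattern in a standalone form (Linux/glibc only; _GNU_SOURCE is needed for the wrapper):

    #define _GNU_SOURCE
    #include <sched.h>       /* getcpu() wrapper on newer glibc */
    #include <stdio.h>
    #include <sys/syscall.h> /* SYS_getcpu */
    #include <unistd.h>      /* syscall() */

    int main(void) {
        unsigned int cpu = 0, node = 0;
        int rc;
    #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 28)
        rc = getcpu(&cpu, &node);
    #else
        rc = (int) syscall(SYS_getcpu, &cpu, &node); /* no wrapper on old glibc */
    #endif
        if (rc == 0) {
            printf("on CPU %u, NUMA node %u\n", cpu, node);
        }
        return 0;
    }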
@@ -2189,6 +2302,8 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
         case GGML_FTYPE_MOSTLY_IQ2_XS:  wtype = GGML_TYPE_IQ2_XS;  break;
         case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
+        case GGML_FTYPE_MOSTLY_IQ1_S:   wtype = GGML_TYPE_IQ1_S;   break;
+        case GGML_FTYPE_MOSTLY_IQ4_NL:  wtype = GGML_TYPE_IQ4_NL;  break;
         case GGML_FTYPE_UNKNOWN:        wtype = GGML_TYPE_COUNT;   break;
         case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
     }
@@ -2607,7 +2722,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.nb           =*/ { 0, 0, 0, 0 },
         /*.op           =*/ GGML_OP_NONE,
         /*.op_params    =*/ { 0 },
-        /*.
+        /*.flags        =*/ 0,
         /*.grad         =*/ NULL,
         /*.src          =*/ { NULL },
         /*.perf_runs    =*/ 0,
@@ -3142,7 +3257,7 @@ const char * ggml_get_name(const struct ggml_tensor * tensor) {
 }

 struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
-    strncpy(tensor->name, name, sizeof(tensor->name));
+    strncpy(tensor->name, name, sizeof(tensor->name) - 1);
     tensor->name[sizeof(tensor->name) - 1] = '\0';
     return tensor;
 }
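The point of the strncpy change: strncpy does not write a terminating NUL when the source is at least as long as the count. The explicit store on the next line already guaranteed termination, but copying only sizeof(tensor->name) - 1 bytes makes the pair self-evidently safe and avoids the truncation warnings compilers commonly raise for a full-size strncpy. The pattern in isolation (helper name ours):

    #include <string.h>

    /* Copy at most size-1 bytes, then terminate explicitly -- strncpy alone
       leaves the buffer unterminated when strlen(src) >= size. */
    static void set_name_sketch(char * dst, size_t size, const char * src) {
        strncpy(dst, src, size - 1);
        dst[size - 1] = '\0';
    }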
@@ -5018,16 +5133,28 @@ static struct ggml_tensor * ggml_soft_max_impl(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * mask,
+        struct ggml_tensor  * pos,
         float                 scale,
+        float                 max_bias,
         bool                  inplace) {
     GGML_ASSERT(ggml_is_contiguous(a));
+
     if (mask) {
         GGML_ASSERT(ggml_is_contiguous(mask));
-        GGML_ASSERT(mask
-        GGML_ASSERT(mask->ne[3] == 1);
+        GGML_ASSERT(ggml_is_matrix(mask));
         GGML_ASSERT(ggml_can_repeat_rows(mask, a));
     }

+    if (pos) {
+        GGML_ASSERT(ggml_is_vector(pos));
+        GGML_ASSERT(pos->type == GGML_TYPE_F32);
+        GGML_ASSERT(pos->ne[0] == a->ne[0]);
+    }
+
+    if (max_bias > 0.0f) {
+        GGML_ASSERT(pos);
+    }
+
     bool is_node = false;

     if (a->grad) {
@@ -5036,13 +5163,14 @@ static struct ggml_tensor * ggml_soft_max_impl(

     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-    float params[] = { scale };
+    float params[] = { scale, max_bias };
     ggml_set_op_params(result, params, sizeof(params));

     result->op   = GGML_OP_SOFT_MAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = mask;
+    result->src[2] = pos;

     return result;
 }
@@ -5050,21 +5178,23 @@ static struct ggml_tensor * ggml_soft_max_impl(
 struct ggml_tensor * ggml_soft_max(
         struct ggml_context * ctx,
         struct ggml_tensor  * a) {
-    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, false);
+    return ggml_soft_max_impl(ctx, a, NULL, NULL, 1.0f, 0.0f, false);
 }

 struct ggml_tensor * ggml_soft_max_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor  * a) {
-    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, true);
+    return ggml_soft_max_impl(ctx, a, NULL, NULL, 1.0f, 0.0f, true);
 }

 struct ggml_tensor * ggml_soft_max_ext(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * mask,
-
-
+        struct ggml_tensor  * pos,
+        float                 scale,
+        float                 max_bias) {
+    return ggml_soft_max_impl(ctx, a, mask, pos, scale, max_bias, false);
 }

 // ggml_soft_max_back
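ggml_soft_max_ext thus grows from (ctx, a, mask, scale) to (ctx, a, mask, pos, scale, max_bias): pos is an F32 vector of positions and max_bias scales a position-dependent bias folded into the softmax (an ALiBi-style term, on our reading); passing max_bias = 0.0f keeps the old behaviour. A hypothetical call site, with every name below ours:

    #include <math.h>
    #include "ggml.h"

    /* Scaled, masked softmax over attention scores, with an optional
       positional bias -- a sketch of the extended entry point above. */
    static struct ggml_tensor * scaled_masked_softmax(
            struct ggml_context * ctx,
            struct ggml_tensor  * kq,       /* attention scores */
            struct ggml_tensor  * kq_mask,  /* 2-D F32 mask, or NULL */
            struct ggml_tensor  * kq_pos,   /* F32 position vector, or NULL when max_bias == 0.0f */
            int                   n_embd_head,
            float                 max_bias) {
        return ggml_soft_max_ext(ctx, kq, kq_mask, kq_pos,
                                 1.0f/sqrtf((float) n_embd_head), max_bias);
    }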
@@ -5514,7 +5644,9 @@ struct ggml_tensor * ggml_conv_2d(
             ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
             ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]

-    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2],
+    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
+    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
+

     return result;
 }
@@ -6509,7 +6641,7 @@ struct ggml_tensor * ggml_cross_entropy_loss_back(
 void ggml_set_param(
         struct ggml_context * ctx,
         struct ggml_tensor  * tensor) {
-    tensor->
+    tensor->flags |= GGML_TENSOR_FLAG_PARAM;

     GGML_ASSERT(tensor->grad == NULL);
     tensor->grad = ggml_dup_tensor(ctx, tensor);
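ggml_set_param now records parameter status as one bit in the new tensor->flags field rather than a dedicated boolean, so other code can test it with a plain mask. A sketch of the check (the helper name is ours; GGML_TENSOR_FLAG_PARAM comes from this release):

    #include <stdbool.h>
    #include "ggml.h"

    /* A flags word replaces per-property booleans; membership is a mask test. */
    static bool tensor_is_param(const struct ggml_tensor * t) {
        return (t->flags & GGML_TENSOR_FLAG_PARAM) != 0;
    }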
@@ -6520,8 +6652,10 @@ void ggml_set_param(

 static void ggml_compute_forward_dup_same_cont(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
     GGML_ASSERT(src0->type == dst->type);
@@ -6552,8 +6686,10 @@ static void ggml_compute_forward_dup_same_cont(
 }
 static void ggml_compute_forward_dup_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -6566,7 +6702,7 @@ static void ggml_compute_forward_dup_f16(
     const int nth = params->nth; // number of threads

     if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params,
+        ggml_compute_forward_dup_same_cont(params, dst);
         return;
     }

@@ -6823,8 +6959,10 @@ static void ggml_compute_forward_dup_f16(

 static void ggml_compute_forward_dup_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -6837,7 +6975,7 @@ static void ggml_compute_forward_dup_f32(
     const int nth = params->nth; // number of threads

     if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params,
+        ggml_compute_forward_dup_same_cont(params, dst);
         return;
     }

@@ -7073,8 +7211,10 @@ static void ggml_compute_forward_dup_f32(
 // A simplified version of ggml_compute_forward_dup that doesn't do float upcasting, and just plain old memcpy.
 static void ggml_compute_forward_dup_bytes(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
     GGML_ASSERT(src0->type == dst->type);

@@ -7083,7 +7223,7 @@ static void ggml_compute_forward_dup_bytes(
     }

     if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
-        ggml_compute_forward_dup_same_cont(params,
+        ggml_compute_forward_dup_same_cont(params, dst);
         return;
     }

@@ -7222,21 +7362,23 @@ static void ggml_compute_forward_dup_bytes(

 static void ggml_compute_forward_dup(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (src0->type == dst->type) {
-        ggml_compute_forward_dup_bytes(params,
+        ggml_compute_forward_dup_bytes(params, dst);
         return;
     }

     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_dup_f16(params,
+                ggml_compute_forward_dup_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_dup_f32(params,
+                ggml_compute_forward_dup_f32(params, dst);
             } break;
         default:
             {
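From here on, every ggml_compute_forward_* helper in this file is reworked to the same convention: the explicit src0/src1 parameters disappear and each function recovers its inputs from dst->src[], so call sites shrink to (params, dst). The shape of the refactor, as a sketch (the example op below is ours):

    /* Before: ggml_compute_forward_dup(params, src0, dst);
       After:  ggml_compute_forward_dup(params, dst);   // src0 == dst->src[0] */

    #include "ggml.h"

    static void compute_forward_unary_sketch(
            const struct ggml_compute_params * params,
            struct ggml_tensor * dst) {
        const struct ggml_tensor * src0 = dst->src[0]; /* sources travel with dst */
        (void) params;
        GGML_ASSERT(ggml_are_same_shape(src0, dst));
        /* ... kernel body ... */
    }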
@@ -7249,9 +7391,11 @@ static void ggml_compute_forward_dup(
|
|
7249
7391
|
|
7250
7392
|
static void ggml_compute_forward_add_f32(
|
7251
7393
|
const struct ggml_compute_params * params,
|
7252
|
-
const struct ggml_tensor * src0,
|
7253
|
-
const struct ggml_tensor * src1,
|
7254
7394
|
struct ggml_tensor * dst) {
|
7395
|
+
|
7396
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7397
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7398
|
+
|
7255
7399
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
7256
7400
|
|
7257
7401
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -7337,9 +7481,11 @@ static void ggml_compute_forward_add_f32(
|
|
7337
7481
|
|
7338
7482
|
static void ggml_compute_forward_add_f16_f32(
|
7339
7483
|
const struct ggml_compute_params * params,
|
7340
|
-
const struct ggml_tensor * src0,
|
7341
|
-
const struct ggml_tensor * src1,
|
7342
7484
|
struct ggml_tensor * dst) {
|
7485
|
+
|
7486
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7487
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7488
|
+
|
7343
7489
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7344
7490
|
|
7345
7491
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -7414,9 +7560,11 @@ static void ggml_compute_forward_add_f16_f32(
|
|
7414
7560
|
|
7415
7561
|
static void ggml_compute_forward_add_f16_f16(
|
7416
7562
|
const struct ggml_compute_params * params,
|
7417
|
-
const struct ggml_tensor * src0,
|
7418
|
-
const struct ggml_tensor * src1,
|
7419
7563
|
struct ggml_tensor * dst) {
|
7564
|
+
|
7565
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7566
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7567
|
+
|
7420
7568
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7421
7569
|
|
7422
7570
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -7468,9 +7616,11 @@ static void ggml_compute_forward_add_f16_f16(
|
|
7468
7616
|
|
7469
7617
|
static void ggml_compute_forward_add_q_f32(
|
7470
7618
|
const struct ggml_compute_params * params,
|
7471
|
-
const struct ggml_tensor * src0,
|
7472
|
-
const struct ggml_tensor * src1,
|
7473
7619
|
struct ggml_tensor * dst) {
|
7620
|
+
|
7621
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7622
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7623
|
+
|
7474
7624
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7475
7625
|
|
7476
7626
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -7546,14 +7696,16 @@ static void ggml_compute_forward_add_q_f32(
|
|
7546
7696
|
|
7547
7697
|
static void ggml_compute_forward_add(
|
7548
7698
|
const struct ggml_compute_params * params,
|
7549
|
-
const struct ggml_tensor * src0,
|
7550
|
-
const struct ggml_tensor * src1,
|
7551
7699
|
struct ggml_tensor * dst) {
|
7700
|
+
|
7701
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7702
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7703
|
+
|
7552
7704
|
switch (src0->type) {
|
7553
7705
|
case GGML_TYPE_F32:
|
7554
7706
|
{
|
7555
7707
|
if (src1->type == GGML_TYPE_F32) {
|
7556
|
-
ggml_compute_forward_add_f32(params,
|
7708
|
+
ggml_compute_forward_add_f32(params, dst);
|
7557
7709
|
}
|
7558
7710
|
else {
|
7559
7711
|
GGML_ASSERT(false);
|
@@ -7562,10 +7714,10 @@ static void ggml_compute_forward_add(
|
|
7562
7714
|
case GGML_TYPE_F16:
|
7563
7715
|
{
|
7564
7716
|
if (src1->type == GGML_TYPE_F16) {
|
7565
|
-
ggml_compute_forward_add_f16_f16(params,
|
7717
|
+
ggml_compute_forward_add_f16_f16(params, dst);
|
7566
7718
|
}
|
7567
7719
|
else if (src1->type == GGML_TYPE_F32) {
|
7568
|
-
ggml_compute_forward_add_f16_f32(params,
|
7720
|
+
ggml_compute_forward_add_f16_f32(params, dst);
|
7569
7721
|
}
|
7570
7722
|
else {
|
7571
7723
|
GGML_ASSERT(false);
|
@@ -7584,8 +7736,10 @@ static void ggml_compute_forward_add(
|
|
7584
7736
|
case GGML_TYPE_IQ2_XXS:
|
7585
7737
|
case GGML_TYPE_IQ2_XS:
|
7586
7738
|
case GGML_TYPE_IQ3_XXS:
|
7739
|
+
case GGML_TYPE_IQ1_S:
|
7740
|
+
case GGML_TYPE_IQ4_NL:
|
7587
7741
|
{
|
7588
|
-
ggml_compute_forward_add_q_f32(params,
|
7742
|
+
ggml_compute_forward_add_q_f32(params, dst);
|
7589
7743
|
} break;
|
7590
7744
|
default:
|
7591
7745
|
{
|
@@ -7598,9 +7752,11 @@ static void ggml_compute_forward_add(
|
|
7598
7752
|
|
7599
7753
|
static void ggml_compute_forward_add1_f32(
|
7600
7754
|
const struct ggml_compute_params * params,
|
7601
|
-
const struct ggml_tensor * src0,
|
7602
|
-
const struct ggml_tensor * src1,
|
7603
7755
|
struct ggml_tensor * dst) {
|
7756
|
+
|
7757
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7758
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7759
|
+
|
7604
7760
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7605
7761
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7606
7762
|
|
@@ -7650,9 +7806,11 @@ static void ggml_compute_forward_add1_f32(
|
|
7650
7806
|
|
7651
7807
|
static void ggml_compute_forward_add1_f16_f32(
|
7652
7808
|
const struct ggml_compute_params * params,
|
7653
|
-
const struct ggml_tensor * src0,
|
7654
|
-
const struct ggml_tensor * src1,
|
7655
7809
|
struct ggml_tensor * dst) {
|
7810
|
+
|
7811
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7812
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7813
|
+
|
7656
7814
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7657
7815
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7658
7816
|
|
@@ -7700,9 +7858,11 @@ static void ggml_compute_forward_add1_f16_f32(
|
|
7700
7858
|
|
7701
7859
|
static void ggml_compute_forward_add1_f16_f16(
|
7702
7860
|
const struct ggml_compute_params * params,
|
7703
|
-
const struct ggml_tensor * src0,
|
7704
|
-
const struct ggml_tensor * src1,
|
7705
7861
|
struct ggml_tensor * dst) {
|
7862
|
+
|
7863
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7864
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7865
|
+
|
7706
7866
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7707
7867
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7708
7868
|
|
@@ -7750,9 +7910,11 @@ static void ggml_compute_forward_add1_f16_f16(
|
|
7750
7910
|
|
7751
7911
|
static void ggml_compute_forward_add1_q_f32(
|
7752
7912
|
const struct ggml_compute_params * params,
|
7753
|
-
const struct ggml_tensor * src0,
|
7754
|
-
const struct ggml_tensor * src1,
|
7755
7913
|
struct ggml_tensor * dst) {
|
7914
|
+
|
7915
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7916
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7917
|
+
|
7756
7918
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7757
7919
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7758
7920
|
|
@@ -7817,21 +7979,23 @@ static void ggml_compute_forward_add1_q_f32(
|
|
7817
7979
|
|
7818
7980
|
static void ggml_compute_forward_add1(
|
7819
7981
|
const struct ggml_compute_params * params,
|
7820
|
-
const struct ggml_tensor * src0,
|
7821
|
-
const struct ggml_tensor * src1,
|
7822
7982
|
struct ggml_tensor * dst) {
|
7983
|
+
|
7984
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7985
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7986
|
+
|
7823
7987
|
switch (src0->type) {
|
7824
7988
|
case GGML_TYPE_F32:
|
7825
7989
|
{
|
7826
|
-
ggml_compute_forward_add1_f32(params,
|
7990
|
+
ggml_compute_forward_add1_f32(params, dst);
|
7827
7991
|
} break;
|
7828
7992
|
case GGML_TYPE_F16:
|
7829
7993
|
{
|
7830
7994
|
if (src1->type == GGML_TYPE_F16) {
|
7831
|
-
ggml_compute_forward_add1_f16_f16(params,
|
7995
|
+
ggml_compute_forward_add1_f16_f16(params, dst);
|
7832
7996
|
}
|
7833
7997
|
else if (src1->type == GGML_TYPE_F32) {
|
7834
|
-
ggml_compute_forward_add1_f16_f32(params,
|
7998
|
+
ggml_compute_forward_add1_f16_f32(params, dst);
|
7835
7999
|
}
|
7836
8000
|
else {
|
7837
8001
|
GGML_ASSERT(false);
|
@@ -7851,8 +8015,10 @@ static void ggml_compute_forward_add1(
|
|
7851
8015
|
case GGML_TYPE_IQ2_XXS:
|
7852
8016
|
case GGML_TYPE_IQ2_XS:
|
7853
8017
|
case GGML_TYPE_IQ3_XXS:
|
8018
|
+
case GGML_TYPE_IQ1_S:
|
8019
|
+
case GGML_TYPE_IQ4_NL:
|
7854
8020
|
{
|
7855
|
-
ggml_compute_forward_add1_q_f32(params,
|
8021
|
+
ggml_compute_forward_add1_q_f32(params, dst);
|
7856
8022
|
} break;
|
7857
8023
|
default:
|
7858
8024
|
{
|
@@ -7865,9 +8031,11 @@ static void ggml_compute_forward_add1(
|
|
7865
8031
|
|
7866
8032
|
static void ggml_compute_forward_acc_f32(
|
7867
8033
|
const struct ggml_compute_params * params,
|
7868
|
-
const struct ggml_tensor * src0,
|
7869
|
-
const struct ggml_tensor * src1,
|
7870
8034
|
struct ggml_tensor * dst) {
|
8035
|
+
|
8036
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8037
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8038
|
+
|
7871
8039
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7872
8040
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
7873
8041
|
|
@@ -7947,14 +8115,14 @@ static void ggml_compute_forward_acc_f32(
|
|
7947
8115
|
|
7948
8116
|
static void ggml_compute_forward_acc(
|
7949
8117
|
const struct ggml_compute_params * params,
|
7950
|
-
const struct ggml_tensor * src0,
|
7951
|
-
const struct ggml_tensor * src1,
|
7952
8118
|
struct ggml_tensor * dst) {
|
7953
8119
|
|
8120
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8121
|
+
|
7954
8122
|
switch (src0->type) {
|
7955
8123
|
case GGML_TYPE_F32:
|
7956
8124
|
{
|
7957
|
-
ggml_compute_forward_acc_f32(params,
|
8125
|
+
ggml_compute_forward_acc_f32(params, dst);
|
7958
8126
|
} break;
|
7959
8127
|
case GGML_TYPE_F16:
|
7960
8128
|
case GGML_TYPE_Q4_0:
|
@@ -7971,6 +8139,8 @@ static void ggml_compute_forward_acc(
|
|
7971
8139
|
case GGML_TYPE_IQ2_XXS:
|
7972
8140
|
case GGML_TYPE_IQ2_XS:
|
7973
8141
|
case GGML_TYPE_IQ3_XXS:
|
8142
|
+
case GGML_TYPE_IQ1_S:
|
8143
|
+
case GGML_TYPE_IQ4_NL:
|
7974
8144
|
default:
|
7975
8145
|
{
|
7976
8146
|
GGML_ASSERT(false);
|
@@ -7982,9 +8152,11 @@ static void ggml_compute_forward_acc(
|
|
7982
8152
|
|
7983
8153
|
static void ggml_compute_forward_sub_f32(
|
7984
8154
|
const struct ggml_compute_params * params,
|
7985
|
-
const struct ggml_tensor * src0,
|
7986
|
-
const struct ggml_tensor * src1,
|
7987
8155
|
struct ggml_tensor * dst) {
|
8156
|
+
|
8157
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8158
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8159
|
+
|
7988
8160
|
assert(params->ith == 0);
|
7989
8161
|
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7990
8162
|
|
@@ -8042,13 +8214,14 @@ static void ggml_compute_forward_sub_f32(
|
|
8042
8214
|
|
8043
8215
|
static void ggml_compute_forward_sub(
|
8044
8216
|
const struct ggml_compute_params * params,
|
8045
|
-
const struct ggml_tensor * src0,
|
8046
|
-
const struct ggml_tensor * src1,
|
8047
8217
|
struct ggml_tensor * dst) {
|
8218
|
+
|
8219
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8220
|
+
|
8048
8221
|
switch (src0->type) {
|
8049
8222
|
case GGML_TYPE_F32:
|
8050
8223
|
{
|
8051
|
-
ggml_compute_forward_sub_f32(params,
|
8224
|
+
ggml_compute_forward_sub_f32(params, dst);
|
8052
8225
|
} break;
|
8053
8226
|
default:
|
8054
8227
|
{
|
@@ -8061,9 +8234,11 @@ static void ggml_compute_forward_sub(
|
|
8061
8234
|
|
8062
8235
|
static void ggml_compute_forward_mul_f32(
|
8063
8236
|
const struct ggml_compute_params * params,
|
8064
|
-
const struct ggml_tensor * src0,
|
8065
|
-
const struct ggml_tensor * src1,
|
8066
8237
|
struct ggml_tensor * dst) {
|
8238
|
+
|
8239
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8240
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8241
|
+
|
8067
8242
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
8068
8243
|
|
8069
8244
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8144,15 +8319,17 @@ static void ggml_compute_forward_mul_f32(
|
|
8144
8319
|
|
8145
8320
|
static void ggml_compute_forward_mul(
|
8146
8321
|
const struct ggml_compute_params * params,
|
8147
|
-
const struct ggml_tensor * src0,
|
8148
|
-
const struct ggml_tensor * src1,
|
8149
8322
|
struct ggml_tensor * dst) {
|
8323
|
+
|
8324
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8325
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8326
|
+
|
8150
8327
|
GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now");
|
8151
8328
|
|
8152
8329
|
switch (src0->type) {
|
8153
8330
|
case GGML_TYPE_F32:
|
8154
8331
|
{
|
8155
|
-
ggml_compute_forward_mul_f32(params,
|
8332
|
+
ggml_compute_forward_mul_f32(params, dst);
|
8156
8333
|
} break;
|
8157
8334
|
default:
|
8158
8335
|
{
|
@@ -8165,9 +8342,11 @@ static void ggml_compute_forward_mul(
|
|
8165
8342
|
|
8166
8343
|
static void ggml_compute_forward_div_f32(
|
8167
8344
|
const struct ggml_compute_params * params,
|
8168
|
-
const struct ggml_tensor * src0,
|
8169
|
-
const struct ggml_tensor * src1,
|
8170
8345
|
struct ggml_tensor * dst) {
|
8346
|
+
|
8347
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8348
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8349
|
+
|
8171
8350
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
8172
8351
|
|
8173
8352
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8238,13 +8417,14 @@ static void ggml_compute_forward_div_f32(
|
|
8238
8417
|
|
8239
8418
|
static void ggml_compute_forward_div(
|
8240
8419
|
const struct ggml_compute_params * params,
|
8241
|
-
const struct ggml_tensor * src0,
|
8242
|
-
const struct ggml_tensor * src1,
|
8243
8420
|
struct ggml_tensor * dst) {
|
8421
|
+
|
8422
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8423
|
+
|
8244
8424
|
switch (src0->type) {
|
8245
8425
|
case GGML_TYPE_F32:
|
8246
8426
|
{
|
8247
|
-
ggml_compute_forward_div_f32(params,
|
8427
|
+
ggml_compute_forward_div_f32(params, dst);
|
8248
8428
|
} break;
|
8249
8429
|
default:
|
8250
8430
|
{
|
@@ -8257,8 +8437,10 @@ static void ggml_compute_forward_div(
|
|
8257
8437
|
|
8258
8438
|
static void ggml_compute_forward_sqr_f32(
|
8259
8439
|
const struct ggml_compute_params * params,
|
8260
|
-
const struct ggml_tensor * src0,
|
8261
8440
|
struct ggml_tensor * dst) {
|
8441
|
+
|
8442
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8443
|
+
|
8262
8444
|
assert(params->ith == 0);
|
8263
8445
|
assert(ggml_are_same_shape(src0, dst));
|
8264
8446
|
|
@@ -8281,12 +8463,14 @@ static void ggml_compute_forward_sqr_f32(
|
|
8281
8463
|
|
8282
8464
|
static void ggml_compute_forward_sqr(
|
8283
8465
|
const struct ggml_compute_params * params,
|
8284
|
-
const struct ggml_tensor * src0,
|
8285
8466
|
struct ggml_tensor * dst) {
|
8467
|
+
|
8468
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8469
|
+
|
8286
8470
|
switch (src0->type) {
|
8287
8471
|
case GGML_TYPE_F32:
|
8288
8472
|
{
|
8289
|
-
ggml_compute_forward_sqr_f32(params,
|
8473
|
+
ggml_compute_forward_sqr_f32(params, dst);
|
8290
8474
|
} break;
|
8291
8475
|
default:
|
8292
8476
|
{
|
@@ -8299,8 +8483,10 @@ static void ggml_compute_forward_sqr(
|
|
8299
8483
|
|
8300
8484
|
static void ggml_compute_forward_sqrt_f32(
|
8301
8485
|
const struct ggml_compute_params * params,
|
8302
|
-
const struct ggml_tensor * src0,
|
8303
8486
|
struct ggml_tensor * dst) {
|
8487
|
+
|
8488
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8489
|
+
|
8304
8490
|
assert(params->ith == 0);
|
8305
8491
|
assert(ggml_are_same_shape(src0, dst));
|
8306
8492
|
|
@@ -8323,12 +8509,14 @@ static void ggml_compute_forward_sqrt_f32(
|
|
8323
8509
|
|
8324
8510
|
static void ggml_compute_forward_sqrt(
|
8325
8511
|
const struct ggml_compute_params * params,
|
8326
|
-
const struct ggml_tensor * src0,
|
8327
8512
|
struct ggml_tensor * dst) {
|
8513
|
+
|
8514
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8515
|
+
|
8328
8516
|
switch (src0->type) {
|
8329
8517
|
case GGML_TYPE_F32:
|
8330
8518
|
{
|
8331
|
-
ggml_compute_forward_sqrt_f32(params,
|
8519
|
+
ggml_compute_forward_sqrt_f32(params, dst);
|
8332
8520
|
} break;
|
8333
8521
|
default:
|
8334
8522
|
{
|
@@ -8341,8 +8529,10 @@ static void ggml_compute_forward_sqrt(
|
|
8341
8529
|
|
8342
8530
|
static void ggml_compute_forward_log_f32(
|
8343
8531
|
const struct ggml_compute_params * params,
|
8344
|
-
const struct ggml_tensor * src0,
|
8345
8532
|
struct ggml_tensor * dst) {
|
8533
|
+
|
8534
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8535
|
+
|
8346
8536
|
GGML_ASSERT(params->ith == 0);
|
8347
8537
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
8348
8538
|
|
@@ -8365,12 +8555,14 @@ static void ggml_compute_forward_log_f32(
|
|
8365
8555
|
|
8366
8556
|
static void ggml_compute_forward_log(
|
8367
8557
|
const struct ggml_compute_params * params,
|
8368
|
-
const struct ggml_tensor * src0,
|
8369
8558
|
struct ggml_tensor * dst) {
|
8559
|
+
|
8560
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8561
|
+
|
8370
8562
|
switch (src0->type) {
|
8371
8563
|
case GGML_TYPE_F32:
|
8372
8564
|
{
|
8373
|
-
ggml_compute_forward_log_f32(params,
|
8565
|
+
ggml_compute_forward_log_f32(params, dst);
|
8374
8566
|
} break;
|
8375
8567
|
default:
|
8376
8568
|
{
|
@@ -8383,8 +8575,10 @@ static void ggml_compute_forward_log(
|
|
8383
8575
|
|
8384
8576
|
static void ggml_compute_forward_sum_f32(
|
8385
8577
|
const struct ggml_compute_params * params,
|
8386
|
-
const struct ggml_tensor * src0,
|
8387
8578
|
struct ggml_tensor * dst) {
|
8579
|
+
|
8580
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8581
|
+
|
8388
8582
|
assert(params->ith == 0);
|
8389
8583
|
assert(ggml_is_scalar(dst));
|
8390
8584
|
|
@@ -8416,8 +8610,10 @@ static void ggml_compute_forward_sum_f32(
|
|
8416
8610
|
|
8417
8611
|
static void ggml_compute_forward_sum_f16(
|
8418
8612
|
const struct ggml_compute_params * params,
|
8419
|
-
const struct ggml_tensor * src0,
|
8420
8613
|
struct ggml_tensor * dst) {
|
8614
|
+
|
8615
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8616
|
+
|
8421
8617
|
assert(params->ith == 0);
|
8422
8618
|
assert(ggml_is_scalar(dst));
|
8423
8619
|
|
@@ -8448,16 +8644,18 @@ static void ggml_compute_forward_sum_f16(
|
|
8448
8644
|
|
8449
8645
|
static void ggml_compute_forward_sum(
|
8450
8646
|
const struct ggml_compute_params * params,
|
8451
|
-
const struct ggml_tensor * src0,
|
8452
8647
|
struct ggml_tensor * dst) {
|
8648
|
+
|
8649
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8650
|
+
|
8453
8651
|
switch (src0->type) {
|
8454
8652
|
case GGML_TYPE_F32:
|
8455
8653
|
{
|
8456
|
-
ggml_compute_forward_sum_f32(params,
|
8654
|
+
ggml_compute_forward_sum_f32(params, dst);
|
8457
8655
|
} break;
|
8458
8656
|
case GGML_TYPE_F16:
|
8459
8657
|
{
|
8460
|
-
ggml_compute_forward_sum_f16(params,
|
8658
|
+
ggml_compute_forward_sum_f16(params, dst);
|
8461
8659
|
} break;
|
8462
8660
|
default:
|
8463
8661
|
{
|
@@ -8470,8 +8668,10 @@ static void ggml_compute_forward_sum(
|
|
8470
8668
|
|
8471
8669
|
static void ggml_compute_forward_sum_rows_f32(
|
8472
8670
|
const struct ggml_compute_params * params,
|
8473
|
-
const struct ggml_tensor * src0,
|
8474
8671
|
struct ggml_tensor * dst) {
|
8672
|
+
|
8673
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8674
|
+
|
8475
8675
|
GGML_ASSERT(params->ith == 0);
|
8476
8676
|
|
8477
8677
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8503,12 +8703,14 @@ static void ggml_compute_forward_sum_rows_f32(
|
|
8503
8703
|
|
8504
8704
|
static void ggml_compute_forward_sum_rows(
|
8505
8705
|
const struct ggml_compute_params * params,
|
8506
|
-
const struct ggml_tensor * src0,
|
8507
8706
|
struct ggml_tensor * dst) {
|
8707
|
+
|
8708
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8709
|
+
|
8508
8710
|
switch (src0->type) {
|
8509
8711
|
case GGML_TYPE_F32:
|
8510
8712
|
{
|
8511
|
-
ggml_compute_forward_sum_rows_f32(params,
|
8713
|
+
ggml_compute_forward_sum_rows_f32(params, dst);
|
8512
8714
|
} break;
|
8513
8715
|
default:
|
8514
8716
|
{
|
@@ -8521,8 +8723,10 @@ static void ggml_compute_forward_sum_rows(
|
|
8521
8723
|
|
8522
8724
|
static void ggml_compute_forward_mean_f32(
|
8523
8725
|
const struct ggml_compute_params * params,
|
8524
|
-
const struct ggml_tensor * src0,
|
8525
8726
|
struct ggml_tensor * dst) {
|
8727
|
+
|
8728
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8729
|
+
|
8526
8730
|
assert(params->ith == 0);
|
8527
8731
|
|
8528
8732
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8558,12 +8762,14 @@ static void ggml_compute_forward_mean_f32(
|
|
8558
8762
|
|
8559
8763
|
static void ggml_compute_forward_mean(
|
8560
8764
|
const struct ggml_compute_params * params,
|
8561
|
-
const struct ggml_tensor * src0,
|
8562
8765
|
struct ggml_tensor * dst) {
|
8766
|
+
|
8767
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8768
|
+
|
8563
8769
|
switch (src0->type) {
|
8564
8770
|
case GGML_TYPE_F32:
|
8565
8771
|
{
|
8566
|
-
ggml_compute_forward_mean_f32(params,
|
8772
|
+
ggml_compute_forward_mean_f32(params, dst);
|
8567
8773
|
} break;
|
8568
8774
|
default:
|
8569
8775
|
{
|
@@ -8576,8 +8782,10 @@ static void ggml_compute_forward_mean(
|
|
8576
8782
|
|
8577
8783
|
static void ggml_compute_forward_argmax_f32(
|
8578
8784
|
const struct ggml_compute_params * params,
|
8579
|
-
const struct ggml_tensor * src0,
|
8580
8785
|
struct ggml_tensor * dst) {
|
8786
|
+
|
8787
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8788
|
+
|
8581
8789
|
assert(params->ith == 0);
|
8582
8790
|
|
8583
8791
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8604,12 +8812,14 @@ static void ggml_compute_forward_argmax_f32(
|
|
8604
8812
|
|
8605
8813
|
static void ggml_compute_forward_argmax(
|
8606
8814
|
const struct ggml_compute_params * params,
|
8607
|
-
const struct ggml_tensor * src0,
|
8608
8815
|
struct ggml_tensor * dst) {
|
8816
|
+
|
8817
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8818
|
+
|
8609
8819
|
switch (src0->type) {
|
8610
8820
|
case GGML_TYPE_F32:
|
8611
8821
|
{
|
8612
|
-
ggml_compute_forward_argmax_f32(params,
|
8822
|
+
ggml_compute_forward_argmax_f32(params, dst);
|
8613
8823
|
} break;
|
8614
8824
|
default:
|
8615
8825
|
{
|
@@ -8622,8 +8832,10 @@ static void ggml_compute_forward_argmax(
|
|
8622
8832
|
|
8623
8833
|
static void ggml_compute_forward_repeat_f32(
|
8624
8834
|
const struct ggml_compute_params * params,
|
8625
|
-
const struct ggml_tensor * src0,
|
8626
8835
|
struct ggml_tensor * dst) {
|
8836
|
+
|
8837
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8838
|
+
|
8627
8839
|
GGML_ASSERT(params->ith == 0);
|
8628
8840
|
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
8629
8841
|
|
@@ -8665,8 +8877,10 @@ static void ggml_compute_forward_repeat_f32(
|
|
8665
8877
|
|
8666
8878
|
static void ggml_compute_forward_repeat_f16(
|
8667
8879
|
const struct ggml_compute_params * params,
|
8668
|
-
const struct ggml_tensor * src0,
|
8669
8880
|
struct ggml_tensor * dst) {
|
8881
|
+
|
8882
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8883
|
+
|
8670
8884
|
GGML_ASSERT(params->ith == 0);
|
8671
8885
|
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
8672
8886
|
|
@@ -8711,18 +8925,20 @@ static void ggml_compute_forward_repeat_f16(
|
|
8711
8925
|
|
8712
8926
|
static void ggml_compute_forward_repeat(
|
8713
8927
|
const struct ggml_compute_params * params,
|
8714
|
-
const struct ggml_tensor * src0,
|
8715
8928
|
struct ggml_tensor * dst) {
|
8929
|
+
|
8930
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8931
|
+
|
8716
8932
|
switch (src0->type) {
|
8717
8933
|
case GGML_TYPE_F16:
|
8718
8934
|
case GGML_TYPE_I16:
|
8719
8935
|
{
|
8720
|
-
ggml_compute_forward_repeat_f16(params,
|
8936
|
+
ggml_compute_forward_repeat_f16(params, dst);
|
8721
8937
|
} break;
|
8722
8938
|
case GGML_TYPE_F32:
|
8723
8939
|
case GGML_TYPE_I32:
|
8724
8940
|
{
|
8725
|
-
ggml_compute_forward_repeat_f32(params,
|
8941
|
+
ggml_compute_forward_repeat_f32(params, dst);
|
8726
8942
|
} break;
|
8727
8943
|
default:
|
8728
8944
|
{
|
@@ -8735,8 +8951,10 @@ static void ggml_compute_forward_repeat(
|
|
8735
8951
|
|
8736
8952
|
static void ggml_compute_forward_repeat_back_f32(
|
8737
8953
|
const struct ggml_compute_params * params,
|
8738
|
-
const struct ggml_tensor * src0,
|
8739
8954
|
struct ggml_tensor * dst) {
|
8955
|
+
|
8956
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8957
|
+
|
8740
8958
|
GGML_ASSERT(params->ith == 0);
|
8741
8959
|
GGML_ASSERT(ggml_can_repeat(dst, src0));
|
8742
8960
|
|
@@ -8792,12 +9010,14 @@ static void ggml_compute_forward_repeat_back_f32(
|
|
8792
9010
|
|
8793
9011
|
static void ggml_compute_forward_repeat_back(
|
8794
9012
|
const struct ggml_compute_params * params,
|
8795
|
-
const struct ggml_tensor * src0,
|
8796
9013
|
struct ggml_tensor * dst) {
|
9014
|
+
|
9015
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9016
|
+
|
8797
9017
|
switch (src0->type) {
|
8798
9018
|
case GGML_TYPE_F32:
|
8799
9019
|
{
|
8800
|
-
ggml_compute_forward_repeat_back_f32(params,
|
9020
|
+
ggml_compute_forward_repeat_back_f32(params, dst);
|
8801
9021
|
} break;
|
8802
9022
|
default:
|
8803
9023
|
{
|
@@ -8810,10 +9030,11 @@ static void ggml_compute_forward_repeat_back(
|
|
8810
9030
|
|
8811
9031
|
static void ggml_compute_forward_concat_f32(
|
8812
9032
|
const struct ggml_compute_params * params,
|
8813
|
-
const struct ggml_tensor * src0,
|
8814
|
-
const struct ggml_tensor * src1,
|
8815
9033
|
struct ggml_tensor * dst) {
|
8816
9034
|
|
9035
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9036
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
9037
|
+
|
8817
9038
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
8818
9039
|
return;
|
8819
9040
|
}
|
@@ -8858,14 +9079,15 @@ static void ggml_compute_forward_concat_f32(
|
|
8858
9079
|
|
8859
9080
|
static void ggml_compute_forward_concat(
|
8860
9081
|
const struct ggml_compute_params* params,
|
8861
|
-
const struct ggml_tensor* src0,
|
8862
|
-
const struct ggml_tensor* src1,
|
8863
9082
|
struct ggml_tensor* dst) {
|
9083
|
+
|
9084
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9085
|
+
|
8864
9086
|
switch (src0->type) {
|
8865
9087
|
case GGML_TYPE_F32:
|
8866
9088
|
case GGML_TYPE_I32:
|
8867
9089
|
{
|
8868
|
-
ggml_compute_forward_concat_f32(params,
|
9090
|
+
ggml_compute_forward_concat_f32(params, dst);
|
8869
9091
|
} break;
|
8870
9092
|
default:
|
8871
9093
|
{
|
@@ -8878,8 +9100,10 @@ static void ggml_compute_forward_concat(

 static void ggml_compute_forward_abs_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -8902,12 +9126,14 @@ static void ggml_compute_forward_abs_f32(

 static void ggml_compute_forward_abs(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_abs_f32(params, src0, dst);
+                ggml_compute_forward_abs_f32(params, dst);
             } break;
         default:
             {
@@ -8920,8 +9146,10 @@ static void ggml_compute_forward_abs(

 static void ggml_compute_forward_sgn_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -8944,12 +9172,14 @@ static void ggml_compute_forward_sgn_f32(

 static void ggml_compute_forward_sgn(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_sgn_f32(params, src0, dst);
+                ggml_compute_forward_sgn_f32(params, dst);
             } break;
         default:
             {
@@ -8962,8 +9192,10 @@ static void ggml_compute_forward_sgn(

 static void ggml_compute_forward_neg_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -8986,12 +9218,14 @@ static void ggml_compute_forward_neg_f32(

 static void ggml_compute_forward_neg(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_neg_f32(params, src0, dst);
+                ggml_compute_forward_neg_f32(params, dst);
             } break;
         default:
             {
@@ -9004,8 +9238,10 @@ static void ggml_compute_forward_neg(

 static void ggml_compute_forward_step_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -9028,12 +9264,14 @@ static void ggml_compute_forward_step_f32(

 static void ggml_compute_forward_step(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_step_f32(params, src0, dst);
+                ggml_compute_forward_step_f32(params, dst);
             } break;
         default:
             {
@@ -9046,8 +9284,10 @@ static void ggml_compute_forward_step(

 static void ggml_compute_forward_tanh_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -9070,12 +9310,14 @@ static void ggml_compute_forward_tanh_f32(

 static void ggml_compute_forward_tanh(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_tanh_f32(params, src0, dst);
+                ggml_compute_forward_tanh_f32(params, dst);
             } break;
         default:
             {
@@ -9088,8 +9330,10 @@ static void ggml_compute_forward_tanh(

 static void ggml_compute_forward_elu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -9112,12 +9356,14 @@ static void ggml_compute_forward_elu_f32(

 static void ggml_compute_forward_elu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_elu_f32(params, src0, dst);
+                ggml_compute_forward_elu_f32(params, dst);
             } break;
         default:
             {
@@ -9130,8 +9376,10 @@ static void ggml_compute_forward_elu(

 static void ggml_compute_forward_relu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -9154,12 +9402,14 @@ static void ggml_compute_forward_relu_f32(

 static void ggml_compute_forward_relu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_relu_f32(params, src0, dst);
+                ggml_compute_forward_relu_f32(params, dst);
             } break;
         default:
             {
@@ -9172,8 +9422,10 @@ static void ggml_compute_forward_relu(

 static void ggml_compute_forward_gelu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9213,12 +9465,14 @@ static void ggml_compute_forward_gelu_f32(

 static void ggml_compute_forward_gelu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_gelu_f32(params, src0, dst);
+                ggml_compute_forward_gelu_f32(params, dst);
             } break;
         default:
             {
@@ -9231,8 +9485,10 @@ static void ggml_compute_forward_gelu(

 static void ggml_compute_forward_gelu_quick_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9272,12 +9528,14 @@ static void ggml_compute_forward_gelu_quick_f32(

 static void ggml_compute_forward_gelu_quick(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_gelu_quick_f32(params, src0, dst);
+                ggml_compute_forward_gelu_quick_f32(params, dst);
             } break;
         default:
             {
@@ -9290,8 +9548,10 @@ static void ggml_compute_forward_gelu_quick(

 static void ggml_compute_forward_silu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9331,12 +9591,14 @@ static void ggml_compute_forward_silu_f32(

 static void ggml_compute_forward_silu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_silu_f32(params, src0, dst);
+                ggml_compute_forward_silu_f32(params, dst);
             } break;
         default:
             {
@@ -9348,8 +9610,10 @@ static void ggml_compute_forward_silu(

 static void ggml_compute_forward_leaky_relu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -9375,12 +9639,14 @@ static void ggml_compute_forward_leaky_relu_f32(

 static void ggml_compute_forward_leaky_relu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_leaky_relu_f32(params, src0, dst);
+                ggml_compute_forward_leaky_relu_f32(params, dst);
             } break;
         default:
             {
@@ -9393,9 +9659,11 @@ static void ggml_compute_forward_leaky_relu(

 static void ggml_compute_forward_silu_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * grad,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * grad = dst->src[1];
+
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
@@ -9438,13 +9706,14 @@ static void ggml_compute_forward_silu_back_f32(

 static void ggml_compute_forward_silu_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * grad,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_silu_back_f32(params, src0, grad, dst);
+                ggml_compute_forward_silu_back_f32(params, dst);
             } break;
         default:
             {
@@ -9456,8 +9725,10 @@ static void ggml_compute_forward_silu_back(

 static void ggml_compute_forward_hardswish_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -9479,12 +9750,14 @@ static void ggml_compute_forward_hardswish_f32(
 }
 static void ggml_compute_forward_hardswish(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_hardswish_f32(params, src0, dst);
+                ggml_compute_forward_hardswish_f32(params, dst);
             } break;
         default:
             {
@@ -9495,8 +9768,10 @@ static void ggml_compute_forward_hardswish(

 static void ggml_compute_forward_hardsigmoid_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));

@@ -9519,12 +9794,14 @@ static void ggml_compute_forward_hardsigmoid_f32(

 static void ggml_compute_forward_hardsigmoid(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_hardsigmoid_f32(params, src0, dst);
+                ggml_compute_forward_hardsigmoid_f32(params, dst);
             } break;
         default:
             {
@@ -9538,8 +9815,10 @@ static void ggml_compute_forward_hardsigmoid(

 static void ggml_compute_forward_norm_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9591,12 +9870,14 @@ static void ggml_compute_forward_norm_f32(

 static void ggml_compute_forward_norm(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_norm_f32(params, src0, dst);
+                ggml_compute_forward_norm_f32(params, dst);
             } break;
         default:
             {
@@ -9609,8 +9890,10 @@ static void ggml_compute_forward_norm(

 static void ggml_compute_forward_rms_norm_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9659,12 +9942,14 @@ static void ggml_compute_forward_rms_norm_f32(

 static void ggml_compute_forward_rms_norm(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rms_norm_f32(params, src0, dst);
+                ggml_compute_forward_rms_norm_f32(params, dst);
             } break;
         default:
             {
@@ -9675,9 +9960,11 @@ static void ggml_compute_forward_rms_norm(

 static void ggml_compute_forward_rms_norm_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9832,13 +10119,14 @@ static void ggml_compute_forward_rms_norm_back_f32(

 static void ggml_compute_forward_rms_norm_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rms_norm_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_rms_norm_back_f32(params, dst);
             } break;
         default:
             {
@@ -9851,8 +10139,10 @@ static void ggml_compute_forward_rms_norm_back(

 static void ggml_compute_forward_group_norm_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9923,12 +10213,14 @@ static void ggml_compute_forward_group_norm_f32(

 static void ggml_compute_forward_group_norm(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_group_norm_f32(params, src0, dst);
+                ggml_compute_forward_group_norm_f32(params, dst);
             } break;
         default:
             {
@@ -9974,9 +10266,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(struct ggml_tensor * dst) {

 static void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);

@@ -9992,6 +10286,7 @@ static void ggml_compute_forward_mul_mat(
     ggml_vec_dot_t    const vec_dot               = type_traits[type].vec_dot;
     enum ggml_type    const vec_dot_type          = type_traits[type].vec_dot_type;
     ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
+    int64_t           const vec_dot_num_rows      = type_traits[type].nrows;

     GGML_ASSERT(ne0 == ne01);
     GGML_ASSERT(ne1 == ne11);
@@ -10159,12 +10454,23 @@ static void ggml_compute_forward_mul_mat(
     const int64_t blck_0 = 16;
     const int64_t blck_1 = 16;

+    // dot kernels can handle 1 row and col at a time, but mmla kernels can process 2 rows and cols
+    int64_t nrc = vec_dot_num_rows;
+    // TODO: currently the mmla kernels support only even numbered rows/cols.
+    // this check can be removed once they are extended to support odd numbered rows/cols too
+    if ((nr0 % 2 != 0) || (ne11 % 2 != 0)) {
+        nrc = 1;
+    }
+
+    const size_t src1_col_stride = src1_cont || src1->type != vec_dot_type ? row_size : nb11;
+
     // attempt to reduce false-sharing (does not seem to make a difference)
-    float tmp[16];
+    // 16 * 2, accounting for mmla kernels
+    float tmp[32];

     for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
         for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
-            for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
+            for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ir1 += nrc) {
                 const int64_t i13 = (ir1/(ne12*ne1));
                 const int64_t i12 = (ir1 - i13*ne12*ne1)/ne1;
                 const int64_t i11 = (ir1 - i13*ne12*ne1 - i12*ne1);
@@ -10187,17 +10493,19 @@ static void ggml_compute_forward_mul_mat(
                 (src1_cont || src1->type != vec_dot_type
                  ? (i11      + i12*ne11 + i13*ne12*ne11)*row_size
                  : (i11*nb11 + i12*nb12 + i13*nb13));
-
                 float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));

                 //for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
                 //    vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
                 //}

-                for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
-                    vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
+                for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ir0 += nrc) {
+                    vec_dot(ne00, &tmp[ir0 - iir0], (nrc>1 ? 16 : 0), src0_row + ir0*nb01, (nrc>1 ? nb01 : 0), src1_col, (nrc>1 ? src1_col_stride : 0), nrc);
+                }
+
+                for (int cn = 0; cn < nrc; ++cn) {
+                    memcpy(&dst_col[iir0 + cn*nb1/nb0], tmp + (cn*16), (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
                 }
-                memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
             }
         }
     }
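The mul_mat changes above follow the widened vector-dot signature vec_dot(n, s, bs, x, bx, y, by, nrc): bs is the element stride between result columns in s, bx and by are byte strides to the next row of x and the next column of y, and nrc is how many rows/columns a single call covers, so an mmla kernel can produce an nrc by nrc tile at once. A scalar sketch of that contract as the call sites above imply it (illustration only, not the SIMD kernels in ggml-quants.c):

    #include <stddef.h>

    // reference semantics of the extended vec_dot signature (f32 case)
    static void vec_dot_f32_ref(int n, float * s, size_t bs,
                                const float * x, size_t bx,
                                const float * y, size_t by, int nrc) {
        for (int c = 0; c < nrc; ++c) {        // columns of y (src1)
            for (int r = 0; r < nrc; ++r) {    // rows of x (src0)
                const float * xr = (const float *)((const char *) x + r*bx);
                const float * yc = (const float *)((const char *) y + c*by);
                float sum = 0.0f;
                for (int i = 0; i < n; ++i) {
                    sum += xr[i]*yc[i];
                }
                s[c*bs + r] = sum;             // column c lands bs floats apart
            }
        }
    }

With nrc == 1 every stride is passed as 0 and the loops degenerate to the old single dot product, which is why the unchanged call sites below simply pass (0, ..., 0, ..., 0, 1).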
@@ -10207,10 +10515,11 @@ static void ggml_compute_forward_mul_mat(

 static void ggml_compute_forward_mul_mat_id(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * ids,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {

+    const struct ggml_tensor * ids = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     const struct ggml_tensor * src0 = dst->src[2]; // only for GGML_TENSOR_BINARY_OP_LOCALS

     GGML_TENSOR_BINARY_OP_LOCALS
@@ -10386,7 +10695,7 @@ static void ggml_compute_forward_mul_mat_id(
             //}

             for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
-                vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
+                vec_dot(ne00, &tmp[ir0 - iir0], 0, src0_row + ir0*nb01, 0, src1_col, 0, 1);
             }
             memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
         }
@@ -10401,9 +10710,11 @@ static void ggml_compute_forward_mul_mat_id(

 static void ggml_compute_forward_out_prod_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     // int64_t t0 = ggml_perf_time_us();
     // UNUSED(t0);

@@ -10593,9 +10904,11 @@ static void ggml_compute_forward_out_prod_f32(

 static void ggml_compute_forward_out_prod_q_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     // int64_t t0 = ggml_perf_time_us();
     // UNUSED(t0);

@@ -10706,9 +11019,10 @@ static void ggml_compute_forward_out_prod_q_f32(

 static void ggml_compute_forward_out_prod(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -10723,17 +11037,19 @@ static void ggml_compute_forward_out_prod(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
             {
-                ggml_compute_forward_out_prod_q_f32(params, src0, src1, dst);
+                ggml_compute_forward_out_prod_q_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
             {
                 GGML_ASSERT(false); // todo
-                // ggml_compute_forward_out_prod_f16_f32(params, src0, src1, dst);
+                // ggml_compute_forward_out_prod_f16_f32(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_out_prod_f32(params, src0, src1, dst);
+                ggml_compute_forward_out_prod_f32(params, dst);
             } break;
         default:
             {
@@ -10746,8 +11062,10 @@ static void ggml_compute_forward_out_prod(

 static void ggml_compute_forward_scale_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_is_contiguous(src0));
     GGML_ASSERT(ggml_is_contiguous(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -10788,12 +11106,14 @@ static void ggml_compute_forward_scale_f32(

 static void ggml_compute_forward_scale(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_scale_f32(params, src0, dst);
+                ggml_compute_forward_scale_f32(params, dst);
             } break;
         default:
             {
@@ -10806,9 +11126,11 @@ static void ggml_compute_forward_scale(

 static void ggml_compute_forward_set_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));

@@ -10879,14 +11201,14 @@ static void ggml_compute_forward_set_f32(

 static void ggml_compute_forward_set(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {

+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_set_f32(params, src0, src1, dst);
+                ggml_compute_forward_set_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
         case GGML_TYPE_Q4_0:
@@ -10903,6 +11225,8 @@ static void ggml_compute_forward_set(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
         default:
             {
                 GGML_ASSERT(false);
@@ -10914,29 +11238,25 @@ static void ggml_compute_forward_set(

 static void ggml_compute_forward_cpy(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
-    ggml_compute_forward_dup(params, src0, dst);
+    ggml_compute_forward_dup(params, dst);
 }

 // ggml_compute_forward_cont

 static void ggml_compute_forward_cont(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
-    ggml_compute_forward_dup(params, src0, dst);
+    ggml_compute_forward_dup(params, dst);
 }

 // ggml_compute_forward_reshape

 static void ggml_compute_forward_reshape(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
-    UNUSED(src0);
     UNUSED(dst);
 }

@@ -10944,39 +11264,41 @@ static void ggml_compute_forward_reshape(

 static void ggml_compute_forward_view(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0) {
+        const struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
-    UNUSED(src0);
+    UNUSED(dst);
 }

 // ggml_compute_forward_permute

 static void ggml_compute_forward_permute(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0) {
+        const struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
-    UNUSED(src0);
+    UNUSED(dst);
 }

 // ggml_compute_forward_transpose

 static void ggml_compute_forward_transpose(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0) {
+        const struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
-    UNUSED(src0);
+    UNUSED(dst);
 }

 // ggml_compute_forward_get_rows

 static void ggml_compute_forward_get_rows_q(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11012,9 +11334,11 @@ static void ggml_compute_forward_get_rows_q(

 static void ggml_compute_forward_get_rows_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11047,9 +11371,11 @@ static void ggml_compute_forward_get_rows_f16(

 static void ggml_compute_forward_get_rows_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11082,9 +11408,10 @@ static void ggml_compute_forward_get_rows_f32(

 static void ggml_compute_forward_get_rows(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -11100,17 +11427,19 @@ static void ggml_compute_forward_get_rows(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
        case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
             {
-                ggml_compute_forward_get_rows_q(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_q(params, dst);
             } break;
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_get_rows_f16(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
         case GGML_TYPE_I32:
             {
-                ggml_compute_forward_get_rows_f32(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_f32(params, dst);
             } break;
         default:
             {
@@ -11141,9 +11470,11 @@ static void ggml_compute_forward_get_rows(

 static void ggml_compute_forward_get_rows_back_f32_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_is_contiguous(dst));

@@ -11178,9 +11509,11 @@ static void ggml_compute_forward_get_rows_back_f32_f16(

 static void ggml_compute_forward_get_rows_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_is_contiguous(dst));

@@ -11215,17 +11548,18 @@ static void ggml_compute_forward_get_rows_back_f32(

 static void ggml_compute_forward_get_rows_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_get_rows_back_f32_f16(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_back_f32_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_get_rows_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_back_f32(params, dst);
             } break;
         default:
             {
@@ -11256,8 +11590,10 @@ static void ggml_compute_forward_get_rows_back(

 static void ggml_compute_forward_diag_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(params->ith == 0);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11296,12 +11632,14 @@ static void ggml_compute_forward_diag_f32(

 static void ggml_compute_forward_diag(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_diag_f32(params, src0, dst);
+                ggml_compute_forward_diag_f32(params, dst);
             } break;
         default:
             {
@@ -11314,10 +11652,11 @@ static void ggml_compute_forward_diag(

 static void ggml_compute_forward_diag_mask_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst,
         const float value) {

+    const struct ggml_tensor * src0 = dst->src[0];
+
     const int ith = params->ith;
     const int nth = params->nth;

@@ -11367,12 +11706,14 @@ static void ggml_compute_forward_diag_mask_f32(

 static void ggml_compute_forward_diag_mask_inf(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
+                ggml_compute_forward_diag_mask_f32(params, dst, -INFINITY);
             } break;
         default:
             {
@@ -11383,12 +11724,14 @@ static void ggml_compute_forward_diag_mask_inf(

 static void ggml_compute_forward_diag_mask_zero(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
+                ggml_compute_forward_diag_mask_f32(params, dst, 0);
             } break;
         default:
             {
@@ -11401,9 +11744,12 @@ static void ggml_compute_forward_diag_mask_zero(

 static void ggml_compute_forward_soft_max_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+    const struct ggml_tensor * src2 = dst->src[2];
+
     assert(ggml_is_contiguous(dst));
     assert(ggml_are_same_shape(src0, dst));

@@ -11411,16 +11757,29 @@ static void ggml_compute_forward_soft_max_f32(
         return;
     }

-    float scale = 1.0f;
-    memcpy(&scale, (float *) dst->op_params + 0, sizeof(float));
+    float scale    = 1.0f;
+    float max_bias = 0.0f;
+
+    memcpy(&scale,    (float *) dst->op_params + 0, sizeof(float));
+    memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));

     // TODO: handle transposed/permuted matrices

     const int ith = params->ith;
     const int nth = params->nth;

+    GGML_TENSOR_UNARY_OP_LOCALS
+
     const int64_t ne11 = src1 ? src1->ne[1] : 1;

+    // TODO: is this supposed to be ceil instead of floor?
+    //   https://huggingface.co/mosaicml/mpt-7b/blob/main/attention.py#L370
+    const uint32_t n_head_kv   = ne02;
+    const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2(n_head_kv));
+
+    const float m0 = powf(2.0f, -(max_bias       ) / n_head_log2);
+    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
+
     const int nc = src0->ne[0];
     const int nr = ggml_nrows(src0);

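For context: the scale and max_bias values read back above are written into dst->op_params when the soft_max node is built. A sketch of the producer side, mirroring ggml_soft_max_ext in upstream ggml.c (the helper name and packing come from the upstream source, not from this hunk):

    // graph build time: pack two consecutive floats into the node's op_params
    float op_params[2] = { scale, max_bias };
    ggml_set_op_params(result, op_params, sizeof(op_params));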
@@ -11433,6 +11792,9 @@ static void ggml_compute_forward_soft_max_f32(

     float * wp = (float *) params->wdata + (nc + CACHE_LINE_SIZE_F32) * ith;

+    // when max_bias <= 0.0f, src2 is not used and we default it to src0 to avoid branching
+    float * pos = src2 ? (float *) src2->data : src0->data;
+
     for (int i1 = ir0; i1 < ir1; i1++) {
         float * sp = (float *)((char *) src0->data + i1*src0->nb[1]);
         float * dp = (float *)((char *)  dst->data + i1*dst->nb[1]);
@@ -11446,6 +11808,16 @@ static void ggml_compute_forward_soft_max_f32(
             ggml_vec_acc_f32(nc, wp, mp);
         }

+        // ALiBi bias
+        if (max_bias > 0.0f) {
+            const uint32_t h  = (i1/ne01)%ne02; // head
+            const float slope = h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1);
+
+            for (int i = 0; i < nc; i++) {
+                wp[i] = wp[i] + slope*pos[i];
+            }
+        }
+
 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
             //printf("p[%d] = %f\n", i, p[i]);
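The slope used above is the standard ALiBi head schedule: with H = n_head_log2 = 2^floor(log2(n_head_kv)), head h gets m0^(h+1) when h < H and m1^(2(h-H)+1) otherwise, where m0 = 2^(-max_bias/H) and m1 = 2^(-max_bias/(2H)). Pulled out as a standalone helper for clarity (a sketch; ggml keeps this inline):

    #include <math.h>
    #include <stdint.h>

    // per-head ALiBi slope, matching the branch in the hunk above
    static float alibi_slope(uint32_t h, uint32_t n_head_kv, float max_bias) {
        const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2(n_head_kv));
        const float m0 = powf(2.0f, -(max_bias       ) / n_head_log2);
        const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
        return h < n_head_log2 ? powf(m0, h + 1)
                               : powf(m1, 2*(h - n_head_log2) + 1);
    }

When max_bias <= 0.0f the whole branch is skipped, so the pre-existing soft_max fast path is untouched.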
@@ -11488,13 +11860,14 @@ static void ggml_compute_forward_soft_max_f32(

 static void ggml_compute_forward_soft_max(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_soft_max_f32(params, src0, src1, dst);
+                ggml_compute_forward_soft_max_f32(params, dst);
             } break;
         default:
             {
@@ -11507,9 +11880,11 @@ static void ggml_compute_forward_soft_max(

 static void ggml_compute_forward_soft_max_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_is_contiguous(src0));
     GGML_ASSERT(ggml_is_contiguous(src1));
     GGML_ASSERT(ggml_is_contiguous(dst));
@@ -11568,7 +11943,7 @@ static void ggml_compute_forward_soft_max_back_f32(

         // linear runtime, no additional memory
         float dot_y_dy = 0;
-        ggml_vec_dot_f32 (nc, &dot_y_dy, y, dy);
+        ggml_vec_dot_f32 (nc, &dot_y_dy, 0, y, 0, dy, 0, 1);
         ggml_vec_cpy_f32 (nc, dx, dy);
         ggml_vec_acc1_f32(nc, dx, -dot_y_dy);
         ggml_vec_mul_f32 (nc, dx, dx, y);
@@ -11584,13 +11959,14 @@ static void ggml_compute_forward_soft_max_back_f32(

 static void ggml_compute_forward_soft_max_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_soft_max_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_soft_max_back_f32(params, dst);
             } break;
         default:
             {
@@ -11603,8 +11979,10 @@ static void ggml_compute_forward_soft_max_back(

 static void ggml_compute_forward_alibi_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11638,22 +12016,20 @@ static void ggml_compute_forward_alibi_f32(
     const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
     const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);

-    for (int64_t i = 0; i < ne0; i++) {
-        for (int64_t j = 0; j < ne1; j++) {
-            for (int64_t k = 0; k < ne2_ne3; k++) {
-                float * const src  = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
-                float *       pdst = (float *)((char *)  dst->data + i*nb0 + j*nb1 + k*nb2);
-
-                // TODO: k*nb2 or k*nb3
-
-                float m_k;
+    for (int64_t k = 0; k < ne2_ne3; k++) {
+        // TODO: k*nb2 or k*nb3
+        float m_k;

-                if (k < n_heads_log2_floor) {
-                    m_k = powf(m0, k + 1);
-                } else {
-                    m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
-                }
+        if (k < n_heads_log2_floor) {
+            m_k = powf(m0, k + 1);
+        } else {
+            m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
+        }

+        for (int64_t i = 0; i < ne0; i++) {
+            for (int64_t j = 0; j < ne1; j++) {
+                float * const src  = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
+                float *       pdst = (float *)((char *)  dst->data + i*nb0 + j*nb1 + k*nb2);
                 pdst[0] = i * m_k + src[0];
             }
         }
@@ -11662,8 +12038,10 @@ static void ggml_compute_forward_alibi_f32(

 static void ggml_compute_forward_alibi_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11698,21 +12076,20 @@ static void ggml_compute_forward_alibi_f16(
     const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
     const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);

-    for (int i = 0; i < ne0; i++) {
-        for (int j = 0; j < ne1; j++) {
-            for (int k = 0; k < ne2_ne3; k++) {
-                ggml_fp16_t * const src  = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
-                float *             pdst = (float *)((char *)  dst->data + i*nb0 + j*nb1 + k*nb2);
-
-                // TODO: k*nb2 or k*nb3
-
-                float m_k;
+    for (int k = 0; k < ne2_ne3; k++) {
+        // TODO: k*nb2 or k*nb3
+        float m_k;

-                if (k < n_heads_log2_floor) {
-                    m_k = powf(m0, k + 1);
-                } else {
-                    m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
-                }
+        if (k < n_heads_log2_floor) {
+            m_k = powf(m0, k + 1);
+        } else {
+            m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
+        }

+        for (int i = 0; i < ne0; i++) {
+            for (int j = 0; j < ne1; j++) {
+                ggml_fp16_t * const src  = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
+                float *             pdst = (float *)((char *)  dst->data + i*nb0 + j*nb1 + k*nb2);

                 // we return F32
                 pdst[0] = i * m_k + GGML_FP16_TO_FP32(src[0]);
@@ -11723,16 +12100,18 @@ static void ggml_compute_forward_alibi_f16(

 static void ggml_compute_forward_alibi(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_alibi_f16(params, src0, dst);
+                ggml_compute_forward_alibi_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_alibi_f32(params, src0, dst);
+                ggml_compute_forward_alibi_f32(params, dst);
             } break;
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -11748,6 +12127,8 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -11763,8 +12144,10 @@ static void ggml_compute_forward_alibi(

 static void ggml_compute_forward_clamp_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11803,12 +12186,14 @@ static void ggml_compute_forward_clamp_f32(

 static void ggml_compute_forward_clamp(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_clamp_f32(params, src0, dst);
+                ggml_compute_forward_clamp_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
         case GGML_TYPE_Q4_0:
@@ -11825,6 +12210,8 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -11896,10 +12283,12 @@ GGML_CALL void ggml_rope_yarn_corr_dims(

 static void ggml_compute_forward_rope_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst,
         const bool forward) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -12072,10 +12461,12 @@ static void ggml_compute_forward_rope_f32(

 static void ggml_compute_forward_rope_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst,
         const bool forward) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -12237,17 +12628,18 @@ static void ggml_compute_forward_rope_f16(

 static void ggml_compute_forward_rope(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
+                ggml_compute_forward_rope_f16(params, dst, true);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
+                ggml_compute_forward_rope_f32(params, dst, true);
             } break;
         default:
             {
@@ -12260,17 +12652,18 @@ static void ggml_compute_forward_rope(

 static void ggml_compute_forward_rope_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
+                ggml_compute_forward_rope_f16(params, dst, false);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
+                ggml_compute_forward_rope_f32(params, dst, false);
             } break;
         default:
             {
@@ -12283,9 +12676,11 @@ static void ggml_compute_forward_rope_back(

 static void ggml_compute_forward_conv_transpose_1d_f16_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12369,9 +12764,9 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
             const int i1n = i10*ne11;
             for (int i00 = 0; i00 < ne00; i00++) {
                 float v = 0;
-                ggml_vec_dot_f16(ne02, &v,
-                        (ggml_fp16_t *)    wdata_src + i1n,
-                        (ggml_fp16_t *) wdata_kernel + i00*ne02);
+                ggml_vec_dot_f16(ne02, &v, 0,
+                        (ggml_fp16_t *)    wdata_src + i1n, 0,
+                        (ggml_fp16_t *) wdata_kernel + i00*ne02, 0, 1);
                 dst_data[i10*s0 + i00] += v;
             }
         }
@@ -12380,9 +12775,11 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(

 static void ggml_compute_forward_conv_transpose_1d_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12466,9 +12863,9 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
             const int i1n = i10*ne11;
             for (int i00 = 0; i00 < ne00; i00++) {
                 float v = 0;
-                ggml_vec_dot_f32(ne02, &v,
-                        wdata_src + i1n,
-                        wdata_kernel + i00*ne02);
+                ggml_vec_dot_f32(ne02, &v, 0,
+                        wdata_src + i1n, 0,
+                        wdata_kernel + i00*ne02, 0, 1);
                 dst_data[i10*s0 + i00] += v;
             }
         }
@@ -12477,17 +12874,18 @@ static void ggml_compute_forward_conv_transpose_1d_f32(

 static void ggml_compute_forward_conv_transpose_1d(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_conv_transpose_1d_f16_f32(params, src0, src1, dst);
+                ggml_compute_forward_conv_transpose_1d_f16_f32(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_conv_transpose_1d_f32(params, src0, src1, dst);
+                ggml_compute_forward_conv_transpose_1d_f32(params, dst);
             } break;
         default:
             {
@@ -12501,9 +12899,11 @@ static void ggml_compute_forward_conv_transpose_1d(
|
|
12501
12899
|
// dst: result [N, OH, OW, IC*KH*KW]
|
12502
12900
|
static void ggml_compute_forward_im2col_f32(
|
12503
12901
|
const struct ggml_compute_params * params,
|
12504
|
-
const struct ggml_tensor * src0,
|
12505
|
-
const struct ggml_tensor * src1,
|
12506
12902
|
struct ggml_tensor * dst) {
|
12903
|
+
|
12904
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12905
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
12906
|
+
|
12507
12907
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12508
12908
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12509
12909
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
@@ -12587,9 +12987,11 @@ static void ggml_compute_forward_im2col_f32(
|
|
12587
12987
|
// dst: result [N, OH, OW, IC*KH*KW]
|
12588
12988
|
static void ggml_compute_forward_im2col_f16(
|
12589
12989
|
const struct ggml_compute_params * params,
|
12590
|
-
const struct ggml_tensor * src0,
|
12591
|
-
const struct ggml_tensor * src1,
|
12592
12990
|
struct ggml_tensor * dst) {
|
12991
|
+
|
12992
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12993
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
12994
|
+
|
12593
12995
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12594
12996
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12595
12997
|
GGML_ASSERT( dst->type == GGML_TYPE_F16);
|
@@ -12669,17 +13071,15 @@ static void ggml_compute_forward_im2col_f16(
|
|
12669
13071
|
|
12670
13072
|
static void ggml_compute_forward_im2col(
|
12671
13073
|
const struct ggml_compute_params * params,
|
12672
|
-
const struct ggml_tensor * src0,
|
12673
|
-
const struct ggml_tensor * src1,
|
12674
13074
|
struct ggml_tensor * dst) {
|
12675
13075
|
switch (dst->type) {
|
12676
13076
|
case GGML_TYPE_F16:
|
12677
13077
|
{
|
12678
|
-
ggml_compute_forward_im2col_f16(params,
|
13078
|
+
ggml_compute_forward_im2col_f16(params, dst);
|
12679
13079
|
} break;
|
12680
13080
|
case GGML_TYPE_F32:
|
12681
13081
|
{
|
12682
|
-
ggml_compute_forward_im2col_f32(params,
|
13082
|
+
ggml_compute_forward_im2col_f32(params, dst);
|
12683
13083
|
} break;
|
12684
13084
|
default:
|
12685
13085
|
{
|
@@ -12693,9 +13093,11 @@ static void ggml_compute_forward_im2col(
|
|
12693
13093
|
|
12694
13094
|
static void ggml_compute_forward_conv_transpose_2d(
|
12695
13095
|
const struct ggml_compute_params * params,
|
12696
|
-
const struct ggml_tensor * src0,
|
12697
|
-
const struct ggml_tensor * src1,
|
12698
13096
|
struct ggml_tensor * dst) {
|
13097
|
+
|
13098
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13099
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
13100
|
+
|
12699
13101
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12700
13102
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12701
13103
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
@@ -12783,9 +13185,9 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
12783
13185
|
for (int i01 = 0; i01 < ne01; i01++) {
|
12784
13186
|
for (int i00 = 0; i00 < ne00; i00++) {
|
12785
13187
|
float v = 0;
|
12786
|
-
ggml_vec_dot_f16(ne03, &v,
|
12787
|
-
wdata_src + i1n,
|
12788
|
-
wdata_kernel + i01*ne00*ne03 + i00*ne03);
|
13188
|
+
ggml_vec_dot_f16(ne03, &v, 0,
|
13189
|
+
wdata_src + i1n, 0,
|
13190
|
+
wdata_kernel + i01*ne00*ne03 + i00*ne03, 0, 1);
|
12789
13191
|
dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
|
12790
13192
|
}
|
12791
13193
|
}
|
@@ -12799,9 +13201,11 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
12799
13201
|
static void ggml_compute_forward_pool_1d_sk_p0(
|
12800
13202
|
const struct ggml_compute_params * params,
|
12801
13203
|
const enum ggml_op_pool op,
|
12802
|
-
const struct ggml_tensor * src,
|
12803
13204
|
const int k,
|
12804
13205
|
struct ggml_tensor * dst) {
|
13206
|
+
|
13207
|
+
const struct ggml_tensor * src = dst->src[0];
|
13208
|
+
|
12805
13209
|
assert(src->type == GGML_TYPE_F32);
|
12806
13210
|
assert(params->ith == 0);
|
12807
13211
|
|
@@ -12850,7 +13254,6 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
12850
13254
|
|
12851
13255
|
static void ggml_compute_forward_pool_1d(
|
12852
13256
|
const struct ggml_compute_params * params,
|
12853
|
-
const struct ggml_tensor * src0,
|
12854
13257
|
struct ggml_tensor * dst) {
|
12855
13258
|
|
12856
13259
|
const int32_t * opts = (const int32_t *)dst->op_params;
|
@@ -12861,15 +13264,17 @@ static void ggml_compute_forward_pool_1d(
|
|
12861
13264
|
GGML_ASSERT(p0 == 0); // padding not supported
|
12862
13265
|
GGML_ASSERT(k0 == s0); // only s = k supported
|
12863
13266
|
|
12864
|
-
ggml_compute_forward_pool_1d_sk_p0(params, op,
|
13267
|
+
ggml_compute_forward_pool_1d_sk_p0(params, op, k0, dst);
|
12865
13268
|
}
|
12866
13269
|
|
12867
13270
|
// ggml_compute_forward_pool_2d
|
12868
13271
|
|
12869
13272
|
static void ggml_compute_forward_pool_2d(
|
12870
13273
|
const struct ggml_compute_params * params,
|
12871
|
-
const struct ggml_tensor * src,
|
12872
13274
|
struct ggml_tensor * dst) {
|
13275
|
+
|
13276
|
+
const struct ggml_tensor * src = dst->src[0];
|
13277
|
+
|
12873
13278
|
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
12874
13279
|
GGML_ASSERT(params->ith == 0);
|
12875
13280
|
|
@@ -12942,9 +13347,10 @@ static void ggml_compute_forward_pool_2d(
|
|
12942
13347
|
|
12943
13348
|
static void ggml_compute_forward_upscale_f32(
|
12944
13349
|
const struct ggml_compute_params * params,
|
12945
|
-
const struct ggml_tensor * src0,
|
12946
13350
|
struct ggml_tensor * dst) {
|
12947
13351
|
|
13352
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13353
|
+
|
12948
13354
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12949
13355
|
return;
|
12950
13356
|
}
|
@@ -12981,12 +13387,14 @@ static void ggml_compute_forward_upscale_f32(
|
|
12981
13387
|
|
12982
13388
|
static void ggml_compute_forward_upscale(
|
12983
13389
|
const struct ggml_compute_params * params,
|
12984
|
-
const struct ggml_tensor * src0,
|
12985
13390
|
struct ggml_tensor * dst) {
|
13391
|
+
|
13392
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13393
|
+
|
12986
13394
|
switch (src0->type) {
|
12987
13395
|
case GGML_TYPE_F32:
|
12988
13396
|
{
|
12989
|
-
ggml_compute_forward_upscale_f32(params,
|
13397
|
+
ggml_compute_forward_upscale_f32(params, dst);
|
12990
13398
|
} break;
|
12991
13399
|
default:
|
12992
13400
|
{
|
@@ -12999,9 +13407,10 @@ static void ggml_compute_forward_upscale(
|
|
12999
13407
|
|
13000
13408
|
static void ggml_compute_forward_pad_f32(
|
13001
13409
|
const struct ggml_compute_params * params,
|
13002
|
-
const struct ggml_tensor * src0,
|
13003
13410
|
struct ggml_tensor * dst) {
|
13004
13411
|
|
13412
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13413
|
+
|
13005
13414
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
13006
13415
|
return;
|
13007
13416
|
}
|
@@ -13039,12 +13448,14 @@ static void ggml_compute_forward_pad_f32(
|
|
13039
13448
|
|
13040
13449
|
static void ggml_compute_forward_pad(
|
13041
13450
|
const struct ggml_compute_params * params,
|
13042
|
-
const struct ggml_tensor * src0,
|
13043
13451
|
struct ggml_tensor * dst) {
|
13452
|
+
|
13453
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13454
|
+
|
13044
13455
|
switch (src0->type) {
|
13045
13456
|
case GGML_TYPE_F32:
|
13046
13457
|
{
|
13047
|
-
ggml_compute_forward_pad_f32(params,
|
13458
|
+
ggml_compute_forward_pad_f32(params, dst);
|
13048
13459
|
} break;
|
13049
13460
|
default:
|
13050
13461
|
{
|
@@ -13057,9 +13468,10 @@ static void ggml_compute_forward_pad(
|
|
13057
13468
|
|
13058
13469
|
static void ggml_compute_forward_argsort_f32(
|
13059
13470
|
const struct ggml_compute_params * params,
|
13060
|
-
const struct ggml_tensor * src0,
|
13061
13471
|
struct ggml_tensor * dst) {
|
13062
13472
|
|
13473
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13474
|
+
|
13063
13475
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
13064
13476
|
return;
|
13065
13477
|
}
|
@@ -13099,13 +13511,14 @@ static void ggml_compute_forward_argsort_f32(
|
|
13099
13511
|
|
13100
13512
|
static void ggml_compute_forward_argsort(
|
13101
13513
|
const struct ggml_compute_params * params,
|
13102
|
-
const struct ggml_tensor * src0,
|
13103
13514
|
struct ggml_tensor * dst) {
|
13104
13515
|
|
13516
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13517
|
+
|
13105
13518
|
switch (src0->type) {
|
13106
13519
|
case GGML_TYPE_F32:
|
13107
13520
|
{
|
13108
|
-
ggml_compute_forward_argsort_f32(params,
|
13521
|
+
ggml_compute_forward_argsort_f32(params, dst);
|
13109
13522
|
} break;
|
13110
13523
|
default:
|
13111
13524
|
{
|
@@ -13118,11 +13531,13 @@ static void ggml_compute_forward_argsort(
|
|
13118
13531
|
|
13119
13532
|
static void ggml_compute_forward_flash_attn_f32(
|
13120
13533
|
const struct ggml_compute_params * params,
|
13121
|
-
const struct ggml_tensor * q,
|
13122
|
-
const struct ggml_tensor * k,
|
13123
|
-
const struct ggml_tensor * v,
|
13124
13534
|
const bool masked,
|
13125
13535
|
struct ggml_tensor * dst) {
|
13536
|
+
|
13537
|
+
const struct ggml_tensor * q = dst->src[0];
|
13538
|
+
const struct ggml_tensor * k = dst->src[1];
|
13539
|
+
const struct ggml_tensor * v = dst->src[2];
|
13540
|
+
|
13126
13541
|
int64_t t0 = ggml_perf_time_us();
|
13127
13542
|
UNUSED(t0);
|
13128
13543
|
|
@@ -13214,9 +13629,9 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13214
13629
|
const int i1 = ik1;
|
13215
13630
|
|
13216
13631
|
ggml_vec_dot_f32(neq0,
|
13217
|
-
S + i1,
|
13218
|
-
(float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
|
13219
|
-
(float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
|
13632
|
+
S + i1, 0,
|
13633
|
+
(float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
|
13634
|
+
(float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
|
13220
13635
|
}
|
13221
13636
|
|
13222
13637
|
// scale
|
@@ -13299,20 +13714,22 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13299
13714
|
const int iv3 = iq3;
|
13300
13715
|
|
13301
13716
|
ggml_vec_dot_f32(masked_begin,
|
13302
|
-
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
|
13303
|
-
(float *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)),
|
13304
|
-
S);
|
13717
|
+
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
|
13718
|
+
(float *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), 0,
|
13719
|
+
S, 0, 1);
|
13305
13720
|
}
|
13306
13721
|
}
|
13307
13722
|
}
|
13308
13723
|
|
13309
13724
|
static void ggml_compute_forward_flash_attn_f16(
|
13310
13725
|
const struct ggml_compute_params * params,
|
13311
|
-
const struct ggml_tensor * q,
|
13312
|
-
const struct ggml_tensor * k,
|
13313
|
-
const struct ggml_tensor * v,
|
13314
13726
|
const bool masked,
|
13315
13727
|
struct ggml_tensor * dst) {
|
13728
|
+
|
13729
|
+
const struct ggml_tensor * q = dst->src[0];
|
13730
|
+
const struct ggml_tensor * k = dst->src[1];
|
13731
|
+
const struct ggml_tensor * v = dst->src[2];
|
13732
|
+
|
13316
13733
|
int64_t t0 = ggml_perf_time_us();
|
13317
13734
|
UNUSED(t0);
|
13318
13735
|
|
@@ -13404,9 +13821,9 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13404
13821
|
const int i1 = ik1;
|
13405
13822
|
|
13406
13823
|
ggml_vec_dot_f16(neq0,
|
13407
|
-
S + i1,
|
13408
|
-
(ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
|
13409
|
-
(ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
|
13824
|
+
S + i1, 0,
|
13825
|
+
(ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
|
13826
|
+
(ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
|
13410
13827
|
}
|
13411
13828
|
} else {
|
13412
13829
|
for (int64_t ic = 0; ic < nek1; ic += GGML_VEC_DOT_UNROLL) {
|
@@ -13508,9 +13925,9 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13508
13925
|
const int iv3 = iq3;
|
13509
13926
|
|
13510
13927
|
ggml_vec_dot_f16(nev0,
|
13511
|
-
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
|
13512
|
-
(ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)),
|
13513
|
-
S16);
|
13928
|
+
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
|
13929
|
+
(ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + iv2*nbv2 + iv3*nbv3)), 0,
|
13930
|
+
S16, 0, 1);
|
13514
13931
|
}
|
13515
13932
|
} else {
|
13516
13933
|
for (int64_t ic = 0; ic < nev1; ic += GGML_VEC_DOT_UNROLL) {
|
@@ -13534,19 +13951,19 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13534
13951
|
|
13535
13952
|
static void ggml_compute_forward_flash_attn(
|
13536
13953
|
const struct ggml_compute_params * params,
|
13537
|
-
const struct ggml_tensor * q,
|
13538
|
-
const struct ggml_tensor * k,
|
13539
|
-
const struct ggml_tensor * v,
|
13540
13954
|
const bool masked,
|
13541
13955
|
struct ggml_tensor * dst) {
|
13956
|
+
|
13957
|
+
const struct ggml_tensor * q = dst->src[0];
|
13958
|
+
|
13542
13959
|
switch (q->type) {
|
13543
13960
|
case GGML_TYPE_F16:
|
13544
13961
|
{
|
13545
|
-
ggml_compute_forward_flash_attn_f16(params,
|
13962
|
+
ggml_compute_forward_flash_attn_f16(params, masked, dst);
|
13546
13963
|
} break;
|
13547
13964
|
case GGML_TYPE_F32:
|
13548
13965
|
{
|
13549
|
-
ggml_compute_forward_flash_attn_f32(params,
|
13966
|
+
ggml_compute_forward_flash_attn_f32(params, masked, dst);
|
13550
13967
|
} break;
|
13551
13968
|
default:
|
13552
13969
|
{
|
@@ -13559,12 +13976,14 @@ static void ggml_compute_forward_flash_attn(
|
|
13559
13976
|
|
13560
13977
|
static void ggml_compute_forward_flash_ff_f16(
|
13561
13978
|
const struct ggml_compute_params * params,
|
13562
|
-
const struct ggml_tensor * a, // F16
|
13563
|
-
const struct ggml_tensor * b0, // F16 fc_w
|
13564
|
-
const struct ggml_tensor * b1, // F32 fc_b
|
13565
|
-
const struct ggml_tensor * c0, // F16 proj_w
|
13566
|
-
const struct ggml_tensor * c1, // F32 proj_b
|
13567
13979
|
struct ggml_tensor * dst) {
|
13980
|
+
|
13981
|
+
const struct ggml_tensor * a = dst->src[0]; // F16
|
13982
|
+
const struct ggml_tensor * b0 = dst->src[1]; // F16 fc_w
|
13983
|
+
const struct ggml_tensor * b1 = dst->src[2]; // F32 fc_b
|
13984
|
+
const struct ggml_tensor * c0 = dst->src[3]; // F16 proj_w
|
13985
|
+
const struct ggml_tensor * c1 = dst->src[4]; // F32 proj_b
|
13986
|
+
|
13568
13987
|
int64_t t0 = ggml_perf_time_us();
|
13569
13988
|
UNUSED(t0);
|
13570
13989
|
|
@@ -13652,9 +14071,9 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
13652
14071
|
const int i1 = ib01;
|
13653
14072
|
|
13654
14073
|
ggml_vec_dot_f16(nea0,
|
13655
|
-
S + i1,
|
13656
|
-
(ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)),
|
13657
|
-
(ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3)));
|
14074
|
+
S + i1, 0,
|
14075
|
+
(ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)), 0,
|
14076
|
+
(ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3)), 0, 1);
|
13658
14077
|
}
|
13659
14078
|
|
13660
14079
|
ggml_vec_add_f32(neb01, S, S, (float *) b1->data);
|
@@ -13677,9 +14096,9 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
13677
14096
|
for (int64_t ic = 0; ic < nec01; ++ic) {
|
13678
14097
|
|
13679
14098
|
ggml_vec_dot_f16(neb01,
|
13680
|
-
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
|
13681
|
-
(ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)),
|
13682
|
-
S16);
|
14099
|
+
(float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), 0,
|
14100
|
+
(ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)), 0,
|
14101
|
+
S16, 0, 1);
|
13683
14102
|
}
|
13684
14103
|
|
13685
14104
|
ggml_vec_add_f32(nec01,
|
@@ -13692,16 +14111,14 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
13692
14111
|
|
13693
14112
|
static void ggml_compute_forward_flash_ff(
|
13694
14113
|
const struct ggml_compute_params * params,
|
13695
|
-
const struct ggml_tensor * a,
|
13696
|
-
const struct ggml_tensor * b0,
|
13697
|
-
const struct ggml_tensor * b1,
|
13698
|
-
const struct ggml_tensor * c0,
|
13699
|
-
const struct ggml_tensor * c1,
|
13700
14114
|
struct ggml_tensor * dst) {
|
14115
|
+
|
14116
|
+
const struct ggml_tensor * b0 = dst->src[1];
|
14117
|
+
|
13701
14118
|
switch (b0->type) {
|
13702
14119
|
case GGML_TYPE_F16:
|
13703
14120
|
{
|
13704
|
-
ggml_compute_forward_flash_ff_f16(params,
|
14121
|
+
ggml_compute_forward_flash_ff_f16(params, dst);
|
13705
14122
|
} break;
|
13706
14123
|
case GGML_TYPE_F32:
|
13707
14124
|
{
|
@@ -13718,12 +14135,14 @@ static void ggml_compute_forward_flash_ff(
|
|
13718
14135
|
|
13719
14136
|
static void ggml_compute_forward_flash_attn_back_f32(
|
13720
14137
|
const struct ggml_compute_params * params,
|
13721
|
-
const struct ggml_tensor * q,
|
13722
|
-
const struct ggml_tensor * k,
|
13723
|
-
const struct ggml_tensor * v,
|
13724
|
-
const struct ggml_tensor * d,
|
13725
14138
|
const bool masked,
|
13726
14139
|
struct ggml_tensor * dst) {
|
14140
|
+
|
14141
|
+
const struct ggml_tensor * q = dst->src[0];
|
14142
|
+
const struct ggml_tensor * k = dst->src[1];
|
14143
|
+
const struct ggml_tensor * v = dst->src[2];
|
14144
|
+
const struct ggml_tensor * d = dst->src[3];
|
14145
|
+
|
13727
14146
|
int64_t t0 = ggml_perf_time_us();
|
13728
14147
|
UNUSED(t0);
|
13729
14148
|
|
@@ -13866,9 +14285,9 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
13866
14285
|
const int i1 = ik1;
|
13867
14286
|
|
13868
14287
|
ggml_vec_dot_f32(neq0,
|
13869
|
-
S + i1,
|
13870
|
-
(float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)),
|
13871
|
-
(float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
|
14288
|
+
S + i1, 0,
|
14289
|
+
(float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), 0,
|
14290
|
+
(float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)), 0, 1);
|
13872
14291
|
}
|
13873
14292
|
|
13874
14293
|
// scale
|
@@ -14013,7 +14432,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
14013
14432
|
|
14014
14433
|
// S = SM * (S - dot(SM, S))
|
14015
14434
|
float dot_SM_gradSM = 0;
|
14016
|
-
ggml_vec_dot_f32 (masked_begin, &dot_SM_gradSM, SM, S);
|
14435
|
+
ggml_vec_dot_f32 (masked_begin, &dot_SM_gradSM, 0, SM, 0, S, 0, 1);
|
14017
14436
|
ggml_vec_acc1_f32(M, S, -dot_SM_gradSM);
|
14018
14437
|
ggml_vec_mul_f32 (masked_begin, S, S, SM);
|
14019
14438
|
|
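For reference, the rewritten line is the usual softmax backward pass: with p = softmax(s) (SM above) and the upstream gradient dL/dp held in S,

    \frac{\partial L}{\partial s_i} = p_i \left( \frac{\partial L}{\partial p_i} - \sum_j p_j \, \frac{\partial L}{\partial p_j} \right)

so dot(SM, S) computes the inner sum, and the following vec_acc1/vec_mul lines apply the subtraction and the elementwise product.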
@@ -14071,16 +14490,15 @@ static void ggml_compute_forward_flash_attn_back_f32(
 
 static void ggml_compute_forward_flash_attn_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * q,
-        const struct ggml_tensor * k,
-        const struct ggml_tensor * v,
-        const struct ggml_tensor * d,
         const bool masked,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * q = dst->src[0];
+
     switch (q->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_flash_attn_back_f32(params, q, k, v, d, masked, dst);
+                ggml_compute_forward_flash_attn_back_f32(params, masked, dst);
             } break;
         default:
             {
@@ -14093,8 +14511,10 @@ static void ggml_compute_forward_flash_attn_back(
 
 static void ggml_compute_forward_win_part_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14137,12 +14557,14 @@ static void ggml_compute_forward_win_part_f32(
 
 static void ggml_compute_forward_win_part(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_win_part_f32(params, src0, dst);
+                ggml_compute_forward_win_part_f32(params, dst);
             } break;
         default:
             {
@@ -14155,8 +14577,10 @@ static void ggml_compute_forward_win_part(
 
 static void ggml_compute_forward_win_unpart_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14197,12 +14621,14 @@ static void ggml_compute_forward_win_unpart_f32(
 
 static void ggml_compute_forward_win_unpart(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_win_unpart_f32(params, src0, dst);
+                ggml_compute_forward_win_unpart_f32(params, dst);
             } break;
         default:
             {
@@ -14215,58 +14641,58 @@ static void ggml_compute_forward_win_unpart(
 
 static void ggml_compute_forward_unary(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
     const enum ggml_unary_op op = ggml_get_unary_op(dst);
 
     switch (op) {
         case GGML_UNARY_OP_ABS:
             {
-                ggml_compute_forward_abs(params, src0, dst);
+                ggml_compute_forward_abs(params, dst);
             } break;
         case GGML_UNARY_OP_SGN:
             {
-                ggml_compute_forward_sgn(params, src0, dst);
+                ggml_compute_forward_sgn(params, dst);
             } break;
         case GGML_UNARY_OP_NEG:
             {
-                ggml_compute_forward_neg(params, src0, dst);
+                ggml_compute_forward_neg(params, dst);
             } break;
         case GGML_UNARY_OP_STEP:
             {
-                ggml_compute_forward_step(params, src0, dst);
+                ggml_compute_forward_step(params, dst);
             } break;
         case GGML_UNARY_OP_TANH:
             {
-                ggml_compute_forward_tanh(params, src0, dst);
+                ggml_compute_forward_tanh(params, dst);
             } break;
         case GGML_UNARY_OP_ELU:
             {
-                ggml_compute_forward_elu(params, src0, dst);
+                ggml_compute_forward_elu(params, dst);
             } break;
         case GGML_UNARY_OP_RELU:
             {
-                ggml_compute_forward_relu(params, src0, dst);
+                ggml_compute_forward_relu(params, dst);
             } break;
         case GGML_UNARY_OP_GELU:
             {
-                ggml_compute_forward_gelu(params, src0, dst);
+                ggml_compute_forward_gelu(params, dst);
             } break;
         case GGML_UNARY_OP_GELU_QUICK:
             {
-                ggml_compute_forward_gelu_quick(params, src0, dst);
+                ggml_compute_forward_gelu_quick(params, dst);
             } break;
         case GGML_UNARY_OP_SILU:
             {
-                ggml_compute_forward_silu(params, src0, dst);
+                ggml_compute_forward_silu(params, dst);
             } break;
         case GGML_UNARY_OP_HARDSWISH:
             {
-                ggml_compute_forward_hardswish(params, src0, dst);
+                ggml_compute_forward_hardswish(params, dst);
             } break;
         case GGML_UNARY_OP_HARDSIGMOID:
             {
-                ggml_compute_forward_hardsigmoid(params, src0, dst);
+                ggml_compute_forward_hardsigmoid(params, dst);
             } break;
         default:
             {
@@ -14279,8 +14705,10 @@ static void ggml_compute_forward_unary(
 
 static void ggml_compute_forward_get_rel_pos_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14306,12 +14734,14 @@ static void ggml_compute_forward_get_rel_pos_f16(
 
 static void ggml_compute_forward_get_rel_pos(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_get_rel_pos_f16(params, src0, dst);
+                ggml_compute_forward_get_rel_pos_f16(params, dst);
             } break;
         default:
             {
@@ -14324,11 +14754,12 @@ static void ggml_compute_forward_get_rel_pos(
 
 static void ggml_compute_forward_add_rel_pos_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        const struct ggml_tensor * src2,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+    const struct ggml_tensor * src2 = dst->src[2];
+
     const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
     if (!inplace && params->type == GGML_TASK_INIT) {
         if (params->ith != 0) {
@@ -14392,14 +14823,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
 
 static void ggml_compute_forward_add_rel_pos(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        const struct ggml_tensor * src2,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_add_rel_pos_f32(params, src0, src1, src2, dst);
+                ggml_compute_forward_add_rel_pos_f32(params, dst);
             } break;
         default:
             {
@@ -14412,9 +14843,11 @@ static void ggml_compute_forward_add_rel_pos(
 
 static void ggml_compute_forward_map_unary_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst,
         const ggml_unary_op_f32_t fun) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14436,13 +14869,15 @@ static void ggml_compute_forward_map_unary_f32(
 
 static void ggml_compute_forward_map_unary(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst,
         const ggml_unary_op_f32_t fun) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_map_unary_f32(params, src0, dst, fun);
+                ggml_compute_forward_map_unary_f32(params, dst, fun);
             } break;
         default:
             {
@@ -14455,10 +14890,12 @@ static void ggml_compute_forward_map_unary(
 
 static void ggml_compute_forward_map_binary_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst,
         const ggml_binary_op_f32_t fun) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
@@ -14483,14 +14920,15 @@ static void ggml_compute_forward_map_binary_f32(
 
 static void ggml_compute_forward_map_binary(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst,
         const ggml_binary_op_f32_t fun) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_map_binary_f32(params, src0, src1, dst, fun);
+                ggml_compute_forward_map_binary_f32(params, dst, fun);
             } break;
         default:
             {
@@ -14503,9 +14941,11 @@ static void ggml_compute_forward_map_binary(
 
 static void ggml_compute_forward_map_custom1_f32(
        const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
         struct ggml_tensor * dst,
         const ggml_custom1_op_f32_t fun) {
+
+    const struct ggml_tensor * a = dst->src[0];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14519,10 +14959,12 @@ static void ggml_compute_forward_map_custom1_f32(
 
 static void ggml_compute_forward_map_custom2_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
         struct ggml_tensor * dst,
         const ggml_custom2_op_f32_t fun) {
+
+    const struct ggml_tensor * a = dst->src[0];
+    const struct ggml_tensor * b = dst->src[1];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14536,11 +14978,13 @@ static void ggml_compute_forward_map_custom2_f32(
 
 static void ggml_compute_forward_map_custom3_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
-        const struct ggml_tensor * c,
         struct ggml_tensor * dst,
         const ggml_custom3_op_f32_t fun) {
+
+    const struct ggml_tensor * a = dst->src[0];
+    const struct ggml_tensor * b = dst->src[1];
+    const struct ggml_tensor * c = dst->src[1];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14554,8 +14998,10 @@ static void ggml_compute_forward_map_custom3_f32(
 
 static void ggml_compute_forward_map_custom1(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * a = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14569,9 +15015,11 @@ static void ggml_compute_forward_map_custom1(
 
 static void ggml_compute_forward_map_custom2(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * a = dst->src[0];
+    const struct ggml_tensor * b = dst->src[1];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14585,10 +15033,12 @@ static void ggml_compute_forward_map_custom2(
 
 static void ggml_compute_forward_map_custom3(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
-        const struct ggml_tensor * c,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * a = dst->src[0];
+    const struct ggml_tensor * b = dst->src[1];
+    const struct ggml_tensor * c = dst->src[2];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14602,9 +15052,11 @@ static void ggml_compute_forward_map_custom3(
 
 static void ggml_compute_forward_cross_entropy_loss_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_is_contiguous(src0));
     GGML_ASSERT(ggml_is_contiguous(src1));
     GGML_ASSERT(ggml_is_scalar(dst));
@@ -14708,13 +15160,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
 
 static void ggml_compute_forward_cross_entropy_loss(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_cross_entropy_loss_f32(params, src0, src1, dst);
+                ggml_compute_forward_cross_entropy_loss_f32(params, dst);
             } break;
         default:
             {
@@ -14727,10 +15180,12 @@ static void ggml_compute_forward_cross_entropy_loss(
 
 static void ggml_compute_forward_cross_entropy_loss_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+    const struct ggml_tensor * opt0 = dst->src[2];
+
     GGML_ASSERT(ggml_is_contiguous(dst));
     GGML_ASSERT(ggml_is_contiguous(src0));
     GGML_ASSERT(ggml_is_contiguous(src1));
@@ -14817,14 +15272,14 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
 
 static void ggml_compute_forward_cross_entropy_loss_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_cross_entropy_loss_back_f32(params, src0, src1, opt0, dst);
+                ggml_compute_forward_cross_entropy_loss_back_f32(params, dst);
             } break;
         default:
             {
@@ -14872,312 +15327,312 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     switch (tensor->op) {
         case GGML_OP_DUP:
             {
-                ggml_compute_forward_dup(params, tensor->src[0], tensor);
+                ggml_compute_forward_dup(params, tensor);
             } break;
         case GGML_OP_ADD:
             {
-                ggml_compute_forward_add(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_add(params, tensor);
             } break;
         case GGML_OP_ADD1:
             {
-                ggml_compute_forward_add1(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_add1(params, tensor);
             } break;
         case GGML_OP_ACC:
             {
-                ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_acc(params, tensor);
             } break;
         case GGML_OP_SUB:
             {
-                ggml_compute_forward_sub(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_sub(params, tensor);
             } break;
         case GGML_OP_MUL:
             {
-                ggml_compute_forward_mul(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_mul(params, tensor);
             } break;
         case GGML_OP_DIV:
             {
-                ggml_compute_forward_div(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_div(params, tensor);
             } break;
         case GGML_OP_SQR:
             {
-                ggml_compute_forward_sqr(params, tensor->src[0], tensor);
+                ggml_compute_forward_sqr(params, tensor);
             } break;
         case GGML_OP_SQRT:
             {
-                ggml_compute_forward_sqrt(params, tensor->src[0], tensor);
+                ggml_compute_forward_sqrt(params, tensor);
             } break;
         case GGML_OP_LOG:
             {
-                ggml_compute_forward_log(params, tensor->src[0], tensor);
+                ggml_compute_forward_log(params, tensor);
             } break;
         case GGML_OP_SUM:
             {
-                ggml_compute_forward_sum(params, tensor->src[0], tensor);
+                ggml_compute_forward_sum(params, tensor);
             } break;
         case GGML_OP_SUM_ROWS:
             {
-                ggml_compute_forward_sum_rows(params, tensor->src[0], tensor);
+                ggml_compute_forward_sum_rows(params, tensor);
             } break;
         case GGML_OP_MEAN:
             {
-                ggml_compute_forward_mean(params, tensor->src[0], tensor);
+                ggml_compute_forward_mean(params, tensor);
             } break;
         case GGML_OP_ARGMAX:
             {
-                ggml_compute_forward_argmax(params, tensor->src[0], tensor);
+                ggml_compute_forward_argmax(params, tensor);
             } break;
         case GGML_OP_REPEAT:
             {
-                ggml_compute_forward_repeat(params, tensor->src[0], tensor);
+                ggml_compute_forward_repeat(params, tensor);
             } break;
         case GGML_OP_REPEAT_BACK:
             {
-                ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
+                ggml_compute_forward_repeat_back(params, tensor);
             } break;
         case GGML_OP_CONCAT:
             {
-                ggml_compute_forward_concat(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_concat(params, tensor);
             } break;
         case GGML_OP_SILU_BACK:
             {
-                ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_silu_back(params, tensor);
             } break;
         case GGML_OP_NORM:
             {
-                ggml_compute_forward_norm(params, tensor->src[0], tensor);
+                ggml_compute_forward_norm(params, tensor);
             } break;
         case GGML_OP_RMS_NORM:
             {
-                ggml_compute_forward_rms_norm(params, tensor->src[0], tensor);
+                ggml_compute_forward_rms_norm(params, tensor);
             } break;
         case GGML_OP_RMS_NORM_BACK:
             {
-                ggml_compute_forward_rms_norm_back(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_rms_norm_back(params, tensor);
             } break;
         case GGML_OP_GROUP_NORM:
             {
-                ggml_compute_forward_group_norm(params, tensor->src[0], tensor);
+                ggml_compute_forward_group_norm(params, tensor);
             } break;
         case GGML_OP_MUL_MAT:
             {
-                ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_mul_mat(params, tensor);
             } break;
         case GGML_OP_MUL_MAT_ID:
             {
-                ggml_compute_forward_mul_mat_id(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_mul_mat_id(params, tensor);
             } break;
         case GGML_OP_OUT_PROD:
             {
-                ggml_compute_forward_out_prod(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_out_prod(params, tensor);
             } break;
         case GGML_OP_SCALE:
             {
-                ggml_compute_forward_scale(params, tensor->src[0], tensor);
+                ggml_compute_forward_scale(params, tensor);
             } break;
         case GGML_OP_SET:
             {
-                ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_set(params, tensor);
             } break;
         case GGML_OP_CPY:
             {
-                ggml_compute_forward_cpy(params, tensor->src[0], tensor);
+                ggml_compute_forward_cpy(params, tensor);
             } break;
         case GGML_OP_CONT:
             {
-                ggml_compute_forward_cont(params, tensor->src[0], tensor);
+                ggml_compute_forward_cont(params, tensor);
             } break;
         case GGML_OP_RESHAPE:
             {
-                ggml_compute_forward_reshape(params, tensor->src[0], tensor);
+                ggml_compute_forward_reshape(params, tensor);
             } break;
         case GGML_OP_VIEW:
             {
-                ggml_compute_forward_view(params, tensor->src[0], tensor);
+                ggml_compute_forward_view(params, tensor);
             } break;
         case GGML_OP_PERMUTE:
             {
-                ggml_compute_forward_permute(params, tensor->src[0], tensor);
+                ggml_compute_forward_permute(params, tensor);
             } break;
         case GGML_OP_TRANSPOSE:
             {
-                ggml_compute_forward_transpose(params, tensor->src[0], tensor);
+                ggml_compute_forward_transpose(params, tensor);
             } break;
         case GGML_OP_GET_ROWS:
             {
-                ggml_compute_forward_get_rows(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_get_rows(params, tensor);
             } break;
         case GGML_OP_GET_ROWS_BACK:
             {
-                ggml_compute_forward_get_rows_back(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_get_rows_back(params, tensor);
             } break;
         case GGML_OP_DIAG:
             {
-                ggml_compute_forward_diag(params, tensor->src[0], tensor);
+                ggml_compute_forward_diag(params, tensor);
             } break;
         case GGML_OP_DIAG_MASK_INF:
             {
-                ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor);
+                ggml_compute_forward_diag_mask_inf(params, tensor);
             } break;
         case GGML_OP_DIAG_MASK_ZERO:
             {
-                ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor);
+                ggml_compute_forward_diag_mask_zero(params, tensor);
             } break;
         case GGML_OP_SOFT_MAX:
             {
-                ggml_compute_forward_soft_max(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_soft_max(params, tensor);
             } break;
         case GGML_OP_SOFT_MAX_BACK:
             {
-                ggml_compute_forward_soft_max_back(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_soft_max_back(params, tensor);
             } break;
         case GGML_OP_ROPE:
             {
-                ggml_compute_forward_rope(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_rope(params, tensor);
             } break;
         case GGML_OP_ROPE_BACK:
             {
-                ggml_compute_forward_rope_back(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_rope_back(params, tensor);
             } break;
         case GGML_OP_ALIBI:
             {
-                ggml_compute_forward_alibi(params, tensor->src[0], tensor);
+                ggml_compute_forward_alibi(params, tensor);
             } break;
         case GGML_OP_CLAMP:
             {
-                ggml_compute_forward_clamp(params, tensor->src[0], tensor);
+                ggml_compute_forward_clamp(params, tensor);
             } break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             {
-                ggml_compute_forward_conv_transpose_1d(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_conv_transpose_1d(params, tensor);
             } break;
         case GGML_OP_IM2COL:
             {
-                ggml_compute_forward_im2col(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_im2col(params, tensor);
             } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
-                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_conv_transpose_2d(params, tensor);
             } break;
         case GGML_OP_POOL_1D:
             {
-                ggml_compute_forward_pool_1d(params, tensor->src[0], tensor);
+                ggml_compute_forward_pool_1d(params, tensor);
             } break;
         case GGML_OP_POOL_2D:
             {
-                ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
+                ggml_compute_forward_pool_2d(params, tensor);
             } break;
         case GGML_OP_UPSCALE:
             {
-                ggml_compute_forward_upscale(params, tensor->src[0], tensor);
+                ggml_compute_forward_upscale(params, tensor);
             } break;
         case GGML_OP_PAD:
             {
-                ggml_compute_forward_pad(params, tensor->src[0], tensor);
+                ggml_compute_forward_pad(params, tensor);
             } break;
         case GGML_OP_ARGSORT:
             {
-                ggml_compute_forward_argsort(params, tensor->src[0], tensor);
+                ggml_compute_forward_argsort(params, tensor);
             } break;
         case GGML_OP_LEAKY_RELU:
             {
-                ggml_compute_forward_leaky_relu(params, tensor->src[0], tensor);
+                ggml_compute_forward_leaky_relu(params, tensor);
             } break;
         case GGML_OP_FLASH_ATTN:
             {
                 const int32_t t = ggml_get_op_params_i32(tensor, 0);
                 GGML_ASSERT(t == 0 || t == 1);
                 const bool masked = t != 0;
-                ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
+                ggml_compute_forward_flash_attn(params, masked, tensor);
             } break;
         case GGML_OP_FLASH_FF:
             {
-                ggml_compute_forward_flash_ff(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], tensor->src[4], tensor);
+                ggml_compute_forward_flash_ff(params, tensor);
             } break;
         case GGML_OP_FLASH_ATTN_BACK:
             {
                 int32_t t = ggml_get_op_params_i32(tensor, 0);
                 GGML_ASSERT(t == 0 || t == 1);
                 bool masked = t != 0;
-                ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
+                ggml_compute_forward_flash_attn_back(params, masked, tensor);
             } break;
         case GGML_OP_WIN_PART:
             {
-                ggml_compute_forward_win_part(params, tensor->src[0], tensor);
+                ggml_compute_forward_win_part(params, tensor);
             } break;
         case GGML_OP_WIN_UNPART:
             {
-                ggml_compute_forward_win_unpart(params, tensor->src[0], tensor);
+                ggml_compute_forward_win_unpart(params, tensor);
             } break;
         case GGML_OP_UNARY:
             {
-                ggml_compute_forward_unary(params, tensor->src[0], tensor);
+                ggml_compute_forward_unary(params, tensor);
             } break;
         case GGML_OP_GET_REL_POS:
             {
-                ggml_compute_forward_get_rel_pos(params, tensor->src[0], tensor);
+                ggml_compute_forward_get_rel_pos(params, tensor);
             } break;
         case GGML_OP_ADD_REL_POS:
             {
-                ggml_compute_forward_add_rel_pos(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+                ggml_compute_forward_add_rel_pos(params, tensor);
             } break;
         case GGML_OP_MAP_UNARY:
             {
                 ggml_unary_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
+                ggml_compute_forward_map_unary(params, tensor, fun);
             }
             break;
         case GGML_OP_MAP_BINARY:
             {
                 ggml_binary_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
+                ggml_compute_forward_map_binary(params, tensor, fun);
             }
             break;
        case GGML_OP_MAP_CUSTOM1_F32:
            {
                 ggml_custom1_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
+                ggml_compute_forward_map_custom1_f32(params, tensor, fun);
            }
            break;
        case GGML_OP_MAP_CUSTOM2_F32:
            {
                 ggml_custom2_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
            }
+                ggml_compute_forward_map_custom2_f32(params, tensor, fun);
            break;
        case GGML_OP_MAP_CUSTOM3_F32:
            {
                 ggml_custom3_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
+                ggml_compute_forward_map_custom3_f32(params, tensor, fun);
            }
            break;
        case GGML_OP_MAP_CUSTOM1:
            {
-                ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
+                ggml_compute_forward_map_custom1(params, tensor);
            }
            break;
        case GGML_OP_MAP_CUSTOM2:
            {
-                ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_map_custom2(params, tensor);
            }
            break;
        case GGML_OP_MAP_CUSTOM3:
            {
-                ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+                ggml_compute_forward_map_custom3(params, tensor);
            }
            break;
        case GGML_OP_CROSS_ENTROPY_LOSS:
            {
-                ggml_compute_forward_cross_entropy_loss(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_cross_entropy_loss(params, tensor);
            }
            break;
        case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
            {
-                ggml_compute_forward_cross_entropy_loss_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+                ggml_compute_forward_cross_entropy_loss_back(params, tensor);
            }
            break;
        case GGML_OP_NONE:
@@ -15311,7 +15766,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
         return NULL;
     }
 
-    if (node->is_param) {
+    if (node->flags & GGML_TENSOR_FLAG_PARAM) {
         return node;
     }
 
@@ -15345,7 +15800,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
 
     clone->op       = node->op;
     clone->grad     = node->grad;
-    clone->is_param = node->is_param;
+    clone->flags    = node->flags;
     clone->extra    = node->extra;
     for (int k = 0; k < GGML_MAX_DIMS; ++k) {
         clone->nb[k] = node->nb[k];
@@ -16377,7 +16832,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
     for (int i = 0; i < gf->n_nodes; i++) {
         struct ggml_tensor * node = gf->nodes[i];
 
-        if (node->is_param) {
+        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
            GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
            ggml_build_forward_expand(gb, node->grad);
        }
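The three hunks above replace the old boolean is_param field with a bit test on the new flags bitfield; GGML_TENSOR_FLAG_PARAM is among the tensor flags this release adds to ggml.h. A minimal sketch of the equivalent check (the helper name is ours):

    #include <stdbool.h>
    #include "ggml.h"

    // sketch: equivalent of the old `node->is_param` test
    static bool tensor_is_param(const struct ggml_tensor * t) {
        return (t->flags & GGML_TENSOR_FLAG_PARAM) != 0;
    }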
@@ -16581,27 +17036,47 @@ typedef pthread_t ggml_thread_t;
|
|
16581
17036
|
#endif
|
16582
17037
|
|
16583
17038
|
// Android's libc implementation "bionic" does not support setting affinity
|
16584
|
-
#if defined(
|
16585
|
-
static void set_numa_thread_affinity(int thread_n
|
17039
|
+
#if defined(__gnu_linux__)
|
17040
|
+
static void set_numa_thread_affinity(int thread_n) {
|
16586
17041
|
if (!ggml_is_numa()) {
|
16587
17042
|
return;
|
16588
17043
|
}
|
16589
17044
|
|
16590
|
-
|
16591
|
-
|
16592
|
-
struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
|
17045
|
+
int node_num;
|
17046
|
+
int rv;
|
16593
17047
|
size_t setsize = CPU_ALLOC_SIZE(g_state.numa.total_cpus);
|
16594
17048
|
|
17049
|
+
switch(g_state.numa.numa_strategy) {
|
17050
|
+
case GGML_NUMA_STRATEGY_DISTRIBUTE:
|
17051
|
+
// run thread on node_num thread_n / (threads per node)
|
17052
|
+
node_num = thread_n % g_state.numa.n_nodes;
|
17053
|
+
break;
|
17054
|
+
case GGML_NUMA_STRATEGY_ISOLATE:
|
17055
|
+
// run thread on current_node
|
17056
|
+
node_num = g_state.numa.current_node;
|
17057
|
+
break;
|
17058
|
+
case GGML_NUMA_STRATEGY_NUMACTL:
|
17059
|
+
// use the cpuset that numactl gave us
|
17060
|
+
rv = pthread_setaffinity_np(pthread_self(), setsize, &g_state.numa.cpuset);
|
17061
|
+
if (rv) {
|
17062
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv));
|
17063
|
+
}
|
17064
|
+
return;
|
17065
|
+
default:
|
17066
|
+
return;
|
17067
|
+
}
|
17068
|
+
|
17069
|
+
struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
|
17070
|
+
|
16595
17071
|
cpu_set_t * cpus = CPU_ALLOC(g_state.numa.total_cpus);
|
16596
17072
|
CPU_ZERO_S(setsize, cpus);
|
16597
17073
|
for (size_t i = 0; i < node->n_cpus; ++i) {
|
16598
17074
|
CPU_SET_S(node->cpus[i], setsize, cpus);
|
16599
17075
|
}
|
16600
17076
|
|
16601
|
-
int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
|
17077
|
+
rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
|
16602
17078
|
if (rv) {
|
16603
|
-
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
|
16604
|
-
strerror(rv));
|
17079
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
|
16605
17080
|
}
|
16606
17081
|
|
16607
17082
|
CPU_FREE(cpus);
|
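The rewritten affinity routine selects a NUMA node per worker according to the new strategy enum. A reduced model of that selection, with stand-in names for the `g_state.numa` fields:

```c
// Reduced model of the strategy switch above; the enum values mirror the new
// GGML_NUMA_STRATEGY_* names, n_nodes/current_node stand in for g_state.numa.
enum numa_strategy_model { MODEL_DISTRIBUTE, MODEL_ISOLATE, MODEL_NUMACTL };

static int pick_numa_node(enum numa_strategy_model s, int thread_n,
                          int n_nodes, int current_node) {
    switch (s) {
        case MODEL_DISTRIBUTE: return thread_n % n_nodes; // round-robin workers across nodes
        case MODEL_ISOLATE:    return current_node;       // keep workers on the starting node
        default:               return -1;                 // NUMACTL: keep inherited cpuset
    }
}
```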
@@ -16622,8 +17097,7 @@ static void clear_numa_thread_affinity(void) {
|
|
16622
17097
|
|
16623
17098
|
int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
|
16624
17099
|
if (rv) {
|
16625
|
-
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
|
16626
|
-
strerror(rv));
|
17100
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
|
16627
17101
|
}
|
16628
17102
|
|
16629
17103
|
CPU_FREE(cpus);
|
@@ -16631,7 +17105,7 @@ static void clear_numa_thread_affinity(void) {
|
|
16631
17105
|
#else
|
16632
17106
|
// TODO: Windows etc.
|
16633
17107
|
// (the linux implementation may also work on BSD, someone should test)
|
16634
|
-
static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
17108
|
+
static void set_numa_thread_affinity(int thread_n) { UNUSED(thread_n); }
|
16635
17109
|
static void clear_numa_thread_affinity(void) {}
|
16636
17110
|
#endif
|
16637
17111
|
|
@@ -16649,7 +17123,7 @@ struct ggml_compute_state_shared {
|
|
16649
17123
|
atomic_int node_n; // active graph node
|
16650
17124
|
atomic_int node_task; // active graph node task phase
|
16651
17125
|
|
16652
|
-
bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
|
17126
|
+
ggml_abort_callback abort_callback; // abort ggml_graph_compute when true
|
16653
17127
|
void * abort_callback_data;
|
16654
17128
|
};
|
16655
17129
|
|
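The shared compute state now stores a `ggml_abort_callback`, declared in ggml.h as `typedef bool (*ggml_abort_callback)(void * data);`. A sketch of a cooperative cancel flag a caller could pass through `ggml_cplan`'s `abort_callback`/`abort_callback_data` fields:

```c
#include <stdbool.h>

// Cooperative cancellation sketch: wire this through ggml_cplan's
// abort_callback / abort_callback_data before calling ggml_graph_compute.
static bool should_abort(void * data) {
    const volatile bool * cancelled = data; // flag set from another thread
    return *cancelled;                      // returning true stops the graph mid-compute
}
```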
@@ -16931,7 +17405,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16931
17405
|
|
16932
17406
|
const int n_threads = state->shared->n_threads;
|
16933
17407
|
|
16934
|
-
set_numa_thread_affinity(state->ith, n_threads);
|
17408
|
+
set_numa_thread_affinity(state->ith);
|
16935
17409
|
|
16936
17410
|
int node_n = -1;
|
16937
17411
|
int task_phase = GGML_TASK_FINALIZE;
|
@@ -17737,7 +18211,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|
17737
18211
|
|
17738
18212
|
ptr += ggml_nbytes(tensor);
|
17739
18213
|
|
17740
|
-
fprintf(stderr, "%s: loaded leaf %
|
18214
|
+
fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
17741
18215
|
}
|
17742
18216
|
}
|
17743
18217
|
|
@@ -17840,7 +18314,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|
17840
18314
|
|
17841
18315
|
result->nodes[i] = tensor;
|
17842
18316
|
|
17843
|
-
fprintf(stderr, "%s: loaded node %
|
18317
|
+
fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
17844
18318
|
}
|
17845
18319
|
}
|
17846
18320
|
}
|
@@ -17862,7 +18336,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17862
18336
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
17863
18337
|
i,
|
17864
18338
|
node->ne[0], node->ne[1], node->ne[2],
|
17865
|
-
ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
18339
|
+
ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
17866
18340
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
17867
18341
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
17868
18342
|
(double) node->perf_time_us / 1000.0,
|
@@ -17955,7 +18429,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
|
17955
18429
|
continue;
|
17956
18430
|
}
|
17957
18431
|
|
17958
|
-
if (node->is_param) {
|
18432
|
+
if (node->flags & GGML_TENSOR_FLAG_PARAM) {
|
17959
18433
|
snprintf(color, sizeof(color), "yellow");
|
17960
18434
|
} else if (node->grad) {
|
17961
18435
|
if (ggml_graph_find(gf, node)) {
|
@@ -18129,7 +18603,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18129
18603
|
int np = 0;
|
18130
18604
|
int64_t nx = 0;
|
18131
18605
|
for (int i = 0; i < gf->n_nodes; ++i) {
|
18132
|
-
if (gf->nodes[i]->is_param) {
|
18606
|
+
if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) {
|
18133
18607
|
GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
|
18134
18608
|
|
18135
18609
|
GGML_ASSERT(np < GGML_MAX_PARAMS);
|
@@ -18382,7 +18856,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
18382
18856
|
}
|
18383
18857
|
|
18384
18858
|
// compute the initial gradient in the search direction
|
18385
|
-
ggml_vec_dot_f32(nx, &dginit, g, d);
|
18859
|
+
ggml_vec_dot_f32(nx, &dginit, 0, g, 0, d, 0, 1);
|
18386
18860
|
|
18387
18861
|
// make sure that d points to a descent direction
|
18388
18862
|
if (0 < dginit) {
|
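All `ggml_vec_dot_f32` call sites gain the widened signature from the forward declarations: byte strides `bs`/`bx`/`by` and a row count `nrc` let SIMD builds produce several results per call, while scalar callers such as this line search pass zero strides and `nrc = 1`. A reference model of the `nrc == 1` case:

```c
#include <stddef.h>

// Reference semantics of the widened interface for the nrc == 1 case used
// here: the stride and row-count arguments are simply unused, and the call
// degenerates to a plain dot product of n floats.
static void vec_dot_f32_ref(int n, float * s, size_t bs, const float * x, size_t bx,
                            const float * y, size_t by, int nrc) {
    (void) bs; (void) bx; (void) by; (void) nrc; // only meaningful when nrc > 1
    float sum = 0.0f;
    for (int i = 0; i < n; ++i) {
        sum += x[i] * y[i];
    }
    *s = sum;
}
```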
@@ -18432,7 +18906,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
18432
18906
|
return count;
|
18433
18907
|
}
|
18434
18908
|
|
18435
|
-
ggml_vec_dot_f32(nx, &dg, g, d);
|
18909
|
+
ggml_vec_dot_f32(nx, &dg, 0, g, 0, d, 0, 1);
|
18436
18910
|
|
18437
18911
|
// check the Wolfe condition
|
18438
18912
|
if (dg < params->lbfgs.wolfe * dginit) {
|
@@ -18465,7 +18939,9 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
18465
18939
|
(*step) *= width;
|
18466
18940
|
}
|
18467
18941
|
|
18468
|
-
return GGML_LINESEARCH_FAIL;
|
18942
|
+
GGML_ASSERT(false && "line search failed");
|
18943
|
+
|
18944
|
+
return GGML_LINESEARCH_FAIL;
|
18469
18945
|
}
|
18470
18946
|
|
18471
18947
|
static enum ggml_opt_result ggml_opt_lbfgs(
|
@@ -18492,7 +18968,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18492
18968
|
int np = 0;
|
18493
18969
|
int nx = 0;
|
18494
18970
|
for (int i = 0; i < gf->n_nodes; ++i) {
|
18495
|
-
if (gf->nodes[i]->is_param) {
|
18971
|
+
if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) {
|
18496
18972
|
GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
|
18497
18973
|
|
18498
18974
|
GGML_ASSERT(np < GGML_MAX_PARAMS);
|
@@ -18693,8 +19169,8 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18693
19169
|
// ys = y^t \cdot s -> 1 / \rho.
|
18694
19170
|
// yy = y^t \cdot y.
|
18695
19171
|
//
|
18696
|
-
ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]*nx]);
|
18697
|
-
ggml_vec_dot_f32(nx, &yy, &lm_y[end[0]*nx], &lm_y[end[0]*nx]);
|
19172
|
+
ggml_vec_dot_f32(nx, &ys, 0, &lm_y[end[0]*nx], 0, &lm_s[end[0]*nx], 0, 1);
|
19173
|
+
ggml_vec_dot_f32(nx, &yy, 0, &lm_y[end[0]*nx], 0, &lm_y[end[0]*nx], 0, 1);
|
18698
19174
|
|
18699
19175
|
lm_ys[end[0]] = ys;
|
18700
19176
|
|
@@ -18713,7 +19189,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18713
19189
|
for (int i = 0; i < bound; ++i) {
|
18714
19190
|
j[0] = (j[0] + m - 1) % m;
|
18715
19191
|
// \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}
|
18716
|
-
ggml_vec_dot_f32(nx, &lm_alpha[j[0]], &lm_s[j[0]*nx], d);
|
19192
|
+
ggml_vec_dot_f32(nx, &lm_alpha[j[0]], 0, &lm_s[j[0]*nx], 0, d, 0, 1);
|
18717
19193
|
lm_alpha[j[0]] /= lm_ys[j[0]];
|
18718
19194
|
// q_{i} = q_{i+1} - \alpha_{i} y_{i}
|
18719
19195
|
ggml_vec_mad_f32(nx, d, &lm_y[j[0]*nx], -lm_alpha[j[0]]);
|
@@ -18723,7 +19199,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18723
19199
|
|
18724
19200
|
for (int i = 0; i < bound; ++i) {
|
18725
19201
|
// \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}
|
18726
|
-
ggml_vec_dot_f32(nx, &beta, &lm_y[j[0]*nx], d);
|
19202
|
+
ggml_vec_dot_f32(nx, &beta, 0, &lm_y[j[0]*nx], 0, d, 0, 1);
|
18727
19203
|
beta /= lm_ys[j[0]];
|
18728
19204
|
// \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}
|
18729
19205
|
ggml_vec_mad_f32(nx, d, &lm_s[j[0]*nx], lm_alpha[j[0]] - beta);
|
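These hunks only thread the new vec-dot signature through the standard L-BFGS two-loop recursion; for orientation, the quantities named in the comments are, with \(\rho_j = 1/(y_j^{\top} s_j)\):

```latex
% L-BFGS two-loop recursion, matching the \alpha, \beta, \rho in the comments:
\begin{aligned}
\alpha_j &= \rho_j\, s_j^{\top} q, \qquad q \leftarrow q - \alpha_j\, y_j \\
\beta_j  &= \rho_j\, y_j^{\top} z, \qquad z \leftarrow z + (\alpha_j - \beta_j)\, s_j
\end{aligned}
```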
@@ -18733,7 +19209,9 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18733
19209
|
step[0] = 1.0;
|
18734
19210
|
}
|
18735
19211
|
|
18736
|
-
return GGML_OPT_DID_NOT_CONVERGE;
|
19212
|
+
GGML_ASSERT(false && "lbfgs failed");
|
19213
|
+
|
19214
|
+
return GGML_OPT_DID_NOT_CONVERGE;
|
18737
19215
|
}
|
18738
19216
|
|
18739
19217
|
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
@@ -18967,12 +19445,23 @@ enum ggml_opt_result ggml_opt_resume_g(
|
|
18967
19445
|
|
18968
19446
|
////////////////////////////////////////////////////////////////////////////////
|
18969
19447
|
|
19448
|
+
void ggml_set_input(struct ggml_tensor * tensor) {
|
19449
|
+
tensor->flags |= GGML_TENSOR_FLAG_INPUT;
|
19450
|
+
}
|
19451
|
+
|
19452
|
+
void ggml_set_output(struct ggml_tensor * tensor) {
|
19453
|
+
tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
|
19454
|
+
}
|
19455
|
+
|
19456
|
+
////////////////////////////////////////////////////////////////////////////////
|
19457
|
+
|
18970
19458
|
void ggml_quantize_init(enum ggml_type type) {
|
18971
19459
|
ggml_critical_section_start();
|
18972
19460
|
|
18973
19461
|
switch (type) {
|
18974
|
-
case GGML_TYPE_IQ2_XXS: iq2xs_init_impl(256); break;
|
18975
|
-
case GGML_TYPE_IQ2_XS:  iq2xs_init_impl(512); break;
|
19462
|
+
case GGML_TYPE_IQ2_XXS:
|
19463
|
+
case GGML_TYPE_IQ2_XS:
|
19464
|
+
case GGML_TYPE_IQ1_S: iq2xs_init_impl(type); break;
|
18976
19465
|
case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
|
18977
19466
|
default: // nothing
|
18978
19467
|
break;
|
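The new `ggml_set_input`/`ggml_set_output` helpers tag graph endpoints so later stages (allocators, backends) can recognize them. A usage sketch; the tensor roles are hypothetical:

```c
#include "ggml.h"

// Tag the endpoints of a computation graph. Assumes the tensors come from
// regular graph construction elsewhere.
static void tag_io(struct ggml_tensor * tokens, struct ggml_tensor * logits) {
    ggml_set_input(tokens);  // sets GGML_TENSOR_FLAG_INPUT
    ggml_set_output(logits); // sets GGML_TENSOR_FLAG_OUTPUT
}
```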
@@ -18984,8 +19473,10 @@ void ggml_quantize_init(enum ggml_type type) {
|
|
18984
19473
|
void ggml_quantize_free(void) {
|
18985
19474
|
ggml_critical_section_start();
|
18986
19475
|
|
18987
|
-
iq2xs_free_impl(256);
|
18988
|
-
iq2xs_free_impl(512);
|
19476
|
+
iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
|
19477
|
+
iq2xs_free_impl(GGML_TYPE_IQ2_XS);
|
19478
|
+
iq2xs_free_impl(GGML_TYPE_IQ1_S);
|
19479
|
+
iq3xs_free_impl(256);
|
18989
19480
|
|
18990
19481
|
ggml_critical_section_end();
|
18991
19482
|
}
|
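`ggml_quantize_init`/`ggml_quantize_free` now also cover the IQ1_S and IQ3_XXS lookup tables. The intended pairing, sketched (the function name is illustrative):

```c
#include "ggml.h"

// IQ-family types build shared lookup tables once up front; the free path
// now releases the IQ1_S and IQ3_XXS tables added in this version as well.
static void quantize_session(void) {
    ggml_quantize_init(GGML_TYPE_IQ1_S); // builds the iq2xs grid used by IQ1_S
    /* ... quantize tensors ... */
    ggml_quantize_free();                // frees IQ2_XXS/IQ2_XS/IQ1_S/IQ3_XXS tables
}
```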
@@ -19120,7 +19611,8 @@ size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t *
|
|
19120
19611
|
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
|
19121
19612
|
return
|
19122
19613
|
type == GGML_TYPE_IQ2_XXS ||
|
19123
|
-
type == GGML_TYPE_IQ2_XS;
|
19614
|
+
type == GGML_TYPE_IQ2_XS ||
|
19615
|
+
type == GGML_TYPE_IQ1_S;
|
19124
19616
|
}
|
19125
19617
|
|
19126
19618
|
size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start,
|
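With IQ1_S added to `ggml_quantize_requires_imatrix`, callers should gate quantization on the presence of importance data. A guard sketch (the helper name is hypothetical):

```c
#include <stdio.h>
#include "ggml.h"

// IQ2_XXS, IQ2_XS and now IQ1_S cannot be quantized sensibly without
// importance data, so check before dispatching.
static int can_quantize(enum ggml_type type, const float * imatrix) {
    if (ggml_quantize_requires_imatrix(type) && imatrix == NULL) {
        fprintf(stderr, "%s needs an importance matrix\n", ggml_type_name(type));
        return 0;
    }
    return 1;
}
```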
@@ -19245,6 +19737,24 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|
19245
19737
|
result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
19246
19738
|
GGML_ASSERT(result == row_size * nrows);
|
19247
19739
|
} break;
|
19740
|
+
case GGML_TYPE_IQ1_S:
|
19741
|
+
{
|
19742
|
+
GGML_ASSERT(start % QK_K == 0);
|
19743
|
+
GGML_ASSERT(start % n_per_row == 0);
|
19744
|
+
size_t start_row = start / n_per_row;
|
19745
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
19746
|
+
result = quantize_iq1_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
19747
|
+
GGML_ASSERT(result == row_size * nrows);
|
19748
|
+
} break;
|
19749
|
+
case GGML_TYPE_IQ4_NL:
|
19750
|
+
{
|
19751
|
+
GGML_ASSERT(start % QK4_NL == 0);
|
19752
|
+
GGML_ASSERT(start % n_per_row == 0);
|
19753
|
+
size_t start_row = start / n_per_row;
|
19754
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
19755
|
+
result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
19756
|
+
GGML_ASSERT(result == row_size * nrows);
|
19757
|
+
} break;
|
19248
19758
|
case GGML_TYPE_F16:
|
19249
19759
|
{
|
19250
19760
|
size_t elemsize = sizeof(ggml_fp16_t);
|
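The new `GGML_TYPE_IQ1_S` and `GGML_TYPE_IQ4_NL` branches follow the established per-row chunk pattern: assert row alignment, then quantize `nrows` rows of `n_per_row` values each. A call-site sketch, assuming the chunk signature shown in the surrounding context; the 16-bucket `hist` size follows the convention used by the other quantizers:

```c
#include <stdint.h>
#include "ggml.h"

// start must be row-aligned (start % n_per_row == 0), and IQ1_S additionally
// requires an imatrix; src/dst/imatrix are allocated by the caller.
static size_t quantize_rows_iq1_s(const float * src, void * dst,
                                  int nrows, int n_per_row, const float * imatrix) {
    int64_t hist[16] = {0}; // per-bucket histogram, may go unused by IQ types
    return ggml_quantize_chunk(GGML_TYPE_IQ1_S, src, dst,
                               /*start=*/0, nrows, n_per_row, hist, imatrix);
}
```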
@@ -20611,4 +21121,12 @@ int ggml_cpu_has_vsx(void) {
|
|
20611
21121
|
#endif
|
20612
21122
|
}
|
20613
21123
|
|
21124
|
+
int ggml_cpu_has_matmul_int8(void) {
|
21125
|
+
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
21126
|
+
return 1;
|
21127
|
+
#else
|
21128
|
+
return 0;
|
21129
|
+
#endif
|
21130
|
+
}
|
21131
|
+
|
20614
21132
|
////////////////////////////////////////////////////////////////////////////////
|
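`ggml_cpu_has_matmul_int8` mirrors the other `ggml_cpu_has_*` probes: it reports whether the binary was built with ARM's i8mm extension (`__ARM_FEATURE_MATMUL_INT8`). Usage sketch:

```c
#include <stdio.h>
#include "ggml.h"

// Like the other feature probes, this reflects compile-time flags, so it is
// a property of the build rather than a runtime CPUID check.
static void print_int8_matmul_support(void) {
    printf("MATMUL_INT8 = %d\n", ggml_cpu_has_matmul_int8());
}
```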