llama_cpp 0.12.6 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/ext/llama_cpp/llama_cpp.cpp +90 -269
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +28 -23
- data/vendor/tmp/llama.cpp/Makefile +51 -15
- data/vendor/tmp/llama.cpp/ggml-alloc.c +73 -43
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +32 -11
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +560 -346
- data/vendor/tmp/llama.cpp/ggml-impl.h +20 -7
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +7 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +191 -22
- data/vendor/tmp/llama.cpp/ggml-metal.metal +2472 -862
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +25 -25
- data/vendor/tmp/llama.cpp/ggml-quants.c +3176 -667
- data/vendor/tmp/llama.cpp/ggml-quants.h +77 -2
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +373 -424
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +186 -102
- data/vendor/tmp/llama.cpp/ggml.c +1266 -699
- data/vendor/tmp/llama.cpp/ggml.h +59 -30
- data/vendor/tmp/llama.cpp/llama.cpp +1517 -717
- data/vendor/tmp/llama.cpp/llama.h +87 -63
- data/vendor/tmp/llama.cpp/scripts/get-flags.mk +1 -1
- data/vendor/tmp/llama.cpp/unicode.h +310 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
|
@@ -23,6 +23,9 @@
|
|
|
23
23
|
#include <limits.h>
|
|
24
24
|
#include <stdarg.h>
|
|
25
25
|
#include <signal.h>
|
|
26
|
+
#if defined(__gnu_linux__)
|
|
27
|
+
#include <syscall.h>
|
|
28
|
+
#endif
|
|
26
29
|
|
|
27
30
|
#ifdef GGML_USE_METAL
|
|
28
31
|
#include <unistd.h>
|
|
@@ -270,6 +273,8 @@ inline static void * ggml_calloc(size_t num, size_t size) {
|
|
|
270
273
|
#include <Accelerate/Accelerate.h>
|
|
271
274
|
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
|
|
272
275
|
#include "ggml-opencl.h"
|
|
276
|
+
#elif defined(GGML_USE_VULKAN)
|
|
277
|
+
#include "ggml-vulkan.h"
|
|
273
278
|
#endif
|
|
274
279
|
#elif defined(GGML_USE_OPENBLAS)
|
|
275
280
|
#if defined(GGML_BLAS_USE_MKL)
|
|
@@ -318,7 +323,7 @@ float ggml_table_f32_f16[1 << 16];
|
|
|
318
323
|
// note: do not use these inside ggml.c
|
|
319
324
|
// these are meant to be used via the ggml.h API
|
|
320
325
|
float ggml_fp16_to_fp32(ggml_fp16_t x) {
|
|
321
|
-
return
|
|
326
|
+
return GGML_FP16_TO_FP32(x);
|
|
322
327
|
}
|
|
323
328
|
|
|
324
329
|
ggml_fp16_t ggml_fp32_to_fp16(float x) {
|
|
@@ -350,6 +355,10 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
|
|
|
350
355
|
}
|
|
351
356
|
}
|
|
352
357
|
|
|
358
|
+
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
|
|
359
|
+
return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
|
|
360
|
+
}
|
|
361
|
+
|
|
353
362
|
//
|
|
354
363
|
// timing
|
|
355
364
|
//
|
|
@@ -673,6 +682,74 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
|
673
682
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
|
674
683
|
.nrows = 1,
|
|
675
684
|
},
|
|
685
|
+
[GGML_TYPE_IQ3_S] = {
|
|
686
|
+
.type_name = "iq3_s",
|
|
687
|
+
.blck_size = QK_K,
|
|
688
|
+
.type_size = sizeof(block_iq3_s),
|
|
689
|
+
.is_quantized = true,
|
|
690
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq3_s,
|
|
691
|
+
.from_float = quantize_row_iq3_s,
|
|
692
|
+
.from_float_reference = (ggml_from_float_t)quantize_row_iq3_s_reference,
|
|
693
|
+
.vec_dot = ggml_vec_dot_iq3_s_q8_K,
|
|
694
|
+
.vec_dot_type = GGML_TYPE_Q8_K,
|
|
695
|
+
.nrows = 1,
|
|
696
|
+
},
|
|
697
|
+
[GGML_TYPE_IQ2_S] = {
|
|
698
|
+
.type_name = "iq2_s",
|
|
699
|
+
.blck_size = QK_K,
|
|
700
|
+
.type_size = sizeof(block_iq2_s),
|
|
701
|
+
.is_quantized = true,
|
|
702
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq2_s,
|
|
703
|
+
.from_float = quantize_row_iq2_s,
|
|
704
|
+
.from_float_reference = (ggml_from_float_t)quantize_row_iq2_s_reference,
|
|
705
|
+
.vec_dot = ggml_vec_dot_iq2_s_q8_K,
|
|
706
|
+
.vec_dot_type = GGML_TYPE_Q8_K,
|
|
707
|
+
.nrows = 1,
|
|
708
|
+
},
|
|
709
|
+
[GGML_TYPE_IQ1_S] = {
|
|
710
|
+
.type_name = "iq1_s",
|
|
711
|
+
.blck_size = QK_K,
|
|
712
|
+
.type_size = sizeof(block_iq1_s),
|
|
713
|
+
.is_quantized = true,
|
|
714
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq1_s,
|
|
715
|
+
.from_float = NULL,
|
|
716
|
+
.from_float_reference = NULL,
|
|
717
|
+
.vec_dot = ggml_vec_dot_iq1_s_q8_K,
|
|
718
|
+
.vec_dot_type = GGML_TYPE_Q8_K,
|
|
719
|
+
.nrows = 1,
|
|
720
|
+
},
|
|
721
|
+
[GGML_TYPE_IQ4_NL] = {
|
|
722
|
+
.type_name = "iq4_nl",
|
|
723
|
+
.blck_size = QK4_NL,
|
|
724
|
+
.type_size = sizeof(block_iq4_nl),
|
|
725
|
+
.is_quantized = true,
|
|
726
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
|
|
727
|
+
.from_float = quantize_row_iq4_nl,
|
|
728
|
+
.from_float_reference = (ggml_from_float_t)quantize_row_iq4_nl_reference,
|
|
729
|
+
.vec_dot = ggml_vec_dot_iq4_nl_q8_0,
|
|
730
|
+
.vec_dot_type = GGML_TYPE_Q8_0,
|
|
731
|
+
.nrows = 1,
|
|
732
|
+
},
|
|
733
|
+
[GGML_TYPE_IQ4_XS] = {
|
|
734
|
+
.type_name = "iq4_xs",
|
|
735
|
+
#if QK_K == 64
|
|
736
|
+
.blck_size = QK4_NL,
|
|
737
|
+
#else
|
|
738
|
+
.blck_size = QK_K,
|
|
739
|
+
#endif
|
|
740
|
+
.type_size = sizeof(block_iq4_xs),
|
|
741
|
+
.is_quantized = true,
|
|
742
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
|
|
743
|
+
.from_float = quantize_row_iq4_xs,
|
|
744
|
+
.from_float_reference = (ggml_from_float_t)quantize_row_iq4_xs_reference,
|
|
745
|
+
.vec_dot = ggml_vec_dot_iq4_xs_q8_K,
|
|
746
|
+
#if QK_K == 64
|
|
747
|
+
.vec_dot_type = GGML_TYPE_Q8_0,
|
|
748
|
+
#else
|
|
749
|
+
.vec_dot_type = GGML_TYPE_Q8_K,
|
|
750
|
+
#endif
|
|
751
|
+
.nrows = 1,
|
|
752
|
+
},
|
|
676
753
|
[GGML_TYPE_Q8_K] = {
|
|
677
754
|
.type_name = "q8_K",
|
|
678
755
|
.blck_size = QK_K,
|
|
@@ -769,7 +846,7 @@ inline static float vaddvq_f32(float32x4_t v) {
|
|
|
769
846
|
#define GGML_F16x8 float16x8_t
|
|
770
847
|
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
|
|
771
848
|
#define GGML_F16x8_SET1(x) vdupq_n_f16(x)
|
|
772
|
-
#define GGML_F16x8_LOAD
|
|
849
|
+
#define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
|
|
773
850
|
#define GGML_F16x8_STORE vst1q_f16
|
|
774
851
|
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
|
|
775
852
|
#define GGML_F16x8_ADD vaddq_f16
|
|
@@ -812,7 +889,7 @@ inline static float vaddvq_f32(float32x4_t v) {
|
|
|
812
889
|
#define GGML_F32Cx4 float32x4_t
|
|
813
890
|
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
|
|
814
891
|
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
|
|
815
|
-
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16(x))
|
|
892
|
+
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
|
|
816
893
|
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
|
|
817
894
|
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
|
|
818
895
|
#define GGML_F32Cx4_ADD vaddq_f32
|
|
@@ -868,7 +945,7 @@ do { \
|
|
|
868
945
|
const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), \
|
|
869
946
|
_mm256_extractf128_ps(x[0], 1)); \
|
|
870
947
|
const __m128 t1 = _mm_hadd_ps(t0, t0); \
|
|
871
|
-
res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1));
|
|
948
|
+
res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
|
|
872
949
|
} while (0)
|
|
873
950
|
// TODO: is this optimal ?
|
|
874
951
|
|
|
@@ -1149,7 +1226,7 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) {
|
|
|
1149
1226
|
x[i] = _mm_add_ps(x[i], x[offset+i]); \
|
|
1150
1227
|
} \
|
|
1151
1228
|
const __m128 t0 = _mm_hadd_ps(x[0], x[0]); \
|
|
1152
|
-
res = _mm_cvtss_f32(_mm_hadd_ps(t0, t0));
|
|
1229
|
+
res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t0, t0)); \
|
|
1153
1230
|
}
|
|
1154
1231
|
// TODO: is this optimal ?
|
|
1155
1232
|
|
|
@@ -1531,9 +1608,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
|
|
|
1531
1608
|
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
|
|
1532
1609
|
uint16_t t;
|
|
1533
1610
|
for (int i = 0; i < n; ++i) {
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1611
|
+
if (x[i] <= -10.0f) {
|
|
1612
|
+
y[i] = 0.0f;
|
|
1613
|
+
} else if (x[i] >= 10.0f) {
|
|
1614
|
+
y[i] = x[i];
|
|
1615
|
+
} else {
|
|
1616
|
+
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
|
1617
|
+
memcpy(&t, &fp16, sizeof(uint16_t));
|
|
1618
|
+
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
|
|
1619
|
+
}
|
|
1537
1620
|
}
|
|
1538
1621
|
}
|
|
1539
1622
|
#else
|
|
@@ -1954,9 +2037,16 @@ struct ggml_numa_node {
|
|
|
1954
2037
|
};
|
|
1955
2038
|
|
|
1956
2039
|
struct ggml_numa_nodes {
|
|
2040
|
+
enum ggml_numa_strategy numa_strategy;
|
|
1957
2041
|
struct ggml_numa_node nodes[GGML_NUMA_MAX_NODES];
|
|
1958
2042
|
uint32_t n_nodes;
|
|
1959
2043
|
uint32_t total_cpus; // hardware threads on system
|
|
2044
|
+
uint32_t current_node; // node on which main process is execting
|
|
2045
|
+
#if defined(__gnu_linux__)
|
|
2046
|
+
cpu_set_t cpuset; // cpuset from numactl
|
|
2047
|
+
#else
|
|
2048
|
+
uint32_t cpuset; // no NUMA support outside of Linux at this time. Use a portable datatype
|
|
2049
|
+
#endif
|
|
1960
2050
|
};
|
|
1961
2051
|
|
|
1962
2052
|
//
|
|
@@ -1990,18 +2080,40 @@ inline static void ggml_critical_section_end(void) {
|
|
|
1990
2080
|
atomic_fetch_sub(&g_state_barrier, 1);
|
|
1991
2081
|
}
|
|
1992
2082
|
|
|
1993
|
-
|
|
2083
|
+
#if defined(__gnu_linux__)
|
|
2084
|
+
static cpu_set_t ggml_get_numa_affinity(void) {
|
|
2085
|
+
cpu_set_t cpuset;
|
|
2086
|
+
pthread_t thread;
|
|
2087
|
+
thread = pthread_self();
|
|
2088
|
+
CPU_ZERO(&cpuset);
|
|
2089
|
+
pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
|
|
2090
|
+
return cpuset;
|
|
2091
|
+
}
|
|
2092
|
+
#else
|
|
2093
|
+
static uint32_t ggml_get_numa_affinity(void) {
|
|
2094
|
+
return 0; // no NUMA support
|
|
2095
|
+
}
|
|
2096
|
+
#endif
|
|
2097
|
+
|
|
2098
|
+
void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
|
|
1994
2099
|
if (g_state.numa.n_nodes > 0) {
|
|
1995
2100
|
fprintf(stderr, "ggml_numa_init: NUMA already initialized\n");
|
|
1996
2101
|
|
|
1997
2102
|
return;
|
|
1998
2103
|
}
|
|
1999
2104
|
|
|
2000
|
-
#
|
|
2105
|
+
#if defined(__gnu_linux__)
|
|
2001
2106
|
struct stat st;
|
|
2002
2107
|
char path[256];
|
|
2003
2108
|
int rv;
|
|
2004
2109
|
|
|
2110
|
+
// set numa scheme
|
|
2111
|
+
g_state.numa.numa_strategy = numa_flag;
|
|
2112
|
+
|
|
2113
|
+
GGML_PRINT_DEBUG("numa strategy %u\n",g_state.numa.numa_strategy);
|
|
2114
|
+
|
|
2115
|
+
g_state.numa.cpuset = ggml_get_numa_affinity();
|
|
2116
|
+
|
|
2005
2117
|
// enumerate nodes
|
|
2006
2118
|
while (g_state.numa.n_nodes < GGML_NUMA_MAX_NODES) {
|
|
2007
2119
|
rv = snprintf(path, sizeof(path), "/sys/devices/system/node/node%u", g_state.numa.n_nodes);
|
|
@@ -2020,11 +2132,23 @@ void ggml_numa_init(void) {
|
|
|
2020
2132
|
|
|
2021
2133
|
GGML_PRINT_DEBUG("found %u numa nodes, %u CPUs\n", g_state.numa.n_nodes, g_state.numa.total_cpus);
|
|
2022
2134
|
|
|
2023
|
-
|
|
2135
|
+
// figure out which node we're on
|
|
2136
|
+
uint current_cpu;
|
|
2137
|
+
int getcpu_ret = 0;
|
|
2138
|
+
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 28)
|
|
2139
|
+
getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
|
|
2140
|
+
#else
|
|
2141
|
+
// old glibc doesn't have a wrapper for this call. Fall back on direct syscall
|
|
2142
|
+
getcpu_ret = syscall(SYS_getcpu,&current_cpu,&g_state.numa.current_node);
|
|
2143
|
+
#endif
|
|
2144
|
+
|
|
2145
|
+
if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
|
|
2024
2146
|
g_state.numa.n_nodes = 0;
|
|
2025
2147
|
return;
|
|
2026
2148
|
}
|
|
2027
2149
|
|
|
2150
|
+
GGML_PRINT_DEBUG("found our process on numa node %u, CPU %u\n", g_state.numa.current_node, current_cpu);
|
|
2151
|
+
|
|
2028
2152
|
for (uint32_t n = 0; n < g_state.numa.n_nodes; ++n) {
|
|
2029
2153
|
struct ggml_numa_node * node = &g_state.numa.nodes[n];
|
|
2030
2154
|
GGML_PRINT_DEBUG("CPUs on node %u:", n);
|
|
@@ -2051,6 +2175,7 @@ void ggml_numa_init(void) {
|
|
|
2051
2175
|
}
|
|
2052
2176
|
}
|
|
2053
2177
|
#else
|
|
2178
|
+
GGML_UNUSED(numa_flag);
|
|
2054
2179
|
// TODO
|
|
2055
2180
|
#endif
|
|
2056
2181
|
}
|
|
@@ -2231,6 +2356,11 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|
|
2231
2356
|
case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
|
|
2232
2357
|
case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
|
|
2233
2358
|
case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
|
|
2359
|
+
case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
|
|
2360
|
+
case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
|
|
2361
|
+
case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
|
|
2362
|
+
case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
|
|
2363
|
+
case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
|
|
2234
2364
|
case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
|
|
2235
2365
|
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
|
|
2236
2366
|
}
|
|
@@ -2635,7 +2765,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|
|
2635
2765
|
}
|
|
2636
2766
|
}
|
|
2637
2767
|
|
|
2638
|
-
struct ggml_object * const obj_new = ggml_new_object(ctx,
|
|
2768
|
+
struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
|
|
2639
2769
|
|
|
2640
2770
|
// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
|
|
2641
2771
|
|
|
@@ -2643,7 +2773,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|
|
2643
2773
|
|
|
2644
2774
|
*result = (struct ggml_tensor) {
|
|
2645
2775
|
/*.type =*/ type,
|
|
2646
|
-
/*.backend =*/
|
|
2776
|
+
/*.backend =*/ GGML_BACKEND_TYPE_CPU,
|
|
2647
2777
|
/*.buffer =*/ NULL,
|
|
2648
2778
|
/*.ne =*/ { 1, 1, 1, 1 },
|
|
2649
2779
|
/*.nb =*/ { 0, 0, 0, 0 },
|
|
@@ -3184,7 +3314,7 @@ const char * ggml_get_name(const struct ggml_tensor * tensor) {
|
|
|
3184
3314
|
}
|
|
3185
3315
|
|
|
3186
3316
|
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
|
|
3187
|
-
strncpy(tensor->name, name, sizeof(tensor->name));
|
|
3317
|
+
strncpy(tensor->name, name, sizeof(tensor->name) - 1);
|
|
3188
3318
|
tensor->name[sizeof(tensor->name) - 1] = '\0';
|
|
3189
3319
|
return tensor;
|
|
3190
3320
|
}
|
|
@@ -3216,7 +3346,7 @@ struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
|
|
|
3216
3346
|
char * const mem_buffer = ctx->mem_buffer;
|
|
3217
3347
|
|
|
3218
3348
|
while (obj != NULL) {
|
|
3219
|
-
if (obj->type ==
|
|
3349
|
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
|
3220
3350
|
return (struct ggml_tensor *)(mem_buffer + obj->offs);
|
|
3221
3351
|
}
|
|
3222
3352
|
|
|
@@ -3233,7 +3363,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struc
|
|
|
3233
3363
|
char * const mem_buffer = ctx->mem_buffer;
|
|
3234
3364
|
|
|
3235
3365
|
while (obj != NULL) {
|
|
3236
|
-
if (obj->type ==
|
|
3366
|
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
|
3237
3367
|
return (struct ggml_tensor *)(mem_buffer + obj->offs);
|
|
3238
3368
|
}
|
|
3239
3369
|
|
|
@@ -3249,7 +3379,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
|
|
|
3249
3379
|
char * const mem_buffer = ctx->mem_buffer;
|
|
3250
3380
|
|
|
3251
3381
|
while (obj != NULL) {
|
|
3252
|
-
if (obj->type ==
|
|
3382
|
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
|
3253
3383
|
struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
|
|
3254
3384
|
if (strcmp(cur->name, name) == 0) {
|
|
3255
3385
|
return cur;
|
|
@@ -5060,16 +5190,28 @@ static struct ggml_tensor * ggml_soft_max_impl(
|
|
|
5060
5190
|
struct ggml_context * ctx,
|
|
5061
5191
|
struct ggml_tensor * a,
|
|
5062
5192
|
struct ggml_tensor * mask,
|
|
5193
|
+
struct ggml_tensor * pos,
|
|
5063
5194
|
float scale,
|
|
5195
|
+
float max_bias,
|
|
5064
5196
|
bool inplace) {
|
|
5065
5197
|
GGML_ASSERT(ggml_is_contiguous(a));
|
|
5198
|
+
|
|
5066
5199
|
if (mask) {
|
|
5067
5200
|
GGML_ASSERT(ggml_is_contiguous(mask));
|
|
5068
|
-
GGML_ASSERT(mask
|
|
5069
|
-
GGML_ASSERT(mask->ne[3] == 1);
|
|
5201
|
+
GGML_ASSERT(ggml_is_matrix(mask));
|
|
5070
5202
|
GGML_ASSERT(ggml_can_repeat_rows(mask, a));
|
|
5071
5203
|
}
|
|
5072
5204
|
|
|
5205
|
+
if (pos) {
|
|
5206
|
+
GGML_ASSERT(ggml_is_vector(pos));
|
|
5207
|
+
GGML_ASSERT(pos->type == GGML_TYPE_F32);
|
|
5208
|
+
GGML_ASSERT(pos->ne[0] == a->ne[0]);
|
|
5209
|
+
}
|
|
5210
|
+
|
|
5211
|
+
if (max_bias > 0.0f) {
|
|
5212
|
+
GGML_ASSERT(pos);
|
|
5213
|
+
}
|
|
5214
|
+
|
|
5073
5215
|
bool is_node = false;
|
|
5074
5216
|
|
|
5075
5217
|
if (a->grad) {
|
|
@@ -5078,13 +5220,14 @@ static struct ggml_tensor * ggml_soft_max_impl(
|
|
|
5078
5220
|
|
|
5079
5221
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
|
5080
5222
|
|
|
5081
|
-
float params[] = { scale };
|
|
5223
|
+
float params[] = { scale, max_bias };
|
|
5082
5224
|
ggml_set_op_params(result, params, sizeof(params));
|
|
5083
5225
|
|
|
5084
5226
|
result->op = GGML_OP_SOFT_MAX;
|
|
5085
5227
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
|
5086
5228
|
result->src[0] = a;
|
|
5087
5229
|
result->src[1] = mask;
|
|
5230
|
+
result->src[2] = pos;
|
|
5088
5231
|
|
|
5089
5232
|
return result;
|
|
5090
5233
|
}
|
|
@@ -5092,21 +5235,23 @@ static struct ggml_tensor * ggml_soft_max_impl(
|
|
|
5092
5235
|
struct ggml_tensor * ggml_soft_max(
|
|
5093
5236
|
struct ggml_context * ctx,
|
|
5094
5237
|
struct ggml_tensor * a) {
|
|
5095
|
-
return ggml_soft_max_impl(ctx, a, NULL, 1.0f, false);
|
|
5238
|
+
return ggml_soft_max_impl(ctx, a, NULL, NULL, 1.0f, 0.0f, false);
|
|
5096
5239
|
}
|
|
5097
5240
|
|
|
5098
5241
|
struct ggml_tensor * ggml_soft_max_inplace(
|
|
5099
5242
|
struct ggml_context * ctx,
|
|
5100
5243
|
struct ggml_tensor * a) {
|
|
5101
|
-
return ggml_soft_max_impl(ctx, a, NULL, 1.0f, true);
|
|
5244
|
+
return ggml_soft_max_impl(ctx, a, NULL, NULL, 1.0f, 0.0f, true);
|
|
5102
5245
|
}
|
|
5103
5246
|
|
|
5104
5247
|
struct ggml_tensor * ggml_soft_max_ext(
|
|
5105
5248
|
struct ggml_context * ctx,
|
|
5106
5249
|
struct ggml_tensor * a,
|
|
5107
5250
|
struct ggml_tensor * mask,
|
|
5108
|
-
|
|
5109
|
-
|
|
5251
|
+
struct ggml_tensor * pos,
|
|
5252
|
+
float scale,
|
|
5253
|
+
float max_bias) {
|
|
5254
|
+
return ggml_soft_max_impl(ctx, a, mask, pos, scale, max_bias, false);
|
|
5110
5255
|
}
|
|
5111
5256
|
|
|
5112
5257
|
// ggml_soft_max_back
|
|
@@ -5556,7 +5701,9 @@ struct ggml_tensor * ggml_conv_2d(
|
|
|
5556
5701
|
ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
|
|
5557
5702
|
ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW]
|
|
5558
5703
|
|
|
5559
|
-
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2],
|
|
5704
|
+
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
|
|
5705
|
+
result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
|
|
5706
|
+
|
|
5560
5707
|
|
|
5561
5708
|
return result;
|
|
5562
5709
|
}
|
|
@@ -5639,11 +5786,13 @@ struct ggml_tensor * ggml_pool_1d(
|
|
|
5639
5786
|
is_node = true;
|
|
5640
5787
|
}
|
|
5641
5788
|
|
|
5642
|
-
const int64_t ne[
|
|
5789
|
+
const int64_t ne[4] = {
|
|
5643
5790
|
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
|
5644
5791
|
a->ne[1],
|
|
5792
|
+
a->ne[2],
|
|
5793
|
+
a->ne[3],
|
|
5645
5794
|
};
|
|
5646
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
|
5795
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
|
5647
5796
|
|
|
5648
5797
|
int32_t params[] = { op, k0, s0, p0 };
|
|
5649
5798
|
ggml_set_op_params(result, params, sizeof(params));
|
|
@@ -5776,7 +5925,7 @@ struct ggml_tensor * ggml_top_k(
|
|
|
5776
5925
|
int k) {
|
|
5777
5926
|
GGML_ASSERT(a->ne[0] >= k);
|
|
5778
5927
|
|
|
5779
|
-
struct ggml_tensor * result = ggml_argsort(ctx, a,
|
|
5928
|
+
struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
|
|
5780
5929
|
|
|
5781
5930
|
result = ggml_view_4d(ctx, result,
|
|
5782
5931
|
k, result->ne[1], result->ne[2], result->ne[3],
|
|
@@ -6562,13 +6711,15 @@ void ggml_set_param(
|
|
|
6562
6711
|
|
|
6563
6712
|
static void ggml_compute_forward_dup_same_cont(
|
|
6564
6713
|
const struct ggml_compute_params * params,
|
|
6565
|
-
const struct ggml_tensor * src0,
|
|
6566
6714
|
struct ggml_tensor * dst) {
|
|
6715
|
+
|
|
6716
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
6717
|
+
|
|
6567
6718
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
|
6568
6719
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
|
6569
6720
|
GGML_ASSERT(src0->type == dst->type);
|
|
6570
6721
|
|
|
6571
|
-
if (params->type ==
|
|
6722
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
6572
6723
|
return;
|
|
6573
6724
|
}
|
|
6574
6725
|
|
|
@@ -6594,11 +6745,13 @@ static void ggml_compute_forward_dup_same_cont(
|
|
|
6594
6745
|
}
|
|
6595
6746
|
static void ggml_compute_forward_dup_f16(
|
|
6596
6747
|
const struct ggml_compute_params * params,
|
|
6597
|
-
const struct ggml_tensor * src0,
|
|
6598
6748
|
struct ggml_tensor * dst) {
|
|
6749
|
+
|
|
6750
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
6751
|
+
|
|
6599
6752
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
|
6600
6753
|
|
|
6601
|
-
if (params->type ==
|
|
6754
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
6602
6755
|
return;
|
|
6603
6756
|
}
|
|
6604
6757
|
|
|
@@ -6608,7 +6761,7 @@ static void ggml_compute_forward_dup_f16(
|
|
|
6608
6761
|
const int nth = params->nth; // number of threads
|
|
6609
6762
|
|
|
6610
6763
|
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
|
|
6611
|
-
ggml_compute_forward_dup_same_cont(params,
|
|
6764
|
+
ggml_compute_forward_dup_same_cont(params, dst);
|
|
6612
6765
|
return;
|
|
6613
6766
|
}
|
|
6614
6767
|
|
|
@@ -6865,11 +7018,13 @@ static void ggml_compute_forward_dup_f16(
|
|
|
6865
7018
|
|
|
6866
7019
|
static void ggml_compute_forward_dup_f32(
|
|
6867
7020
|
const struct ggml_compute_params * params,
|
|
6868
|
-
const struct ggml_tensor * src0,
|
|
6869
7021
|
struct ggml_tensor * dst) {
|
|
7022
|
+
|
|
7023
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7024
|
+
|
|
6870
7025
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
|
6871
7026
|
|
|
6872
|
-
if (params->type ==
|
|
7027
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
6873
7028
|
return;
|
|
6874
7029
|
}
|
|
6875
7030
|
|
|
@@ -6879,7 +7034,7 @@ static void ggml_compute_forward_dup_f32(
|
|
|
6879
7034
|
const int nth = params->nth; // number of threads
|
|
6880
7035
|
|
|
6881
7036
|
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
|
|
6882
|
-
ggml_compute_forward_dup_same_cont(params,
|
|
7037
|
+
ggml_compute_forward_dup_same_cont(params, dst);
|
|
6883
7038
|
return;
|
|
6884
7039
|
}
|
|
6885
7040
|
|
|
@@ -7115,17 +7270,19 @@ static void ggml_compute_forward_dup_f32(
|
|
|
7115
7270
|
// A simplified version of ggml_compute_forward_dup that doesn't do float upcasting, and just plain old memcpy.
|
|
7116
7271
|
static void ggml_compute_forward_dup_bytes(
|
|
7117
7272
|
const struct ggml_compute_params * params,
|
|
7118
|
-
const struct ggml_tensor * src0,
|
|
7119
7273
|
struct ggml_tensor * dst) {
|
|
7274
|
+
|
|
7275
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7276
|
+
|
|
7120
7277
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
|
7121
7278
|
GGML_ASSERT(src0->type == dst->type);
|
|
7122
7279
|
|
|
7123
|
-
if (params->type ==
|
|
7280
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7124
7281
|
return;
|
|
7125
7282
|
}
|
|
7126
7283
|
|
|
7127
7284
|
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
|
|
7128
|
-
ggml_compute_forward_dup_same_cont(params,
|
|
7285
|
+
ggml_compute_forward_dup_same_cont(params, dst);
|
|
7129
7286
|
return;
|
|
7130
7287
|
}
|
|
7131
7288
|
|
|
@@ -7264,21 +7421,23 @@ static void ggml_compute_forward_dup_bytes(
|
|
|
7264
7421
|
|
|
7265
7422
|
static void ggml_compute_forward_dup(
|
|
7266
7423
|
const struct ggml_compute_params * params,
|
|
7267
|
-
const struct ggml_tensor * src0,
|
|
7268
7424
|
struct ggml_tensor * dst) {
|
|
7425
|
+
|
|
7426
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7427
|
+
|
|
7269
7428
|
if (src0->type == dst->type) {
|
|
7270
|
-
ggml_compute_forward_dup_bytes(params,
|
|
7429
|
+
ggml_compute_forward_dup_bytes(params, dst);
|
|
7271
7430
|
return;
|
|
7272
7431
|
}
|
|
7273
7432
|
|
|
7274
7433
|
switch (src0->type) {
|
|
7275
7434
|
case GGML_TYPE_F16:
|
|
7276
7435
|
{
|
|
7277
|
-
ggml_compute_forward_dup_f16(params,
|
|
7436
|
+
ggml_compute_forward_dup_f16(params, dst);
|
|
7278
7437
|
} break;
|
|
7279
7438
|
case GGML_TYPE_F32:
|
|
7280
7439
|
{
|
|
7281
|
-
ggml_compute_forward_dup_f32(params,
|
|
7440
|
+
ggml_compute_forward_dup_f32(params, dst);
|
|
7282
7441
|
} break;
|
|
7283
7442
|
default:
|
|
7284
7443
|
{
|
|
@@ -7291,12 +7450,14 @@ static void ggml_compute_forward_dup(
|
|
|
7291
7450
|
|
|
7292
7451
|
static void ggml_compute_forward_add_f32(
|
|
7293
7452
|
const struct ggml_compute_params * params,
|
|
7294
|
-
const struct ggml_tensor * src0,
|
|
7295
|
-
const struct ggml_tensor * src1,
|
|
7296
7453
|
struct ggml_tensor * dst) {
|
|
7454
|
+
|
|
7455
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7456
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7457
|
+
|
|
7297
7458
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
|
7298
7459
|
|
|
7299
|
-
if (params->type ==
|
|
7460
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7300
7461
|
return;
|
|
7301
7462
|
}
|
|
7302
7463
|
|
|
@@ -7304,7 +7465,7 @@ static void ggml_compute_forward_add_f32(
|
|
|
7304
7465
|
const int nth = params->nth;
|
|
7305
7466
|
|
|
7306
7467
|
#ifdef GGML_USE_CLBLAST
|
|
7307
|
-
if (src1->backend ==
|
|
7468
|
+
if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
|
7308
7469
|
// TODO: OpenCL kernel support full broadcast
|
|
7309
7470
|
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
|
7310
7471
|
if (ith == 0) {
|
|
@@ -7379,12 +7540,14 @@ static void ggml_compute_forward_add_f32(
|
|
|
7379
7540
|
|
|
7380
7541
|
static void ggml_compute_forward_add_f16_f32(
|
|
7381
7542
|
const struct ggml_compute_params * params,
|
|
7382
|
-
const struct ggml_tensor * src0,
|
|
7383
|
-
const struct ggml_tensor * src1,
|
|
7384
7543
|
struct ggml_tensor * dst) {
|
|
7544
|
+
|
|
7545
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7546
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7547
|
+
|
|
7385
7548
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
|
7386
7549
|
|
|
7387
|
-
if (params->type ==
|
|
7550
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7388
7551
|
return;
|
|
7389
7552
|
}
|
|
7390
7553
|
|
|
@@ -7456,12 +7619,14 @@ static void ggml_compute_forward_add_f16_f32(
|
|
|
7456
7619
|
|
|
7457
7620
|
static void ggml_compute_forward_add_f16_f16(
|
|
7458
7621
|
const struct ggml_compute_params * params,
|
|
7459
|
-
const struct ggml_tensor * src0,
|
|
7460
|
-
const struct ggml_tensor * src1,
|
|
7461
7622
|
struct ggml_tensor * dst) {
|
|
7623
|
+
|
|
7624
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7625
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7626
|
+
|
|
7462
7627
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
|
7463
7628
|
|
|
7464
|
-
if (params->type ==
|
|
7629
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7465
7630
|
return;
|
|
7466
7631
|
}
|
|
7467
7632
|
|
|
@@ -7510,12 +7675,14 @@ static void ggml_compute_forward_add_f16_f16(
|
|
|
7510
7675
|
|
|
7511
7676
|
static void ggml_compute_forward_add_q_f32(
|
|
7512
7677
|
const struct ggml_compute_params * params,
|
|
7513
|
-
const struct ggml_tensor * src0,
|
|
7514
|
-
const struct ggml_tensor * src1,
|
|
7515
7678
|
struct ggml_tensor * dst) {
|
|
7679
|
+
|
|
7680
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7681
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7682
|
+
|
|
7516
7683
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
|
7517
7684
|
|
|
7518
|
-
if (params->type ==
|
|
7685
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7519
7686
|
return;
|
|
7520
7687
|
}
|
|
7521
7688
|
|
|
@@ -7588,14 +7755,16 @@ static void ggml_compute_forward_add_q_f32(
|
|
|
7588
7755
|
|
|
7589
7756
|
static void ggml_compute_forward_add(
|
|
7590
7757
|
const struct ggml_compute_params * params,
|
|
7591
|
-
const struct ggml_tensor * src0,
|
|
7592
|
-
const struct ggml_tensor * src1,
|
|
7593
7758
|
struct ggml_tensor * dst) {
|
|
7759
|
+
|
|
7760
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7761
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7762
|
+
|
|
7594
7763
|
switch (src0->type) {
|
|
7595
7764
|
case GGML_TYPE_F32:
|
|
7596
7765
|
{
|
|
7597
7766
|
if (src1->type == GGML_TYPE_F32) {
|
|
7598
|
-
ggml_compute_forward_add_f32(params,
|
|
7767
|
+
ggml_compute_forward_add_f32(params, dst);
|
|
7599
7768
|
}
|
|
7600
7769
|
else {
|
|
7601
7770
|
GGML_ASSERT(false);
|
|
@@ -7604,10 +7773,10 @@ static void ggml_compute_forward_add(
|
|
|
7604
7773
|
case GGML_TYPE_F16:
|
|
7605
7774
|
{
|
|
7606
7775
|
if (src1->type == GGML_TYPE_F16) {
|
|
7607
|
-
ggml_compute_forward_add_f16_f16(params,
|
|
7776
|
+
ggml_compute_forward_add_f16_f16(params, dst);
|
|
7608
7777
|
}
|
|
7609
7778
|
else if (src1->type == GGML_TYPE_F32) {
|
|
7610
|
-
ggml_compute_forward_add_f16_f32(params,
|
|
7779
|
+
ggml_compute_forward_add_f16_f32(params, dst);
|
|
7611
7780
|
}
|
|
7612
7781
|
else {
|
|
7613
7782
|
GGML_ASSERT(false);
|
|
@@ -7626,8 +7795,13 @@ static void ggml_compute_forward_add(
|
|
|
7626
7795
|
case GGML_TYPE_IQ2_XXS:
|
|
7627
7796
|
case GGML_TYPE_IQ2_XS:
|
|
7628
7797
|
case GGML_TYPE_IQ3_XXS:
|
|
7798
|
+
case GGML_TYPE_IQ1_S:
|
|
7799
|
+
case GGML_TYPE_IQ4_NL:
|
|
7800
|
+
case GGML_TYPE_IQ4_XS:
|
|
7801
|
+
case GGML_TYPE_IQ3_S:
|
|
7802
|
+
case GGML_TYPE_IQ2_S:
|
|
7629
7803
|
{
|
|
7630
|
-
ggml_compute_forward_add_q_f32(params,
|
|
7804
|
+
ggml_compute_forward_add_q_f32(params, dst);
|
|
7631
7805
|
} break;
|
|
7632
7806
|
default:
|
|
7633
7807
|
{
|
|
@@ -7640,13 +7814,15 @@ static void ggml_compute_forward_add(
|
|
|
7640
7814
|
|
|
7641
7815
|
static void ggml_compute_forward_add1_f32(
|
|
7642
7816
|
const struct ggml_compute_params * params,
|
|
7643
|
-
const struct ggml_tensor * src0,
|
|
7644
|
-
const struct ggml_tensor * src1,
|
|
7645
7817
|
struct ggml_tensor * dst) {
|
|
7818
|
+
|
|
7819
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7820
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7821
|
+
|
|
7646
7822
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
7647
7823
|
GGML_ASSERT(ggml_is_scalar(src1));
|
|
7648
7824
|
|
|
7649
|
-
if (params->type ==
|
|
7825
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7650
7826
|
return;
|
|
7651
7827
|
}
|
|
7652
7828
|
|
|
@@ -7692,13 +7868,15 @@ static void ggml_compute_forward_add1_f32(
|
|
|
7692
7868
|
|
|
7693
7869
|
static void ggml_compute_forward_add1_f16_f32(
|
|
7694
7870
|
const struct ggml_compute_params * params,
|
|
7695
|
-
const struct ggml_tensor * src0,
|
|
7696
|
-
const struct ggml_tensor * src1,
|
|
7697
7871
|
struct ggml_tensor * dst) {
|
|
7872
|
+
|
|
7873
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7874
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7875
|
+
|
|
7698
7876
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
7699
7877
|
GGML_ASSERT(ggml_is_scalar(src1));
|
|
7700
7878
|
|
|
7701
|
-
if (params->type ==
|
|
7879
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7702
7880
|
return;
|
|
7703
7881
|
}
|
|
7704
7882
|
|
|
@@ -7742,13 +7920,15 @@ static void ggml_compute_forward_add1_f16_f32(
|
|
|
7742
7920
|
|
|
7743
7921
|
static void ggml_compute_forward_add1_f16_f16(
|
|
7744
7922
|
const struct ggml_compute_params * params,
|
|
7745
|
-
const struct ggml_tensor * src0,
|
|
7746
|
-
const struct ggml_tensor * src1,
|
|
7747
7923
|
struct ggml_tensor * dst) {
|
|
7924
|
+
|
|
7925
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7926
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7927
|
+
|
|
7748
7928
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
7749
7929
|
GGML_ASSERT(ggml_is_scalar(src1));
|
|
7750
7930
|
|
|
7751
|
-
if (params->type ==
|
|
7931
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7752
7932
|
return;
|
|
7753
7933
|
}
|
|
7754
7934
|
|
|
@@ -7792,13 +7972,15 @@ static void ggml_compute_forward_add1_f16_f16(
|
|
|
7792
7972
|
|
|
7793
7973
|
static void ggml_compute_forward_add1_q_f32(
|
|
7794
7974
|
const struct ggml_compute_params * params,
|
|
7795
|
-
const struct ggml_tensor * src0,
|
|
7796
|
-
const struct ggml_tensor * src1,
|
|
7797
7975
|
struct ggml_tensor * dst) {
|
|
7976
|
+
|
|
7977
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
7978
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
7979
|
+
|
|
7798
7980
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
7799
7981
|
GGML_ASSERT(ggml_is_scalar(src1));
|
|
7800
7982
|
|
|
7801
|
-
if (params->type ==
|
|
7983
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7802
7984
|
return;
|
|
7803
7985
|
}
|
|
7804
7986
|
|
|
@@ -7859,21 +8041,23 @@ static void ggml_compute_forward_add1_q_f32(
|
|
|
7859
8041
|
|
|
7860
8042
|
static void ggml_compute_forward_add1(
|
|
7861
8043
|
const struct ggml_compute_params * params,
|
|
7862
|
-
const struct ggml_tensor * src0,
|
|
7863
|
-
const struct ggml_tensor * src1,
|
|
7864
8044
|
struct ggml_tensor * dst) {
|
|
8045
|
+
|
|
8046
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8047
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
8048
|
+
|
|
7865
8049
|
switch (src0->type) {
|
|
7866
8050
|
case GGML_TYPE_F32:
|
|
7867
8051
|
{
|
|
7868
|
-
ggml_compute_forward_add1_f32(params,
|
|
8052
|
+
ggml_compute_forward_add1_f32(params, dst);
|
|
7869
8053
|
} break;
|
|
7870
8054
|
case GGML_TYPE_F16:
|
|
7871
8055
|
{
|
|
7872
8056
|
if (src1->type == GGML_TYPE_F16) {
|
|
7873
|
-
ggml_compute_forward_add1_f16_f16(params,
|
|
8057
|
+
ggml_compute_forward_add1_f16_f16(params, dst);
|
|
7874
8058
|
}
|
|
7875
8059
|
else if (src1->type == GGML_TYPE_F32) {
|
|
7876
|
-
ggml_compute_forward_add1_f16_f32(params,
|
|
8060
|
+
ggml_compute_forward_add1_f16_f32(params, dst);
|
|
7877
8061
|
}
|
|
7878
8062
|
else {
|
|
7879
8063
|
GGML_ASSERT(false);
|
|
@@ -7893,8 +8077,13 @@ static void ggml_compute_forward_add1(
|
|
|
7893
8077
|
case GGML_TYPE_IQ2_XXS:
|
|
7894
8078
|
case GGML_TYPE_IQ2_XS:
|
|
7895
8079
|
case GGML_TYPE_IQ3_XXS:
|
|
8080
|
+
case GGML_TYPE_IQ1_S:
|
|
8081
|
+
case GGML_TYPE_IQ4_NL:
|
|
8082
|
+
case GGML_TYPE_IQ4_XS:
|
|
8083
|
+
case GGML_TYPE_IQ3_S:
|
|
8084
|
+
case GGML_TYPE_IQ2_S:
|
|
7896
8085
|
{
|
|
7897
|
-
ggml_compute_forward_add1_q_f32(params,
|
|
8086
|
+
ggml_compute_forward_add1_q_f32(params, dst);
|
|
7898
8087
|
} break;
|
|
7899
8088
|
default:
|
|
7900
8089
|
{
|
|
@@ -7907,9 +8096,11 @@ static void ggml_compute_forward_add1(
|
|
|
7907
8096
|
|
|
7908
8097
|
static void ggml_compute_forward_acc_f32(
|
|
7909
8098
|
const struct ggml_compute_params * params,
|
|
7910
|
-
const struct ggml_tensor * src0,
|
|
7911
|
-
const struct ggml_tensor * src1,
|
|
7912
8099
|
struct ggml_tensor * dst) {
|
|
8100
|
+
|
|
8101
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8102
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
8103
|
+
|
|
7913
8104
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
7914
8105
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
|
7915
8106
|
|
|
@@ -7921,7 +8112,7 @@ static void ggml_compute_forward_acc_f32(
|
|
|
7921
8112
|
size_t offset = ((int32_t *) dst->op_params)[3];
|
|
7922
8113
|
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
|
7923
8114
|
|
|
7924
|
-
if (!inplace && (params->type ==
|
|
8115
|
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
|
7925
8116
|
if (params->ith != 0) {
|
|
7926
8117
|
return;
|
|
7927
8118
|
}
|
|
@@ -7933,7 +8124,7 @@ static void ggml_compute_forward_acc_f32(
|
|
|
7933
8124
|
ggml_nbytes(dst));
|
|
7934
8125
|
}
|
|
7935
8126
|
|
|
7936
|
-
if (params->type ==
|
|
8127
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
7937
8128
|
return;
|
|
7938
8129
|
}
|
|
7939
8130
|
|
|
@@ -7989,14 +8180,14 @@ static void ggml_compute_forward_acc_f32(
|
|
|
7989
8180
|
|
|
7990
8181
|
static void ggml_compute_forward_acc(
|
|
7991
8182
|
const struct ggml_compute_params * params,
|
|
7992
|
-
const struct ggml_tensor * src0,
|
|
7993
|
-
const struct ggml_tensor * src1,
|
|
7994
8183
|
struct ggml_tensor * dst) {
|
|
7995
8184
|
|
|
8185
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8186
|
+
|
|
7996
8187
|
switch (src0->type) {
|
|
7997
8188
|
case GGML_TYPE_F32:
|
|
7998
8189
|
{
|
|
7999
|
-
ggml_compute_forward_acc_f32(params,
|
|
8190
|
+
ggml_compute_forward_acc_f32(params, dst);
|
|
8000
8191
|
} break;
|
|
8001
8192
|
case GGML_TYPE_F16:
|
|
8002
8193
|
case GGML_TYPE_Q4_0:
|
|
@@ -8013,6 +8204,11 @@ static void ggml_compute_forward_acc(
|
|
|
8013
8204
|
case GGML_TYPE_IQ2_XXS:
|
|
8014
8205
|
case GGML_TYPE_IQ2_XS:
|
|
8015
8206
|
case GGML_TYPE_IQ3_XXS:
|
|
8207
|
+
case GGML_TYPE_IQ1_S:
|
|
8208
|
+
case GGML_TYPE_IQ4_NL:
|
|
8209
|
+
case GGML_TYPE_IQ4_XS:
|
|
8210
|
+
case GGML_TYPE_IQ3_S:
|
|
8211
|
+
case GGML_TYPE_IQ2_S:
|
|
8016
8212
|
default:
|
|
8017
8213
|
{
|
|
8018
8214
|
GGML_ASSERT(false);
|
|
@@ -8024,13 +8220,15 @@ static void ggml_compute_forward_acc(
|
|
|
8024
8220
|
|
|
8025
8221
|
static void ggml_compute_forward_sub_f32(
|
|
8026
8222
|
const struct ggml_compute_params * params,
|
|
8027
|
-
const struct ggml_tensor * src0,
|
|
8028
|
-
const struct ggml_tensor * src1,
|
|
8029
8223
|
struct ggml_tensor * dst) {
|
|
8224
|
+
|
|
8225
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8226
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
8227
|
+
|
|
8030
8228
|
assert(params->ith == 0);
|
|
8031
8229
|
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
|
8032
8230
|
|
|
8033
|
-
if (params->type ==
|
|
8231
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8034
8232
|
return;
|
|
8035
8233
|
}
|
|
8036
8234
|
|
|
@@ -8084,13 +8282,14 @@ static void ggml_compute_forward_sub_f32(
|
|
|
8084
8282
|
|
|
8085
8283
|
static void ggml_compute_forward_sub(
|
|
8086
8284
|
const struct ggml_compute_params * params,
|
|
8087
|
-
const struct ggml_tensor * src0,
|
|
8088
|
-
const struct ggml_tensor * src1,
|
|
8089
8285
|
struct ggml_tensor * dst) {
|
|
8286
|
+
|
|
8287
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8288
|
+
|
|
8090
8289
|
switch (src0->type) {
|
|
8091
8290
|
case GGML_TYPE_F32:
|
|
8092
8291
|
{
|
|
8093
|
-
ggml_compute_forward_sub_f32(params,
|
|
8292
|
+
ggml_compute_forward_sub_f32(params, dst);
|
|
8094
8293
|
} break;
|
|
8095
8294
|
default:
|
|
8096
8295
|
{
|
|
@@ -8103,19 +8302,21 @@ static void ggml_compute_forward_sub(
|
|
|
8103
8302
|
|
|
8104
8303
|
static void ggml_compute_forward_mul_f32(
|
|
8105
8304
|
const struct ggml_compute_params * params,
|
|
8106
|
-
const struct ggml_tensor * src0,
|
|
8107
|
-
const struct ggml_tensor * src1,
|
|
8108
8305
|
struct ggml_tensor * dst) {
|
|
8306
|
+
|
|
8307
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8308
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
8309
|
+
|
|
8109
8310
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
|
8110
8311
|
|
|
8111
|
-
if (params->type ==
|
|
8312
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8112
8313
|
return;
|
|
8113
8314
|
}
|
|
8114
8315
|
const int ith = params->ith;
|
|
8115
8316
|
const int nth = params->nth;
|
|
8116
8317
|
|
|
8117
8318
|
#if defined(GGML_USE_CLBLAST)
|
|
8118
|
-
if (src1->backend ==
|
|
8319
|
+
if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
|
8119
8320
|
// TODO: OpenCL kernel support full broadcast
|
|
8120
8321
|
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
|
8121
8322
|
if (ith == 0) {
|
|
@@ -8186,15 +8387,17 @@ static void ggml_compute_forward_mul_f32(
|
|
|
8186
8387
|
|
|
8187
8388
|
static void ggml_compute_forward_mul(
|
|
8188
8389
|
const struct ggml_compute_params * params,
|
|
8189
|
-
const struct ggml_tensor * src0,
|
|
8190
|
-
const struct ggml_tensor * src1,
|
|
8191
8390
|
struct ggml_tensor * dst) {
|
|
8391
|
+
|
|
8392
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8393
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
8394
|
+
|
|
8192
8395
|
GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now");
|
|
8193
8396
|
|
|
8194
8397
|
switch (src0->type) {
|
|
8195
8398
|
case GGML_TYPE_F32:
|
|
8196
8399
|
{
|
|
8197
|
-
ggml_compute_forward_mul_f32(params,
|
|
8400
|
+
ggml_compute_forward_mul_f32(params, dst);
|
|
8198
8401
|
} break;
|
|
8199
8402
|
default:
|
|
8200
8403
|
{
|
|
@@ -8207,12 +8410,14 @@ static void ggml_compute_forward_mul(
|
|
|
8207
8410
|
|
|
8208
8411
|
static void ggml_compute_forward_div_f32(
|
|
8209
8412
|
const struct ggml_compute_params * params,
|
|
8210
|
-
const struct ggml_tensor * src0,
|
|
8211
|
-
const struct ggml_tensor * src1,
|
|
8212
8413
|
struct ggml_tensor * dst) {
|
|
8414
|
+
|
|
8415
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8416
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
8417
|
+
|
|
8213
8418
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
|
8214
8419
|
|
|
8215
|
-
if (params->type ==
|
|
8420
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8216
8421
|
return;
|
|
8217
8422
|
}
|
|
8218
8423
|
|
|
@@ -8280,13 +8485,14 @@ static void ggml_compute_forward_div_f32(
|
|
|
8280
8485
|
|
|
8281
8486
|
static void ggml_compute_forward_div(
|
|
8282
8487
|
const struct ggml_compute_params * params,
|
|
8283
|
-
const struct ggml_tensor * src0,
|
|
8284
|
-
const struct ggml_tensor * src1,
|
|
8285
8488
|
struct ggml_tensor * dst) {
|
|
8489
|
+
|
|
8490
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8491
|
+
|
|
8286
8492
|
switch (src0->type) {
|
|
8287
8493
|
case GGML_TYPE_F32:
|
|
8288
8494
|
{
|
|
8289
|
-
ggml_compute_forward_div_f32(params,
|
|
8495
|
+
ggml_compute_forward_div_f32(params, dst);
|
|
8290
8496
|
} break;
|
|
8291
8497
|
default:
|
|
8292
8498
|
{
|
|
@@ -8299,12 +8505,14 @@ static void ggml_compute_forward_div(
|
|
|
8299
8505
|
|
|
8300
8506
|
static void ggml_compute_forward_sqr_f32(
|
|
8301
8507
|
const struct ggml_compute_params * params,
|
|
8302
|
-
const struct ggml_tensor * src0,
|
|
8303
8508
|
struct ggml_tensor * dst) {
|
|
8509
|
+
|
|
8510
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8511
|
+
|
|
8304
8512
|
assert(params->ith == 0);
|
|
8305
8513
|
assert(ggml_are_same_shape(src0, dst));
|
|
8306
8514
|
|
|
8307
|
-
if (params->type ==
|
|
8515
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8308
8516
|
return;
|
|
8309
8517
|
}
|
|
8310
8518
|
|
|
@@ -8323,12 +8531,14 @@ static void ggml_compute_forward_sqr_f32(
|
|
|
8323
8531
|
|
|
8324
8532
|
static void ggml_compute_forward_sqr(
|
|
8325
8533
|
const struct ggml_compute_params * params,
|
|
8326
|
-
const struct ggml_tensor * src0,
|
|
8327
8534
|
struct ggml_tensor * dst) {
|
|
8535
|
+
|
|
8536
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8537
|
+
|
|
8328
8538
|
switch (src0->type) {
|
|
8329
8539
|
case GGML_TYPE_F32:
|
|
8330
8540
|
{
|
|
8331
|
-
ggml_compute_forward_sqr_f32(params,
|
|
8541
|
+
ggml_compute_forward_sqr_f32(params, dst);
|
|
8332
8542
|
} break;
|
|
8333
8543
|
default:
|
|
8334
8544
|
{
|
|
@@ -8341,12 +8551,14 @@ static void ggml_compute_forward_sqr(
|
|
|
8341
8551
|
|
|
8342
8552
|
static void ggml_compute_forward_sqrt_f32(
|
|
8343
8553
|
const struct ggml_compute_params * params,
|
|
8344
|
-
const struct ggml_tensor * src0,
|
|
8345
8554
|
struct ggml_tensor * dst) {
|
|
8555
|
+
|
|
8556
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8557
|
+
|
|
8346
8558
|
assert(params->ith == 0);
|
|
8347
8559
|
assert(ggml_are_same_shape(src0, dst));
|
|
8348
8560
|
|
|
8349
|
-
if (params->type ==
|
|
8561
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8350
8562
|
return;
|
|
8351
8563
|
}
|
|
8352
8564
|
|
|
@@ -8365,12 +8577,14 @@ static void ggml_compute_forward_sqrt_f32(
|
|
|
8365
8577
|
|
|
8366
8578
|
static void ggml_compute_forward_sqrt(
|
|
8367
8579
|
const struct ggml_compute_params * params,
|
|
8368
|
-
const struct ggml_tensor * src0,
|
|
8369
8580
|
struct ggml_tensor * dst) {
|
|
8581
|
+
|
|
8582
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8583
|
+
|
|
8370
8584
|
switch (src0->type) {
|
|
8371
8585
|
case GGML_TYPE_F32:
|
|
8372
8586
|
{
|
|
8373
|
-
ggml_compute_forward_sqrt_f32(params,
|
|
8587
|
+
ggml_compute_forward_sqrt_f32(params, dst);
|
|
8374
8588
|
} break;
|
|
8375
8589
|
default:
|
|
8376
8590
|
{
|
|
@@ -8383,12 +8597,14 @@ static void ggml_compute_forward_sqrt(
|
|
|
8383
8597
|
|
|
8384
8598
|
static void ggml_compute_forward_log_f32(
|
|
8385
8599
|
const struct ggml_compute_params * params,
|
|
8386
|
-
const struct ggml_tensor * src0,
|
|
8387
8600
|
struct ggml_tensor * dst) {
|
|
8601
|
+
|
|
8602
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8603
|
+
|
|
8388
8604
|
GGML_ASSERT(params->ith == 0);
|
|
8389
8605
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
8390
8606
|
|
|
8391
|
-
if (params->type ==
|
|
8607
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8392
8608
|
return;
|
|
8393
8609
|
}
|
|
8394
8610
|
|
|
@@ -8407,12 +8623,14 @@ static void ggml_compute_forward_log_f32(
|
|
|
8407
8623
|
|
|
8408
8624
|
static void ggml_compute_forward_log(
|
|
8409
8625
|
const struct ggml_compute_params * params,
|
|
8410
|
-
const struct ggml_tensor * src0,
|
|
8411
8626
|
struct ggml_tensor * dst) {
|
|
8627
|
+
|
|
8628
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8629
|
+
|
|
8412
8630
|
switch (src0->type) {
|
|
8413
8631
|
case GGML_TYPE_F32:
|
|
8414
8632
|
{
|
|
8415
|
-
ggml_compute_forward_log_f32(params,
|
|
8633
|
+
ggml_compute_forward_log_f32(params, dst);
|
|
8416
8634
|
} break;
|
|
8417
8635
|
default:
|
|
8418
8636
|
{
|
|
@@ -8425,12 +8643,14 @@ static void ggml_compute_forward_log(
|
|
|
8425
8643
|
|
|
8426
8644
|
static void ggml_compute_forward_sum_f32(
|
|
8427
8645
|
const struct ggml_compute_params * params,
|
|
8428
|
-
const struct ggml_tensor * src0,
|
|
8429
8646
|
struct ggml_tensor * dst) {
|
|
8647
|
+
|
|
8648
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8649
|
+
|
|
8430
8650
|
assert(params->ith == 0);
|
|
8431
8651
|
assert(ggml_is_scalar(dst));
|
|
8432
8652
|
|
|
8433
|
-
if (params->type ==
|
|
8653
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8434
8654
|
return;
|
|
8435
8655
|
}
|
|
8436
8656
|
|
|
@@ -8458,12 +8678,14 @@ static void ggml_compute_forward_sum_f32(
|
|
|
8458
8678
|
|
|
8459
8679
|
static void ggml_compute_forward_sum_f16(
|
|
8460
8680
|
const struct ggml_compute_params * params,
|
|
8461
|
-
const struct ggml_tensor * src0,
|
|
8462
8681
|
struct ggml_tensor * dst) {
|
|
8682
|
+
|
|
8683
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8684
|
+
|
|
8463
8685
|
assert(params->ith == 0);
|
|
8464
8686
|
assert(ggml_is_scalar(dst));
|
|
8465
8687
|
|
|
8466
|
-
if (params->type ==
|
|
8688
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8467
8689
|
return;
|
|
8468
8690
|
}
|
|
8469
8691
|
|
|
@@ -8490,16 +8712,18 @@ static void ggml_compute_forward_sum_f16(
|
|
|
8490
8712
|
|
|
8491
8713
|
static void ggml_compute_forward_sum(
|
|
8492
8714
|
const struct ggml_compute_params * params,
|
|
8493
|
-
const struct ggml_tensor * src0,
|
|
8494
8715
|
struct ggml_tensor * dst) {
|
|
8716
|
+
|
|
8717
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8718
|
+
|
|
8495
8719
|
switch (src0->type) {
|
|
8496
8720
|
case GGML_TYPE_F32:
|
|
8497
8721
|
{
|
|
8498
|
-
ggml_compute_forward_sum_f32(params,
|
|
8722
|
+
ggml_compute_forward_sum_f32(params, dst);
|
|
8499
8723
|
} break;
|
|
8500
8724
|
case GGML_TYPE_F16:
|
|
8501
8725
|
{
|
|
8502
|
-
ggml_compute_forward_sum_f16(params,
|
|
8726
|
+
ggml_compute_forward_sum_f16(params, dst);
|
|
8503
8727
|
} break;
|
|
8504
8728
|
default:
|
|
8505
8729
|
{
|
|
@@ -8512,11 +8736,13 @@ static void ggml_compute_forward_sum(
|
|
|
8512
8736
|
|
|
8513
8737
|
static void ggml_compute_forward_sum_rows_f32(
|
|
8514
8738
|
const struct ggml_compute_params * params,
|
|
8515
|
-
const struct ggml_tensor * src0,
|
|
8516
8739
|
struct ggml_tensor * dst) {
|
|
8740
|
+
|
|
8741
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8742
|
+
|
|
8517
8743
|
GGML_ASSERT(params->ith == 0);
|
|
8518
8744
|
|
|
8519
|
-
if (params->type ==
|
|
8745
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8520
8746
|
return;
|
|
8521
8747
|
}
|
|
8522
8748
|
|
|
@@ -8545,12 +8771,14 @@ static void ggml_compute_forward_sum_rows_f32(
|
|
|
8545
8771
|
|
|
8546
8772
|
static void ggml_compute_forward_sum_rows(
|
|
8547
8773
|
const struct ggml_compute_params * params,
|
|
8548
|
-
const struct ggml_tensor * src0,
|
|
8549
8774
|
struct ggml_tensor * dst) {
|
|
8775
|
+
|
|
8776
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8777
|
+
|
|
8550
8778
|
switch (src0->type) {
|
|
8551
8779
|
case GGML_TYPE_F32:
|
|
8552
8780
|
{
|
|
8553
|
-
ggml_compute_forward_sum_rows_f32(params,
|
|
8781
|
+
ggml_compute_forward_sum_rows_f32(params, dst);
|
|
8554
8782
|
} break;
|
|
8555
8783
|
default:
|
|
8556
8784
|
{
|
|
@@ -8563,11 +8791,13 @@ static void ggml_compute_forward_sum_rows(
|
|
|
8563
8791
|
|
|
8564
8792
|
static void ggml_compute_forward_mean_f32(
|
|
8565
8793
|
const struct ggml_compute_params * params,
|
|
8566
|
-
const struct ggml_tensor * src0,
|
|
8567
8794
|
struct ggml_tensor * dst) {
|
|
8795
|
+
|
|
8796
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8797
|
+
|
|
8568
8798
|
assert(params->ith == 0);
|
|
8569
8799
|
|
|
8570
|
-
if (params->type ==
|
|
8800
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8571
8801
|
return;
|
|
8572
8802
|
}
|
|
8573
8803
|
|
|
@@ -8600,12 +8830,14 @@ static void ggml_compute_forward_mean_f32(
|
|
|
8600
8830
|
|
|
8601
8831
|
static void ggml_compute_forward_mean(
|
|
8602
8832
|
const struct ggml_compute_params * params,
|
|
8603
|
-
const struct ggml_tensor * src0,
|
|
8604
8833
|
struct ggml_tensor * dst) {
|
|
8834
|
+
|
|
8835
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8836
|
+
|
|
8605
8837
|
switch (src0->type) {
|
|
8606
8838
|
case GGML_TYPE_F32:
|
|
8607
8839
|
{
|
|
8608
|
-
ggml_compute_forward_mean_f32(params,
|
|
8840
|
+
ggml_compute_forward_mean_f32(params, dst);
|
|
8609
8841
|
} break;
|
|
8610
8842
|
default:
|
|
8611
8843
|
{
|
|
@@ -8618,11 +8850,13 @@ static void ggml_compute_forward_mean(
|
|
|
8618
8850
|
|
|
8619
8851
|
static void ggml_compute_forward_argmax_f32(
|
|
8620
8852
|
const struct ggml_compute_params * params,
|
|
8621
|
-
const struct ggml_tensor * src0,
|
|
8622
8853
|
struct ggml_tensor * dst) {
|
|
8854
|
+
|
|
8855
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8856
|
+
|
|
8623
8857
|
assert(params->ith == 0);
|
|
8624
8858
|
|
|
8625
|
-
if (params->type ==
|
|
8859
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8626
8860
|
return;
|
|
8627
8861
|
}
|
|
8628
8862
|
|
|
@@ -8646,12 +8880,14 @@ static void ggml_compute_forward_argmax_f32(
|
|
|
8646
8880
|
|
|
8647
8881
|
static void ggml_compute_forward_argmax(
|
|
8648
8882
|
const struct ggml_compute_params * params,
|
|
8649
|
-
const struct ggml_tensor * src0,
|
|
8650
8883
|
struct ggml_tensor * dst) {
|
|
8884
|
+
|
|
8885
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8886
|
+
|
|
8651
8887
|
switch (src0->type) {
|
|
8652
8888
|
case GGML_TYPE_F32:
|
|
8653
8889
|
{
|
|
8654
|
-
ggml_compute_forward_argmax_f32(params,
|
|
8890
|
+
ggml_compute_forward_argmax_f32(params, dst);
|
|
8655
8891
|
} break;
|
|
8656
8892
|
default:
|
|
8657
8893
|
{
|
|
@@ -8664,12 +8900,14 @@ static void ggml_compute_forward_argmax(
|
|
|
8664
8900
|
|
|
8665
8901
|
static void ggml_compute_forward_repeat_f32(
|
|
8666
8902
|
const struct ggml_compute_params * params,
|
|
8667
|
-
const struct ggml_tensor * src0,
|
|
8668
8903
|
struct ggml_tensor * dst) {
|
|
8904
|
+
|
|
8905
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8906
|
+
|
|
8669
8907
|
GGML_ASSERT(params->ith == 0);
|
|
8670
8908
|
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
|
8671
8909
|
|
|
8672
|
-
if (params->type ==
|
|
8910
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8673
8911
|
return;
|
|
8674
8912
|
}
|
|
8675
8913
|
|
|
@@ -8707,12 +8945,14 @@ static void ggml_compute_forward_repeat_f32(
|
|
|
8707
8945
|
|
|
8708
8946
|
static void ggml_compute_forward_repeat_f16(
|
|
8709
8947
|
const struct ggml_compute_params * params,
|
|
8710
|
-
const struct ggml_tensor * src0,
|
|
8711
8948
|
struct ggml_tensor * dst) {
|
|
8949
|
+
|
|
8950
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8951
|
+
|
|
8712
8952
|
GGML_ASSERT(params->ith == 0);
|
|
8713
8953
|
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
|
8714
8954
|
|
|
8715
|
-
if (params->type ==
|
|
8955
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8716
8956
|
return;
|
|
8717
8957
|
}
|
|
8718
8958
|
|
|
@@ -8753,18 +8993,20 @@ static void ggml_compute_forward_repeat_f16(
|
|
|
8753
8993
|
|
|
8754
8994
|
static void ggml_compute_forward_repeat(
|
|
8755
8995
|
const struct ggml_compute_params * params,
|
|
8756
|
-
const struct ggml_tensor * src0,
|
|
8757
8996
|
struct ggml_tensor * dst) {
|
|
8997
|
+
|
|
8998
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
8999
|
+
|
|
8758
9000
|
switch (src0->type) {
|
|
8759
9001
|
case GGML_TYPE_F16:
|
|
8760
9002
|
case GGML_TYPE_I16:
|
|
8761
9003
|
{
|
|
8762
|
-
ggml_compute_forward_repeat_f16(params,
|
|
9004
|
+
ggml_compute_forward_repeat_f16(params, dst);
|
|
8763
9005
|
} break;
|
|
8764
9006
|
case GGML_TYPE_F32:
|
|
8765
9007
|
case GGML_TYPE_I32:
|
|
8766
9008
|
{
|
|
8767
|
-
ggml_compute_forward_repeat_f32(params,
|
|
9009
|
+
ggml_compute_forward_repeat_f32(params, dst);
|
|
8768
9010
|
} break;
|
|
8769
9011
|
default:
|
|
8770
9012
|
{
|
|
@@ -8777,12 +9019,14 @@ static void ggml_compute_forward_repeat(
|
|
|
8777
9019
|
|
|
8778
9020
|
static void ggml_compute_forward_repeat_back_f32(
|
|
8779
9021
|
const struct ggml_compute_params * params,
|
|
8780
|
-
const struct ggml_tensor * src0,
|
|
8781
9022
|
struct ggml_tensor * dst) {
|
|
9023
|
+
|
|
9024
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9025
|
+
|
|
8782
9026
|
GGML_ASSERT(params->ith == 0);
|
|
8783
9027
|
GGML_ASSERT(ggml_can_repeat(dst, src0));
|
|
8784
9028
|
|
|
8785
|
-
if (params->type ==
|
|
9029
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8786
9030
|
return;
|
|
8787
9031
|
}
|
|
8788
9032
|
|
|
@@ -8834,12 +9078,14 @@ static void ggml_compute_forward_repeat_back_f32(
|
|
|
8834
9078
|
|
|
8835
9079
|
static void ggml_compute_forward_repeat_back(
|
|
8836
9080
|
const struct ggml_compute_params * params,
|
|
8837
|
-
const struct ggml_tensor * src0,
|
|
8838
9081
|
struct ggml_tensor * dst) {
|
|
9082
|
+
|
|
9083
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9084
|
+
|
|
8839
9085
|
switch (src0->type) {
|
|
8840
9086
|
case GGML_TYPE_F32:
|
|
8841
9087
|
{
|
|
8842
|
-
ggml_compute_forward_repeat_back_f32(params,
|
|
9088
|
+
ggml_compute_forward_repeat_back_f32(params, dst);
|
|
8843
9089
|
} break;
|
|
8844
9090
|
default:
|
|
8845
9091
|
{
|
|
@@ -8852,11 +9098,12 @@ static void ggml_compute_forward_repeat_back(
|
|
|
8852
9098
|
|
|
8853
9099
|
static void ggml_compute_forward_concat_f32(
|
|
8854
9100
|
const struct ggml_compute_params * params,
|
|
8855
|
-
const struct ggml_tensor * src0,
|
|
8856
|
-
const struct ggml_tensor * src1,
|
|
8857
9101
|
struct ggml_tensor * dst) {
|
|
8858
9102
|
|
|
8859
|
-
|
|
9103
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9104
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
9105
|
+
|
|
9106
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8860
9107
|
return;
|
|
8861
9108
|
}
|
|
8862
9109
|
|
|
@@ -8900,14 +9147,15 @@ static void ggml_compute_forward_concat_f32(
|
|
|
8900
9147
|
|
|
8901
9148
|
static void ggml_compute_forward_concat(
|
|
8902
9149
|
const struct ggml_compute_params* params,
|
|
8903
|
-
const struct ggml_tensor* src0,
|
|
8904
|
-
const struct ggml_tensor* src1,
|
|
8905
9150
|
struct ggml_tensor* dst) {
|
|
9151
|
+
|
|
9152
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9153
|
+
|
|
8906
9154
|
switch (src0->type) {
|
|
8907
9155
|
case GGML_TYPE_F32:
|
|
8908
9156
|
case GGML_TYPE_I32:
|
|
8909
9157
|
{
|
|
8910
|
-
ggml_compute_forward_concat_f32(params,
|
|
9158
|
+
ggml_compute_forward_concat_f32(params, dst);
|
|
8911
9159
|
} break;
|
|
8912
9160
|
default:
|
|
8913
9161
|
{
|
|
@@ -8920,12 +9168,14 @@ static void ggml_compute_forward_concat(
|
|
|
8920
9168
|
|
|
8921
9169
|
static void ggml_compute_forward_abs_f32(
|
|
8922
9170
|
const struct ggml_compute_params * params,
|
|
8923
|
-
const struct ggml_tensor * src0,
|
|
8924
9171
|
struct ggml_tensor * dst) {
|
|
9172
|
+
|
|
9173
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9174
|
+
|
|
8925
9175
|
assert(params->ith == 0);
|
|
8926
9176
|
assert(ggml_are_same_shape(src0, dst));
|
|
8927
9177
|
|
|
8928
|
-
if (params->type ==
|
|
9178
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8929
9179
|
return;
|
|
8930
9180
|
}
|
|
8931
9181
|
|
|
@@ -8944,12 +9194,14 @@ static void ggml_compute_forward_abs_f32(
|
|
|
8944
9194
|
|
|
8945
9195
|
static void ggml_compute_forward_abs(
|
|
8946
9196
|
const struct ggml_compute_params * params,
|
|
8947
|
-
const struct ggml_tensor * src0,
|
|
8948
9197
|
struct ggml_tensor * dst) {
|
|
9198
|
+
|
|
9199
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9200
|
+
|
|
8949
9201
|
switch (src0->type) {
|
|
8950
9202
|
case GGML_TYPE_F32:
|
|
8951
9203
|
{
|
|
8952
|
-
ggml_compute_forward_abs_f32(params,
|
|
9204
|
+
ggml_compute_forward_abs_f32(params, dst);
|
|
8953
9205
|
} break;
|
|
8954
9206
|
default:
|
|
8955
9207
|
{
|
|
@@ -8962,12 +9214,14 @@ static void ggml_compute_forward_abs(
|
|
|
8962
9214
|
|
|
8963
9215
|
static void ggml_compute_forward_sgn_f32(
|
|
8964
9216
|
const struct ggml_compute_params * params,
|
|
8965
|
-
const struct ggml_tensor * src0,
|
|
8966
9217
|
struct ggml_tensor * dst) {
|
|
9218
|
+
|
|
9219
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9220
|
+
|
|
8967
9221
|
assert(params->ith == 0);
|
|
8968
9222
|
assert(ggml_are_same_shape(src0, dst));
|
|
8969
9223
|
|
|
8970
|
-
if (params->type ==
|
|
9224
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
8971
9225
|
return;
|
|
8972
9226
|
}
|
|
8973
9227
|
|
|
@@ -8986,12 +9240,14 @@ static void ggml_compute_forward_sgn_f32(
|
|
|
8986
9240
|
|
|
8987
9241
|
static void ggml_compute_forward_sgn(
|
|
8988
9242
|
const struct ggml_compute_params * params,
|
|
8989
|
-
const struct ggml_tensor * src0,
|
|
8990
9243
|
struct ggml_tensor * dst) {
|
|
9244
|
+
|
|
9245
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9246
|
+
|
|
8991
9247
|
switch (src0->type) {
|
|
8992
9248
|
case GGML_TYPE_F32:
|
|
8993
9249
|
{
|
|
8994
|
-
ggml_compute_forward_sgn_f32(params,
|
|
9250
|
+
ggml_compute_forward_sgn_f32(params, dst);
|
|
8995
9251
|
} break;
|
|
8996
9252
|
default:
|
|
8997
9253
|
{
|
|
@@ -9004,12 +9260,14 @@ static void ggml_compute_forward_sgn(
|
|
|
9004
9260
|
|
|
9005
9261
|
static void ggml_compute_forward_neg_f32(
|
|
9006
9262
|
const struct ggml_compute_params * params,
|
|
9007
|
-
const struct ggml_tensor * src0,
|
|
9008
9263
|
struct ggml_tensor * dst) {
|
|
9264
|
+
|
|
9265
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9266
|
+
|
|
9009
9267
|
assert(params->ith == 0);
|
|
9010
9268
|
assert(ggml_are_same_shape(src0, dst));
|
|
9011
9269
|
|
|
9012
|
-
if (params->type ==
|
|
9270
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9013
9271
|
return;
|
|
9014
9272
|
}
|
|
9015
9273
|
|
|
@@ -9028,12 +9286,14 @@ static void ggml_compute_forward_neg_f32(
|
|
|
9028
9286
|
|
|
9029
9287
|
static void ggml_compute_forward_neg(
|
|
9030
9288
|
const struct ggml_compute_params * params,
|
|
9031
|
-
const struct ggml_tensor * src0,
|
|
9032
9289
|
struct ggml_tensor * dst) {
|
|
9290
|
+
|
|
9291
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9292
|
+
|
|
9033
9293
|
switch (src0->type) {
|
|
9034
9294
|
case GGML_TYPE_F32:
|
|
9035
9295
|
{
|
|
9036
|
-
ggml_compute_forward_neg_f32(params,
|
|
9296
|
+
ggml_compute_forward_neg_f32(params, dst);
|
|
9037
9297
|
} break;
|
|
9038
9298
|
default:
|
|
9039
9299
|
{
|
|
@@ -9046,12 +9306,14 @@ static void ggml_compute_forward_neg(
|
|
|
9046
9306
|
|
|
9047
9307
|
static void ggml_compute_forward_step_f32(
|
|
9048
9308
|
const struct ggml_compute_params * params,
|
|
9049
|
-
const struct ggml_tensor * src0,
|
|
9050
9309
|
struct ggml_tensor * dst) {
|
|
9310
|
+
|
|
9311
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9312
|
+
|
|
9051
9313
|
assert(params->ith == 0);
|
|
9052
9314
|
assert(ggml_are_same_shape(src0, dst));
|
|
9053
9315
|
|
|
9054
|
-
if (params->type ==
|
|
9316
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9055
9317
|
return;
|
|
9056
9318
|
}
|
|
9057
9319
|
|
|
@@ -9070,12 +9332,14 @@ static void ggml_compute_forward_step_f32(
|
|
|
9070
9332
|
|
|
9071
9333
|
static void ggml_compute_forward_step(
|
|
9072
9334
|
const struct ggml_compute_params * params,
|
|
9073
|
-
const struct ggml_tensor * src0,
|
|
9074
9335
|
struct ggml_tensor * dst) {
|
|
9336
|
+
|
|
9337
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9338
|
+
|
|
9075
9339
|
switch (src0->type) {
|
|
9076
9340
|
case GGML_TYPE_F32:
|
|
9077
9341
|
{
|
|
9078
|
-
ggml_compute_forward_step_f32(params,
|
|
9342
|
+
ggml_compute_forward_step_f32(params, dst);
|
|
9079
9343
|
} break;
|
|
9080
9344
|
default:
|
|
9081
9345
|
{
|
|
@@ -9088,12 +9352,14 @@ static void ggml_compute_forward_step(
|
|
|
9088
9352
|
|
|
9089
9353
|
static void ggml_compute_forward_tanh_f32(
|
|
9090
9354
|
const struct ggml_compute_params * params,
|
|
9091
|
-
const struct ggml_tensor * src0,
|
|
9092
9355
|
struct ggml_tensor * dst) {
|
|
9356
|
+
|
|
9357
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9358
|
+
|
|
9093
9359
|
assert(params->ith == 0);
|
|
9094
9360
|
assert(ggml_are_same_shape(src0, dst));
|
|
9095
9361
|
|
|
9096
|
-
if (params->type ==
|
|
9362
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9097
9363
|
return;
|
|
9098
9364
|
}
|
|
9099
9365
|
|
|
@@ -9112,12 +9378,14 @@ static void ggml_compute_forward_tanh_f32(
|
|
|
9112
9378
|
|
|
9113
9379
|
static void ggml_compute_forward_tanh(
|
|
9114
9380
|
const struct ggml_compute_params * params,
|
|
9115
|
-
const struct ggml_tensor * src0,
|
|
9116
9381
|
struct ggml_tensor * dst) {
|
|
9382
|
+
|
|
9383
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9384
|
+
|
|
9117
9385
|
switch (src0->type) {
|
|
9118
9386
|
case GGML_TYPE_F32:
|
|
9119
9387
|
{
|
|
9120
|
-
ggml_compute_forward_tanh_f32(params,
|
|
9388
|
+
ggml_compute_forward_tanh_f32(params, dst);
|
|
9121
9389
|
} break;
|
|
9122
9390
|
default:
|
|
9123
9391
|
{
|
|
@@ -9130,12 +9398,14 @@ static void ggml_compute_forward_tanh(
|
|
|
9130
9398
|
|
|
9131
9399
|
static void ggml_compute_forward_elu_f32(
|
|
9132
9400
|
const struct ggml_compute_params * params,
|
|
9133
|
-
const struct ggml_tensor * src0,
|
|
9134
9401
|
struct ggml_tensor * dst) {
|
|
9402
|
+
|
|
9403
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9404
|
+
|
|
9135
9405
|
assert(params->ith == 0);
|
|
9136
9406
|
assert(ggml_are_same_shape(src0, dst));
|
|
9137
9407
|
|
|
9138
|
-
if (params->type ==
|
|
9408
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9139
9409
|
return;
|
|
9140
9410
|
}
|
|
9141
9411
|
|
|
@@ -9154,12 +9424,14 @@ static void ggml_compute_forward_elu_f32(
|
|
|
9154
9424
|
|
|
9155
9425
|
static void ggml_compute_forward_elu(
|
|
9156
9426
|
const struct ggml_compute_params * params,
|
|
9157
|
-
const struct ggml_tensor * src0,
|
|
9158
9427
|
struct ggml_tensor * dst) {
|
|
9428
|
+
|
|
9429
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9430
|
+
|
|
9159
9431
|
switch (src0->type) {
|
|
9160
9432
|
case GGML_TYPE_F32:
|
|
9161
9433
|
{
|
|
9162
|
-
ggml_compute_forward_elu_f32(params,
|
|
9434
|
+
ggml_compute_forward_elu_f32(params, dst);
|
|
9163
9435
|
} break;
|
|
9164
9436
|
default:
|
|
9165
9437
|
{
|
|
@@ -9172,12 +9444,14 @@ static void ggml_compute_forward_elu(
|
|
|
9172
9444
|
|
|
9173
9445
|
static void ggml_compute_forward_relu_f32(
|
|
9174
9446
|
const struct ggml_compute_params * params,
|
|
9175
|
-
const struct ggml_tensor * src0,
|
|
9176
9447
|
struct ggml_tensor * dst) {
|
|
9448
|
+
|
|
9449
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9450
|
+
|
|
9177
9451
|
assert(params->ith == 0);
|
|
9178
9452
|
assert(ggml_are_same_shape(src0, dst));
|
|
9179
9453
|
|
|
9180
|
-
if (params->type ==
|
|
9454
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9181
9455
|
return;
|
|
9182
9456
|
}
|
|
9183
9457
|
|
|
@@ -9196,12 +9470,14 @@ static void ggml_compute_forward_relu_f32(
|
|
|
9196
9470
|
|
|
9197
9471
|
static void ggml_compute_forward_relu(
|
|
9198
9472
|
const struct ggml_compute_params * params,
|
|
9199
|
-
const struct ggml_tensor * src0,
|
|
9200
9473
|
struct ggml_tensor * dst) {
|
|
9474
|
+
|
|
9475
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9476
|
+
|
|
9201
9477
|
switch (src0->type) {
|
|
9202
9478
|
case GGML_TYPE_F32:
|
|
9203
9479
|
{
|
|
9204
|
-
ggml_compute_forward_relu_f32(params,
|
|
9480
|
+
ggml_compute_forward_relu_f32(params, dst);
|
|
9205
9481
|
} break;
|
|
9206
9482
|
default:
|
|
9207
9483
|
{
|
|
@@ -9214,13 +9490,15 @@ static void ggml_compute_forward_relu(
|
|
|
9214
9490
|
|
|
9215
9491
|
static void ggml_compute_forward_gelu_f32(
|
|
9216
9492
|
const struct ggml_compute_params * params,
|
|
9217
|
-
const struct ggml_tensor * src0,
|
|
9218
9493
|
struct ggml_tensor * dst) {
|
|
9219
|
-
|
|
9494
|
+
|
|
9495
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9496
|
+
|
|
9497
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
|
9220
9498
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
|
9221
9499
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
9222
9500
|
|
|
9223
|
-
if (params->type ==
|
|
9501
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9224
9502
|
return;
|
|
9225
9503
|
}
|
|
9226
9504
|
|
|
@@ -9255,12 +9533,14 @@ static void ggml_compute_forward_gelu_f32(
|
|
|
9255
9533
|
|
|
9256
9534
|
static void ggml_compute_forward_gelu(
|
|
9257
9535
|
const struct ggml_compute_params * params,
|
|
9258
|
-
const struct ggml_tensor * src0,
|
|
9259
9536
|
struct ggml_tensor * dst) {
|
|
9537
|
+
|
|
9538
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9539
|
+
|
|
9260
9540
|
switch (src0->type) {
|
|
9261
9541
|
case GGML_TYPE_F32:
|
|
9262
9542
|
{
|
|
9263
|
-
ggml_compute_forward_gelu_f32(params,
|
|
9543
|
+
ggml_compute_forward_gelu_f32(params, dst);
|
|
9264
9544
|
} break;
|
|
9265
9545
|
default:
|
|
9266
9546
|
{
|
|
@@ -9273,13 +9553,15 @@ static void ggml_compute_forward_gelu(
|
|
|
9273
9553
|
|
|
9274
9554
|
static void ggml_compute_forward_gelu_quick_f32(
|
|
9275
9555
|
const struct ggml_compute_params * params,
|
|
9276
|
-
const struct ggml_tensor * src0,
|
|
9277
9556
|
struct ggml_tensor * dst) {
|
|
9557
|
+
|
|
9558
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9559
|
+
|
|
9278
9560
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
|
9279
9561
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
|
9280
9562
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
9281
9563
|
|
|
9282
|
-
if (params->type ==
|
|
9564
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9283
9565
|
return;
|
|
9284
9566
|
}
|
|
9285
9567
|
|
|
@@ -9314,12 +9596,14 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
|
9314
9596
|
|
|
9315
9597
|
static void ggml_compute_forward_gelu_quick(
|
|
9316
9598
|
const struct ggml_compute_params * params,
|
|
9317
|
-
const struct ggml_tensor * src0,
|
|
9318
9599
|
struct ggml_tensor * dst) {
|
|
9600
|
+
|
|
9601
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9602
|
+
|
|
9319
9603
|
switch (src0->type) {
|
|
9320
9604
|
case GGML_TYPE_F32:
|
|
9321
9605
|
{
|
|
9322
|
-
ggml_compute_forward_gelu_quick_f32(params,
|
|
9606
|
+
ggml_compute_forward_gelu_quick_f32(params, dst);
|
|
9323
9607
|
} break;
|
|
9324
9608
|
default:
|
|
9325
9609
|
{
|
|
@@ -9332,13 +9616,15 @@ static void ggml_compute_forward_gelu_quick(
|
|
|
9332
9616
|
|
|
9333
9617
|
static void ggml_compute_forward_silu_f32(
|
|
9334
9618
|
const struct ggml_compute_params * params,
|
|
9335
|
-
const struct ggml_tensor * src0,
|
|
9336
9619
|
struct ggml_tensor * dst) {
|
|
9620
|
+
|
|
9621
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9622
|
+
|
|
9337
9623
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
|
9338
9624
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
|
9339
9625
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
9340
9626
|
|
|
9341
|
-
if (params->type ==
|
|
9627
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9342
9628
|
return;
|
|
9343
9629
|
}
|
|
9344
9630
|
|
|
@@ -9373,12 +9659,14 @@ static void ggml_compute_forward_silu_f32(
|
|
|
9373
9659
|
|
|
9374
9660
|
static void ggml_compute_forward_silu(
|
|
9375
9661
|
const struct ggml_compute_params * params,
|
|
9376
|
-
const struct ggml_tensor * src0,
|
|
9377
9662
|
struct ggml_tensor * dst) {
|
|
9663
|
+
|
|
9664
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9665
|
+
|
|
9378
9666
|
switch (src0->type) {
|
|
9379
9667
|
case GGML_TYPE_F32:
|
|
9380
9668
|
{
|
|
9381
|
-
ggml_compute_forward_silu_f32(params,
|
|
9669
|
+
ggml_compute_forward_silu_f32(params, dst);
|
|
9382
9670
|
} break;
|
|
9383
9671
|
default:
|
|
9384
9672
|
{
|
|
@@ -9390,12 +9678,14 @@ static void ggml_compute_forward_silu(
|
|
|
9390
9678
|
|
|
9391
9679
|
static void ggml_compute_forward_leaky_relu_f32(
|
|
9392
9680
|
const struct ggml_compute_params * params,
|
|
9393
|
-
const struct ggml_tensor * src0,
|
|
9394
9681
|
struct ggml_tensor * dst) {
|
|
9682
|
+
|
|
9683
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9684
|
+
|
|
9395
9685
|
assert(params->ith == 0);
|
|
9396
9686
|
assert(ggml_are_same_shape(src0, dst));
|
|
9397
9687
|
|
|
9398
|
-
if (params->type ==
|
|
9688
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9399
9689
|
return;
|
|
9400
9690
|
}
|
|
9401
9691
|
|
|
@@ -9417,12 +9707,14 @@ static void ggml_compute_forward_leaky_relu_f32(
|
|
|
9417
9707
|
|
|
9418
9708
|
static void ggml_compute_forward_leaky_relu(
|
|
9419
9709
|
const struct ggml_compute_params * params,
|
|
9420
|
-
const struct ggml_tensor * src0,
|
|
9421
9710
|
struct ggml_tensor * dst) {
|
|
9711
|
+
|
|
9712
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9713
|
+
|
|
9422
9714
|
switch (src0->type) {
|
|
9423
9715
|
case GGML_TYPE_F32:
|
|
9424
9716
|
{
|
|
9425
|
-
ggml_compute_forward_leaky_relu_f32(params,
|
|
9717
|
+
ggml_compute_forward_leaky_relu_f32(params, dst);
|
|
9426
9718
|
} break;
|
|
9427
9719
|
default:
|
|
9428
9720
|
{
|
|
@@ -9435,16 +9727,18 @@ static void ggml_compute_forward_leaky_relu(
|
|
|
9435
9727
|
|
|
9436
9728
|
static void ggml_compute_forward_silu_back_f32(
|
|
9437
9729
|
const struct ggml_compute_params * params,
|
|
9438
|
-
const struct ggml_tensor * src0,
|
|
9439
|
-
const struct ggml_tensor * grad,
|
|
9440
9730
|
struct ggml_tensor * dst) {
|
|
9731
|
+
|
|
9732
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9733
|
+
const struct ggml_tensor * grad = dst->src[1];
|
|
9734
|
+
|
|
9441
9735
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
|
|
9442
9736
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
|
9443
9737
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
|
9444
9738
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
9445
9739
|
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
|
9446
9740
|
|
|
9447
|
-
if (params->type ==
|
|
9741
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9448
9742
|
return;
|
|
9449
9743
|
}
|
|
9450
9744
|
|
|
@@ -9480,13 +9774,14 @@ static void ggml_compute_forward_silu_back_f32(
|
|
|
9480
9774
|
|
|
9481
9775
|
static void ggml_compute_forward_silu_back(
|
|
9482
9776
|
const struct ggml_compute_params * params,
|
|
9483
|
-
const struct ggml_tensor * src0,
|
|
9484
|
-
const struct ggml_tensor * grad,
|
|
9485
9777
|
struct ggml_tensor * dst) {
|
|
9778
|
+
|
|
9779
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9780
|
+
|
|
9486
9781
|
switch (src0->type) {
|
|
9487
9782
|
case GGML_TYPE_F32:
|
|
9488
9783
|
{
|
|
9489
|
-
ggml_compute_forward_silu_back_f32(params,
|
|
9784
|
+
ggml_compute_forward_silu_back_f32(params, dst);
|
|
9490
9785
|
} break;
|
|
9491
9786
|
default:
|
|
9492
9787
|
{
|
|
@@ -9498,12 +9793,14 @@ static void ggml_compute_forward_silu_back(
|
|
|
9498
9793
|
|
|
9499
9794
|
static void ggml_compute_forward_hardswish_f32(
|
|
9500
9795
|
const struct ggml_compute_params * params,
|
|
9501
|
-
const struct ggml_tensor * src0,
|
|
9502
9796
|
struct ggml_tensor * dst) {
|
|
9797
|
+
|
|
9798
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9799
|
+
|
|
9503
9800
|
assert(params->ith == 0);
|
|
9504
9801
|
assert(ggml_are_same_shape(src0, dst));
|
|
9505
9802
|
|
|
9506
|
-
if (params->type ==
|
|
9803
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9507
9804
|
return;
|
|
9508
9805
|
}
|
|
9509
9806
|
|
|
@@ -9521,12 +9818,14 @@ static void ggml_compute_forward_hardswish_f32(
|
|
|
9521
9818
|
}
|
|
9522
9819
|
static void ggml_compute_forward_hardswish(
|
|
9523
9820
|
const struct ggml_compute_params * params,
|
|
9524
|
-
const struct ggml_tensor * src0,
|
|
9525
9821
|
struct ggml_tensor * dst) {
|
|
9822
|
+
|
|
9823
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9824
|
+
|
|
9526
9825
|
switch (src0->type) {
|
|
9527
9826
|
case GGML_TYPE_F32:
|
|
9528
9827
|
{
|
|
9529
|
-
ggml_compute_forward_hardswish_f32(params,
|
|
9828
|
+
ggml_compute_forward_hardswish_f32(params, dst);
|
|
9530
9829
|
} break;
|
|
9531
9830
|
default:
|
|
9532
9831
|
{
|
|
@@ -9537,12 +9836,14 @@ static void ggml_compute_forward_hardswish(
|
|
|
9537
9836
|
|
|
9538
9837
|
static void ggml_compute_forward_hardsigmoid_f32(
|
|
9539
9838
|
const struct ggml_compute_params * params,
|
|
9540
|
-
const struct ggml_tensor * src0,
|
|
9541
9839
|
struct ggml_tensor * dst) {
|
|
9840
|
+
|
|
9841
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9842
|
+
|
|
9542
9843
|
assert(params->ith == 0);
|
|
9543
9844
|
assert(ggml_are_same_shape(src0, dst));
|
|
9544
9845
|
|
|
9545
|
-
if (params->type ==
|
|
9846
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9546
9847
|
return;
|
|
9547
9848
|
}
|
|
9548
9849
|
|
|
@@ -9561,12 +9862,14 @@ static void ggml_compute_forward_hardsigmoid_f32(
|
|
|
9561
9862
|
|
|
9562
9863
|
static void ggml_compute_forward_hardsigmoid(
|
|
9563
9864
|
const struct ggml_compute_params * params,
|
|
9564
|
-
const struct ggml_tensor * src0,
|
|
9565
9865
|
struct ggml_tensor * dst) {
|
|
9866
|
+
|
|
9867
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9868
|
+
|
|
9566
9869
|
switch (src0->type) {
|
|
9567
9870
|
case GGML_TYPE_F32:
|
|
9568
9871
|
{
|
|
9569
|
-
ggml_compute_forward_hardsigmoid_f32(params,
|
|
9872
|
+
ggml_compute_forward_hardsigmoid_f32(params, dst);
|
|
9570
9873
|
} break;
|
|
9571
9874
|
default:
|
|
9572
9875
|
{
|
|
@@ -9580,11 +9883,13 @@ static void ggml_compute_forward_hardsigmoid(
|
|
|
9580
9883
|
|
|
9581
9884
|
static void ggml_compute_forward_norm_f32(
|
|
9582
9885
|
const struct ggml_compute_params * params,
|
|
9583
|
-
const struct ggml_tensor * src0,
|
|
9584
9886
|
struct ggml_tensor * dst) {
|
|
9887
|
+
|
|
9888
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9889
|
+
|
|
9585
9890
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
9586
9891
|
|
|
9587
|
-
if (params->type ==
|
|
9892
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9588
9893
|
return;
|
|
9589
9894
|
}
|
|
9590
9895
|
|
|
@@ -9633,12 +9938,14 @@ static void ggml_compute_forward_norm_f32(
|
|
|
9633
9938
|
|
|
9634
9939
|
static void ggml_compute_forward_norm(
|
|
9635
9940
|
const struct ggml_compute_params * params,
|
|
9636
|
-
const struct ggml_tensor * src0,
|
|
9637
9941
|
struct ggml_tensor * dst) {
|
|
9942
|
+
|
|
9943
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9944
|
+
|
|
9638
9945
|
switch (src0->type) {
|
|
9639
9946
|
case GGML_TYPE_F32:
|
|
9640
9947
|
{
|
|
9641
|
-
ggml_compute_forward_norm_f32(params,
|
|
9948
|
+
ggml_compute_forward_norm_f32(params, dst);
|
|
9642
9949
|
} break;
|
|
9643
9950
|
default:
|
|
9644
9951
|
{
|
|
@@ -9651,11 +9958,13 @@ static void ggml_compute_forward_norm(
|
|
|
9651
9958
|
|
|
9652
9959
|
static void ggml_compute_forward_rms_norm_f32(
|
|
9653
9960
|
const struct ggml_compute_params * params,
|
|
9654
|
-
const struct ggml_tensor * src0,
|
|
9655
9961
|
struct ggml_tensor * dst) {
|
|
9962
|
+
|
|
9963
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
9964
|
+
|
|
9656
9965
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
9657
9966
|
|
|
9658
|
-
if (params->type ==
|
|
9967
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9659
9968
|
return;
|
|
9660
9969
|
}
|
|
9661
9970
|
|
|
@@ -9701,12 +10010,14 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
|
9701
10010
|
|
|
9702
10011
|
static void ggml_compute_forward_rms_norm(
|
|
9703
10012
|
const struct ggml_compute_params * params,
|
|
9704
|
-
const struct ggml_tensor * src0,
|
|
9705
10013
|
struct ggml_tensor * dst) {
|
|
10014
|
+
|
|
10015
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
10016
|
+
|
|
9706
10017
|
switch (src0->type) {
|
|
9707
10018
|
case GGML_TYPE_F32:
|
|
9708
10019
|
{
|
|
9709
|
-
ggml_compute_forward_rms_norm_f32(params,
|
|
10020
|
+
ggml_compute_forward_rms_norm_f32(params, dst);
|
|
9710
10021
|
} break;
|
|
9711
10022
|
default:
|
|
9712
10023
|
{
|
|
@@ -9717,12 +10028,14 @@ static void ggml_compute_forward_rms_norm(
|
|
|
9717
10028
|
|
|
9718
10029
|
static void ggml_compute_forward_rms_norm_back_f32(
|
|
9719
10030
|
const struct ggml_compute_params * params,
|
|
9720
|
-
const struct ggml_tensor * src0,
|
|
9721
|
-
const struct ggml_tensor * src1,
|
|
9722
10031
|
struct ggml_tensor * dst) {
|
|
10032
|
+
|
|
10033
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
10034
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
10035
|
+
|
|
9723
10036
|
GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
|
|
9724
10037
|
|
|
9725
|
-
if (params->type ==
|
|
10038
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9726
10039
|
return;
|
|
9727
10040
|
}
|
|
9728
10041
|
|
|
@@ -9874,13 +10187,14 @@ static void ggml_compute_forward_rms_norm_back_f32(
|
|
|
9874
10187
|
|
|
9875
10188
|
static void ggml_compute_forward_rms_norm_back(
|
|
9876
10189
|
const struct ggml_compute_params * params,
|
|
9877
|
-
const struct ggml_tensor * src0,
|
|
9878
|
-
const struct ggml_tensor * src1,
|
|
9879
10190
|
struct ggml_tensor * dst) {
|
|
10191
|
+
|
|
10192
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
10193
|
+
|
|
9880
10194
|
switch (src0->type) {
|
|
9881
10195
|
case GGML_TYPE_F32:
|
|
9882
10196
|
{
|
|
9883
|
-
ggml_compute_forward_rms_norm_back_f32(params,
|
|
10197
|
+
ggml_compute_forward_rms_norm_back_f32(params, dst);
|
|
9884
10198
|
} break;
|
|
9885
10199
|
default:
|
|
9886
10200
|
{
|
|
@@ -9893,11 +10207,13 @@ static void ggml_compute_forward_rms_norm_back(
|
|
|
9893
10207
|
|
|
9894
10208
|
static void ggml_compute_forward_group_norm_f32(
|
|
9895
10209
|
const struct ggml_compute_params * params,
|
|
9896
|
-
const struct ggml_tensor * src0,
|
|
9897
10210
|
struct ggml_tensor * dst) {
|
|
10211
|
+
|
|
10212
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
10213
|
+
|
|
9898
10214
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
9899
10215
|
|
|
9900
|
-
if (params->type ==
|
|
10216
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
9901
10217
|
return;
|
|
9902
10218
|
}
|
|
9903
10219
|
|
|
@@ -9965,12 +10281,14 @@ static void ggml_compute_forward_group_norm_f32(
|
|
|
9965
10281
|
|
|
9966
10282
|
static void ggml_compute_forward_group_norm(
|
|
9967
10283
|
const struct ggml_compute_params * params,
|
|
9968
|
-
const struct ggml_tensor * src0,
|
|
9969
10284
|
struct ggml_tensor * dst) {
|
|
10285
|
+
|
|
10286
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
10287
|
+
|
|
9970
10288
|
switch (src0->type) {
|
|
9971
10289
|
case GGML_TYPE_F32:
|
|
9972
10290
|
{
|
|
9973
|
-
ggml_compute_forward_group_norm_f32(params,
|
|
10291
|
+
ggml_compute_forward_group_norm_f32(params, dst);
|
|
9974
10292
|
} break;
|
|
9975
10293
|
default:
|
|
9976
10294
|
{
|
|
@@ -10016,9 +10334,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(struct ggml_tensor * dst) {
|
|
|
10016
10334
|
|
|
10017
10335
|
static void ggml_compute_forward_mul_mat(
|
|
10018
10336
|
const struct ggml_compute_params * params,
|
|
10019
|
-
const struct ggml_tensor * src0,
|
|
10020
|
-
const struct ggml_tensor * src1,
|
|
10021
10337
|
struct ggml_tensor * dst) {
|
|
10338
|
+
|
|
10339
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
10340
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
10341
|
+
|
|
10022
10342
|
int64_t t0 = ggml_perf_time_us();
|
|
10023
10343
|
UNUSED(t0);
|
|
10024
10344
|
|
|
@@ -10060,7 +10380,7 @@ static void ggml_compute_forward_mul_mat(
|
|
|
10060
10380
|
|
|
10061
10381
|
#if defined(GGML_USE_CLBLAST)
|
|
10062
10382
|
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
|
10063
|
-
if (params->ith == 0 && params->type ==
|
|
10383
|
+
if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
|
|
10064
10384
|
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
|
10065
10385
|
}
|
|
10066
10386
|
return;
|
|
@@ -10073,7 +10393,7 @@ static void ggml_compute_forward_mul_mat(
|
|
|
10073
10393
|
const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
|
|
10074
10394
|
UNUSED(desired_wsize);
|
|
10075
10395
|
|
|
10076
|
-
if (params->type ==
|
|
10396
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
10077
10397
|
if (type != GGML_TYPE_F32) {
|
|
10078
10398
|
assert(params->wsize >= desired_wsize);
|
|
10079
10399
|
// parallelize by src0 rows
|
|
@@ -10096,7 +10416,7 @@ static void ggml_compute_forward_mul_mat(
|
|
|
10096
10416
|
return;
|
|
10097
10417
|
}
|
|
10098
10418
|
|
|
10099
|
-
if (params->type ==
|
|
10419
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
10100
10420
|
return;
|
|
10101
10421
|
}
|
|
10102
10422
|
|
|
@@ -10134,7 +10454,7 @@ static void ggml_compute_forward_mul_mat(
|
|
|
10134
10454
|
}
|
|
10135
10455
|
#endif
|
|
10136
10456
|
|
|
10137
|
-
if (params->type ==
|
|
10457
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
10138
10458
|
if (ith != 0) {
|
|
10139
10459
|
return;
|
|
10140
10460
|
}
|
|
@@ -10158,7 +10478,7 @@ static void ggml_compute_forward_mul_mat(
|
|
|
10158
10478
|
return;
|
|
10159
10479
|
}
|
|
10160
10480
|
|
|
10161
|
-
if (params->type ==
|
|
10481
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
10162
10482
|
return;
|
|
10163
10483
|
}
|
|
10164
10484
|
|
|
@@ -10263,10 +10583,11 @@ static void ggml_compute_forward_mul_mat(
|
|
|
10263
10583
|
|
|
10264
10584
|
static void ggml_compute_forward_mul_mat_id(
|
|
10265
10585
|
const struct ggml_compute_params * params,
|
|
10266
|
-
const struct ggml_tensor * ids,
|
|
10267
|
-
const struct ggml_tensor * src1,
|
|
10268
10586
|
struct ggml_tensor * dst) {
|
|
10269
10587
|
|
|
10588
|
+
const struct ggml_tensor * ids = dst->src[0];
|
|
10589
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
10590
|
+
|
|
10270
10591
|
const struct ggml_tensor * src0 = dst->src[2]; // only for GGML_TENSOR_BINARY_OP_LOCALS
|
|
10271
10592
|
|
|
10272
10593
|
GGML_TENSOR_BINARY_OP_LOCALS
|
|
@@ -10314,7 +10635,7 @@ static void ggml_compute_forward_mul_mat_id(
|
|
|
10314
10635
|
|
|
10315
10636
|
#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
|
|
10316
10637
|
|
|
10317
|
-
if (params->type ==
|
|
10638
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
10318
10639
|
if (ith != 0) {
|
|
10319
10640
|
return;
|
|
10320
10641
|
}
|
|
@@ -10351,7 +10672,7 @@ static void ggml_compute_forward_mul_mat_id(
|
|
|
10351
10672
|
return;
|
|
10352
10673
|
}
|
|
10353
10674
|
|
|
10354
|
-
if (params->type ==
|
|
10675
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
10355
10676
|
return;
|
|
10356
10677
|
}
|
|
10357
10678
|
|
|
@@ -10457,9 +10778,11 @@ static void ggml_compute_forward_mul_mat_id(
|
|
|
10457
10778
|
|
|
10458
10779
|
static void ggml_compute_forward_out_prod_f32(
|
|
10459
10780
|
const struct ggml_compute_params * params,
|
|
10460
|
-
const struct ggml_tensor * src0,
|
|
10461
|
-
const struct ggml_tensor * src1,
|
|
10462
10781
|
struct ggml_tensor * dst) {
|
|
10782
|
+
|
|
10783
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
10784
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
10785
|
+
|
|
10463
10786
|
// int64_t t0 = ggml_perf_time_us();
|
|
10464
10787
|
// UNUSED(t0);
|
|
10465
10788
|
|
|
@@ -10497,7 +10820,7 @@ static void ggml_compute_forward_out_prod_f32(
|
|
|
10497
10820
|
(ggml_is_contiguous(src1) || ggml_is_transposed(src1));
|
|
10498
10821
|
#endif
|
|
10499
10822
|
|
|
10500
|
-
if (params->type ==
|
|
10823
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
10501
10824
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
|
|
10502
10825
|
if (use_blas) {
|
|
10503
10826
|
return;
|
|
@@ -10510,7 +10833,7 @@ static void ggml_compute_forward_out_prod_f32(
|
|
|
10510
10833
|
return;
|
|
10511
10834
|
}
|
|
10512
10835
|
|
|
10513
|
-
if (params->type ==
|
|
10836
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
10514
10837
|
return;
|
|
10515
10838
|
}
|
|
10516
10839
|
|
|
@@ -10649,9 +10972,11 @@ static void ggml_compute_forward_out_prod_f32(
|
|
|
10649
10972
|
|
|
10650
10973
|
static void ggml_compute_forward_out_prod_q_f32(
|
|
10651
10974
|
const struct ggml_compute_params * params,
|
|
10652
|
-
const struct ggml_tensor * src0,
|
|
10653
|
-
const struct ggml_tensor * src1,
|
|
10654
10975
|
struct ggml_tensor * dst) {
|
|
10976
|
+
|
|
10977
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
10978
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
10979
|
+
|
|
10655
10980
|
// int64_t t0 = ggml_perf_time_us();
|
|
10656
10981
|
// UNUSED(t0);
|
|
10657
10982
|
|
|
@@ -10688,7 +11013,7 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|
|
10688
11013
|
// TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
|
|
10689
11014
|
// TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
|
10690
11015
|
|
|
10691
|
-
if (params->type ==
|
|
11016
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
10692
11017
|
if (ith != 0) {
|
|
10693
11018
|
return;
|
|
10694
11019
|
}
|
|
@@ -10696,7 +11021,7 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|
|
10696
11021
|
return;
|
|
10697
11022
|
}
|
|
10698
11023
|
|
|
10699
|
-
if (params->type ==
|
|
11024
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
10700
11025
|
return;
|
|
10701
11026
|
}
|
|
10702
11027
|
|
|
@@ -10762,9 +11087,10 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|
|
10762
11087
|
|
|
10763
11088
|
static void ggml_compute_forward_out_prod(
|
|
10764
11089
|
const struct ggml_compute_params * params,
|
|
10765
|
-
const struct ggml_tensor * src0,
|
|
10766
|
-
const struct ggml_tensor * src1,
|
|
10767
11090
|
struct ggml_tensor * dst) {
|
|
11091
|
+
|
|
11092
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11093
|
+
|
|
10768
11094
|
switch (src0->type) {
|
|
10769
11095
|
case GGML_TYPE_Q4_0:
|
|
10770
11096
|
case GGML_TYPE_Q4_1:
|
|
@@ -10779,17 +11105,22 @@ static void ggml_compute_forward_out_prod(
|
|
|
10779
11105
|
case GGML_TYPE_IQ2_XXS:
|
|
10780
11106
|
case GGML_TYPE_IQ2_XS:
|
|
10781
11107
|
case GGML_TYPE_IQ3_XXS:
|
|
11108
|
+
case GGML_TYPE_IQ1_S:
|
|
11109
|
+
case GGML_TYPE_IQ4_NL:
|
|
11110
|
+
case GGML_TYPE_IQ4_XS:
|
|
11111
|
+
case GGML_TYPE_IQ3_S:
|
|
11112
|
+
case GGML_TYPE_IQ2_S:
|
|
10782
11113
|
{
|
|
10783
|
-
ggml_compute_forward_out_prod_q_f32(params,
|
|
11114
|
+
ggml_compute_forward_out_prod_q_f32(params, dst);
|
|
10784
11115
|
} break;
|
|
10785
11116
|
case GGML_TYPE_F16:
|
|
10786
11117
|
{
|
|
10787
11118
|
GGML_ASSERT(false); // todo
|
|
10788
|
-
// ggml_compute_forward_out_prod_f16_f32(params,
|
|
11119
|
+
// ggml_compute_forward_out_prod_f16_f32(params, dst);
|
|
10789
11120
|
} break;
|
|
10790
11121
|
case GGML_TYPE_F32:
|
|
10791
11122
|
{
|
|
10792
|
-
ggml_compute_forward_out_prod_f32(params,
|
|
11123
|
+
ggml_compute_forward_out_prod_f32(params, dst);
|
|
10793
11124
|
} break;
|
|
10794
11125
|
default:
|
|
10795
11126
|
{
|
|
@@ -10802,13 +11133,15 @@ static void ggml_compute_forward_out_prod(
|
|
|
10802
11133
|
|
|
10803
11134
|
static void ggml_compute_forward_scale_f32(
|
|
10804
11135
|
const struct ggml_compute_params * params,
|
|
10805
|
-
const struct ggml_tensor * src0,
|
|
10806
11136
|
struct ggml_tensor * dst) {
|
|
11137
|
+
|
|
11138
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11139
|
+
|
|
10807
11140
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
|
10808
11141
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
|
10809
11142
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
10810
11143
|
|
|
10811
|
-
if (params->type ==
|
|
11144
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
10812
11145
|
return;
|
|
10813
11146
|
}
|
|
10814
11147
|
|
|
@@ -10844,12 +11177,14 @@ static void ggml_compute_forward_scale_f32(
|
|
|
10844
11177
|
|
|
10845
11178
|
static void ggml_compute_forward_scale(
|
|
10846
11179
|
const struct ggml_compute_params * params,
|
|
10847
|
-
const struct ggml_tensor * src0,
|
|
10848
11180
|
struct ggml_tensor * dst) {
|
|
11181
|
+
|
|
11182
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11183
|
+
|
|
10849
11184
|
switch (src0->type) {
|
|
10850
11185
|
case GGML_TYPE_F32:
|
|
10851
11186
|
{
|
|
10852
|
-
ggml_compute_forward_scale_f32(params,
|
|
11187
|
+
ggml_compute_forward_scale_f32(params, dst);
|
|
10853
11188
|
} break;
|
|
10854
11189
|
default:
|
|
10855
11190
|
{
|
|
@@ -10862,9 +11197,11 @@ static void ggml_compute_forward_scale(
|
|
|
10862
11197
|
|
|
10863
11198
|
static void ggml_compute_forward_set_f32(
|
|
10864
11199
|
const struct ggml_compute_params * params,
|
|
10865
|
-
const struct ggml_tensor * src0,
|
|
10866
|
-
const struct ggml_tensor * src1,
|
|
10867
11200
|
struct ggml_tensor * dst) {
|
|
11201
|
+
|
|
11202
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11203
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
11204
|
+
|
|
10868
11205
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
10869
11206
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
|
10870
11207
|
|
|
@@ -10876,7 +11213,7 @@ static void ggml_compute_forward_set_f32(
|
|
|
10876
11213
|
size_t offset = ((int32_t *) dst->op_params)[3];
|
|
10877
11214
|
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
|
10878
11215
|
|
|
10879
|
-
if (!inplace && (params->type ==
|
|
11216
|
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
|
10880
11217
|
if (params->ith != 0) {
|
|
10881
11218
|
return;
|
|
10882
11219
|
}
|
|
@@ -10888,7 +11225,7 @@ static void ggml_compute_forward_set_f32(
|
|
|
10888
11225
|
ggml_nbytes(dst));
|
|
10889
11226
|
}
|
|
10890
11227
|
|
|
10891
|
-
if (params->type ==
|
|
11228
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
10892
11229
|
return;
|
|
10893
11230
|
}
|
|
10894
11231
|
|
|
@@ -10935,14 +11272,14 @@ static void ggml_compute_forward_set_f32(
|
|
|
10935
11272
|
|
|
10936
11273
|
static void ggml_compute_forward_set(
|
|
10937
11274
|
const struct ggml_compute_params * params,
|
|
10938
|
-
const struct ggml_tensor * src0,
|
|
10939
|
-
const struct ggml_tensor * src1,
|
|
10940
11275
|
struct ggml_tensor * dst) {
|
|
10941
11276
|
|
|
11277
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11278
|
+
|
|
10942
11279
|
switch (src0->type) {
|
|
10943
11280
|
case GGML_TYPE_F32:
|
|
10944
11281
|
{
|
|
10945
|
-
ggml_compute_forward_set_f32(params,
|
|
11282
|
+
ggml_compute_forward_set_f32(params, dst);
|
|
10946
11283
|
} break;
|
|
10947
11284
|
case GGML_TYPE_F16:
|
|
10948
11285
|
case GGML_TYPE_Q4_0:
|
|
@@ -10959,6 +11296,11 @@ static void ggml_compute_forward_set(
|
|
|
10959
11296
|
case GGML_TYPE_IQ2_XXS:
|
|
10960
11297
|
case GGML_TYPE_IQ2_XS:
|
|
10961
11298
|
case GGML_TYPE_IQ3_XXS:
|
|
11299
|
+
case GGML_TYPE_IQ1_S:
|
|
11300
|
+
case GGML_TYPE_IQ4_NL:
|
|
11301
|
+
case GGML_TYPE_IQ4_XS:
|
|
11302
|
+
case GGML_TYPE_IQ3_S:
|
|
11303
|
+
case GGML_TYPE_IQ2_S:
|
|
10962
11304
|
default:
|
|
10963
11305
|
{
|
|
10964
11306
|
GGML_ASSERT(false);
|
|
@@ -10970,29 +11312,25 @@ static void ggml_compute_forward_set(
|
|
|
10970
11312
|
|
|
10971
11313
|
static void ggml_compute_forward_cpy(
|
|
10972
11314
|
const struct ggml_compute_params * params,
|
|
10973
|
-
const struct ggml_tensor * src0,
|
|
10974
11315
|
struct ggml_tensor * dst) {
|
|
10975
|
-
ggml_compute_forward_dup(params,
|
|
11316
|
+
ggml_compute_forward_dup(params, dst);
|
|
10976
11317
|
}
|
|
10977
11318
|
|
|
10978
11319
|
// ggml_compute_forward_cont
|
|
10979
11320
|
|
|
10980
11321
|
static void ggml_compute_forward_cont(
|
|
10981
11322
|
const struct ggml_compute_params * params,
|
|
10982
|
-
const struct ggml_tensor * src0,
|
|
10983
11323
|
struct ggml_tensor * dst) {
|
|
10984
|
-
ggml_compute_forward_dup(params,
|
|
11324
|
+
ggml_compute_forward_dup(params, dst);
|
|
10985
11325
|
}
|
|
10986
11326
|
|
|
10987
11327
|
// ggml_compute_forward_reshape
|
|
10988
11328
|
|
|
10989
11329
|
static void ggml_compute_forward_reshape(
|
|
10990
11330
|
const struct ggml_compute_params * params,
|
|
10991
|
-
const struct ggml_tensor * src0,
|
|
10992
11331
|
struct ggml_tensor * dst) {
|
|
10993
11332
|
// NOP
|
|
10994
11333
|
UNUSED(params);
|
|
10995
|
-
UNUSED(src0);
|
|
10996
11334
|
UNUSED(dst);
|
|
10997
11335
|
}
|
|
10998
11336
|
|
|
@@ -11000,42 +11338,44 @@ static void ggml_compute_forward_reshape(
|
|
|
11000
11338
|
|
|
11001
11339
|
static void ggml_compute_forward_view(
|
|
11002
11340
|
const struct ggml_compute_params * params,
|
|
11003
|
-
const struct ggml_tensor *
|
|
11341
|
+
const struct ggml_tensor * dst) {
|
|
11004
11342
|
// NOP
|
|
11005
11343
|
UNUSED(params);
|
|
11006
|
-
UNUSED(
|
|
11344
|
+
UNUSED(dst);
|
|
11007
11345
|
}
|
|
11008
11346
|
|
|
11009
11347
|
// ggml_compute_forward_permute
|
|
11010
11348
|
|
|
11011
11349
|
static void ggml_compute_forward_permute(
|
|
11012
11350
|
const struct ggml_compute_params * params,
|
|
11013
|
-
const struct ggml_tensor *
|
|
11351
|
+
const struct ggml_tensor * dst) {
|
|
11014
11352
|
// NOP
|
|
11015
11353
|
UNUSED(params);
|
|
11016
|
-
UNUSED(
|
|
11354
|
+
UNUSED(dst);
|
|
11017
11355
|
}
|
|
11018
11356
|
|
|
11019
11357
|
// ggml_compute_forward_transpose
|
|
11020
11358
|
|
|
11021
11359
|
static void ggml_compute_forward_transpose(
|
|
11022
11360
|
const struct ggml_compute_params * params,
|
|
11023
|
-
const struct ggml_tensor *
|
|
11361
|
+
const struct ggml_tensor * dst) {
|
|
11024
11362
|
// NOP
|
|
11025
11363
|
UNUSED(params);
|
|
11026
|
-
UNUSED(
|
|
11364
|
+
UNUSED(dst);
|
|
11027
11365
|
}
|
|
11028
11366
|
|
|
11029
11367
|
// ggml_compute_forward_get_rows
|
|
11030
11368
|
|
|
11031
11369
|
static void ggml_compute_forward_get_rows_q(
|
|
11032
11370
|
const struct ggml_compute_params * params,
|
|
11033
|
-
const struct ggml_tensor * src0,
|
|
11034
|
-
const struct ggml_tensor * src1,
|
|
11035
11371
|
struct ggml_tensor * dst) {
|
|
11372
|
+
|
|
11373
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11374
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
11375
|
+
|
|
11036
11376
|
assert(params->ith == 0);
|
|
11037
11377
|
|
|
11038
|
-
if (params->type ==
|
|
11378
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11039
11379
|
return;
|
|
11040
11380
|
}
|
|
11041
11381
|
|
|
@@ -11068,12 +11408,14 @@ static void ggml_compute_forward_get_rows_q(
|
|
|
11068
11408
|
|
|
11069
11409
|
static void ggml_compute_forward_get_rows_f16(
|
|
11070
11410
|
const struct ggml_compute_params * params,
|
|
11071
|
-
const struct ggml_tensor * src0,
|
|
11072
|
-
const struct ggml_tensor * src1,
|
|
11073
11411
|
struct ggml_tensor * dst) {
|
|
11412
|
+
|
|
11413
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11414
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
11415
|
+
|
|
11074
11416
|
assert(params->ith == 0);
|
|
11075
11417
|
|
|
11076
|
-
if (params->type ==
|
|
11418
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11077
11419
|
return;
|
|
11078
11420
|
}
|
|
11079
11421
|
|
|
@@ -11103,12 +11445,14 @@ static void ggml_compute_forward_get_rows_f16(
|
|
|
11103
11445
|
|
|
11104
11446
|
static void ggml_compute_forward_get_rows_f32(
|
|
11105
11447
|
const struct ggml_compute_params * params,
|
|
11106
|
-
const struct ggml_tensor * src0,
|
|
11107
|
-
const struct ggml_tensor * src1,
|
|
11108
11448
|
struct ggml_tensor * dst) {
|
|
11449
|
+
|
|
11450
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11451
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
11452
|
+
|
|
11109
11453
|
assert(params->ith == 0);
|
|
11110
11454
|
|
|
11111
|
-
if (params->type ==
|
|
11455
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11112
11456
|
return;
|
|
11113
11457
|
}
|
|
11114
11458
|
|
|
@@ -11138,9 +11482,10 @@ static void ggml_compute_forward_get_rows_f32(
|
|
|
11138
11482
|
|
|
11139
11483
|
static void ggml_compute_forward_get_rows(
|
|
11140
11484
|
const struct ggml_compute_params * params,
|
|
11141
|
-
const struct ggml_tensor * src0,
|
|
11142
|
-
const struct ggml_tensor * src1,
|
|
11143
11485
|
struct ggml_tensor * dst) {
|
|
11486
|
+
|
|
11487
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11488
|
+
|
|
11144
11489
|
switch (src0->type) {
|
|
11145
11490
|
case GGML_TYPE_Q4_0:
|
|
11146
11491
|
case GGML_TYPE_Q4_1:
|
|
@@ -11156,17 +11501,22 @@ static void ggml_compute_forward_get_rows(
|
|
|
11156
11501
|
case GGML_TYPE_IQ2_XXS:
|
|
11157
11502
|
case GGML_TYPE_IQ2_XS:
|
|
11158
11503
|
case GGML_TYPE_IQ3_XXS:
|
|
11504
|
+
case GGML_TYPE_IQ1_S:
|
|
11505
|
+
case GGML_TYPE_IQ4_NL:
|
|
11506
|
+
case GGML_TYPE_IQ4_XS:
|
|
11507
|
+
case GGML_TYPE_IQ3_S:
|
|
11508
|
+
case GGML_TYPE_IQ2_S:
|
|
11159
11509
|
{
|
|
11160
|
-
ggml_compute_forward_get_rows_q(params,
|
|
11510
|
+
ggml_compute_forward_get_rows_q(params, dst);
|
|
11161
11511
|
} break;
|
|
11162
11512
|
case GGML_TYPE_F16:
|
|
11163
11513
|
{
|
|
11164
|
-
ggml_compute_forward_get_rows_f16(params,
|
|
11514
|
+
ggml_compute_forward_get_rows_f16(params, dst);
|
|
11165
11515
|
} break;
|
|
11166
11516
|
case GGML_TYPE_F32:
|
|
11167
11517
|
case GGML_TYPE_I32:
|
|
11168
11518
|
{
|
|
11169
|
-
ggml_compute_forward_get_rows_f32(params,
|
|
11519
|
+
ggml_compute_forward_get_rows_f32(params, dst);
|
|
11170
11520
|
} break;
|
|
11171
11521
|
default:
|
|
11172
11522
|
{
|
|
@@ -11197,22 +11547,24 @@ static void ggml_compute_forward_get_rows(
|
|
|
11197
11547
|
|
|
11198
11548
|
static void ggml_compute_forward_get_rows_back_f32_f16(
|
|
11199
11549
|
const struct ggml_compute_params * params,
|
|
11200
|
-
const struct ggml_tensor * src0,
|
|
11201
|
-
const struct ggml_tensor * src1,
|
|
11202
11550
|
struct ggml_tensor * dst) {
|
|
11551
|
+
|
|
11552
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11553
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
11554
|
+
|
|
11203
11555
|
GGML_ASSERT(params->ith == 0);
|
|
11204
11556
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
|
11205
11557
|
|
|
11206
11558
|
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
|
|
11207
11559
|
|
|
11208
|
-
if (params->type ==
|
|
11560
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
11209
11561
|
if (params->ith != 0) {
|
|
11210
11562
|
return;
|
|
11211
11563
|
}
|
|
11212
11564
|
memset(dst->data, 0, ggml_nbytes(dst));
|
|
11213
11565
|
}
|
|
11214
11566
|
|
|
11215
|
-
if (params->type ==
|
|
11567
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11216
11568
|
return;
|
|
11217
11569
|
}
|
|
11218
11570
|
|
|
@@ -11234,22 +11586,24 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
|
|
|
11234
11586
|
|
|
11235
11587
|
static void ggml_compute_forward_get_rows_back_f32(
|
|
11236
11588
|
const struct ggml_compute_params * params,
|
|
11237
|
-
const struct ggml_tensor * src0,
|
|
11238
|
-
const struct ggml_tensor * src1,
|
|
11239
11589
|
struct ggml_tensor * dst) {
|
|
11590
|
+
|
|
11591
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11592
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
11593
|
+
|
|
11240
11594
|
GGML_ASSERT(params->ith == 0);
|
|
11241
11595
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
|
11242
11596
|
|
|
11243
11597
|
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
|
|
11244
11598
|
|
|
11245
|
-
if (params->type ==
|
|
11599
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
11246
11600
|
if (params->ith != 0) {
|
|
11247
11601
|
return;
|
|
11248
11602
|
}
|
|
11249
11603
|
memset(dst->data, 0, ggml_nbytes(dst));
|
|
11250
11604
|
}
|
|
11251
11605
|
|
|
11252
|
-
if (params->type ==
|
|
11606
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11253
11607
|
return;
|
|
11254
11608
|
}
|
|
11255
11609
|
|
|
@@ -11271,17 +11625,18 @@ static void ggml_compute_forward_get_rows_back_f32(
|
|
|
11271
11625
|
|
|
11272
11626
|
static void ggml_compute_forward_get_rows_back(
|
|
11273
11627
|
const struct ggml_compute_params * params,
|
|
11274
|
-
const struct ggml_tensor * src0,
|
|
11275
|
-
const struct ggml_tensor * src1,
|
|
11276
11628
|
struct ggml_tensor * dst) {
|
|
11629
|
+
|
|
11630
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11631
|
+
|
|
11277
11632
|
switch (src0->type) {
|
|
11278
11633
|
case GGML_TYPE_F16:
|
|
11279
11634
|
{
|
|
11280
|
-
ggml_compute_forward_get_rows_back_f32_f16(params,
|
|
11635
|
+
ggml_compute_forward_get_rows_back_f32_f16(params, dst);
|
|
11281
11636
|
} break;
|
|
11282
11637
|
case GGML_TYPE_F32:
|
|
11283
11638
|
{
|
|
11284
|
-
ggml_compute_forward_get_rows_back_f32(params,
|
|
11639
|
+
ggml_compute_forward_get_rows_back_f32(params, dst);
|
|
11285
11640
|
} break;
|
|
11286
11641
|
default:
|
|
11287
11642
|
{
|
|
@@ -11312,11 +11667,13 @@ static void ggml_compute_forward_get_rows_back(
|
|
|
11312
11667
|
|
|
11313
11668
|
static void ggml_compute_forward_diag_f32(
|
|
11314
11669
|
const struct ggml_compute_params * params,
|
|
11315
|
-
const struct ggml_tensor * src0,
|
|
11316
11670
|
struct ggml_tensor * dst) {
|
|
11671
|
+
|
|
11672
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11673
|
+
|
|
11317
11674
|
GGML_ASSERT(params->ith == 0);
|
|
11318
11675
|
|
|
11319
|
-
if (params->type ==
|
|
11676
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11320
11677
|
return;
|
|
11321
11678
|
}
|
|
11322
11679
|
|
|
@@ -11352,12 +11709,14 @@ static void ggml_compute_forward_diag_f32(
|
|
|
11352
11709
|
|
|
11353
11710
|
static void ggml_compute_forward_diag(
|
|
11354
11711
|
const struct ggml_compute_params * params,
|
|
11355
|
-
const struct ggml_tensor * src0,
|
|
11356
11712
|
struct ggml_tensor * dst) {
|
|
11713
|
+
|
|
11714
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11715
|
+
|
|
11357
11716
|
switch (src0->type) {
|
|
11358
11717
|
case GGML_TYPE_F32:
|
|
11359
11718
|
{
|
|
11360
|
-
ggml_compute_forward_diag_f32(params,
|
|
11719
|
+
ggml_compute_forward_diag_f32(params, dst);
|
|
11361
11720
|
} break;
|
|
11362
11721
|
default:
|
|
11363
11722
|
{
|
|
@@ -11370,10 +11729,11 @@ static void ggml_compute_forward_diag(
|
|
|
11370
11729
|
|
|
11371
11730
|
static void ggml_compute_forward_diag_mask_f32(
|
|
11372
11731
|
const struct ggml_compute_params * params,
|
|
11373
|
-
const struct ggml_tensor * src0,
|
|
11374
11732
|
struct ggml_tensor * dst,
|
|
11375
11733
|
const float value) {
|
|
11376
11734
|
|
|
11735
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11736
|
+
|
|
11377
11737
|
const int ith = params->ith;
|
|
11378
11738
|
const int nth = params->nth;
|
|
11379
11739
|
|
|
@@ -11382,7 +11742,7 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
|
11382
11742
|
|
|
11383
11743
|
GGML_ASSERT(n_past >= 0);
|
|
11384
11744
|
|
|
11385
|
-
if (!inplace && (params->type ==
|
|
11745
|
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
|
11386
11746
|
if (ith != 0) {
|
|
11387
11747
|
return;
|
|
11388
11748
|
}
|
|
@@ -11396,7 +11756,7 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
|
11396
11756
|
ggml_nbytes(dst));
|
|
11397
11757
|
}
|
|
11398
11758
|
|
|
11399
|
-
if (params->type ==
|
|
11759
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11400
11760
|
return;
|
|
11401
11761
|
}
|
|
11402
11762
|
|
|
@@ -11423,12 +11783,14 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
|
11423
11783
|
|
|
11424
11784
|
static void ggml_compute_forward_diag_mask_inf(
|
|
11425
11785
|
const struct ggml_compute_params * params,
|
|
11426
|
-
const struct ggml_tensor * src0,
|
|
11427
11786
|
struct ggml_tensor * dst) {
|
|
11787
|
+
|
|
11788
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11789
|
+
|
|
11428
11790
|
switch (src0->type) {
|
|
11429
11791
|
case GGML_TYPE_F32:
|
|
11430
11792
|
{
|
|
11431
|
-
ggml_compute_forward_diag_mask_f32(params,
|
|
11793
|
+
ggml_compute_forward_diag_mask_f32(params, dst, -INFINITY);
|
|
11432
11794
|
} break;
|
|
11433
11795
|
default:
|
|
11434
11796
|
{
|
|
@@ -11439,12 +11801,14 @@ static void ggml_compute_forward_diag_mask_inf(
|
|
|
11439
11801
|
|
|
11440
11802
|
static void ggml_compute_forward_diag_mask_zero(
|
|
11441
11803
|
const struct ggml_compute_params * params,
|
|
11442
|
-
const struct ggml_tensor * src0,
|
|
11443
11804
|
struct ggml_tensor * dst) {
|
|
11805
|
+
|
|
11806
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11807
|
+
|
|
11444
11808
|
switch (src0->type) {
|
|
11445
11809
|
case GGML_TYPE_F32:
|
|
11446
11810
|
{
|
|
11447
|
-
ggml_compute_forward_diag_mask_f32(params,
|
|
11811
|
+
ggml_compute_forward_diag_mask_f32(params, dst, 0);
|
|
11448
11812
|
} break;
|
|
11449
11813
|
default:
|
|
11450
11814
|
{
|
|
@@ -11457,26 +11821,42 @@ static void ggml_compute_forward_diag_mask_zero(
|
|
|
11457
11821
|
|
|
11458
11822
|
static void ggml_compute_forward_soft_max_f32(
|
|
11459
11823
|
const struct ggml_compute_params * params,
|
|
11460
|
-
const struct ggml_tensor * src0,
|
|
11461
|
-
const struct ggml_tensor * src1,
|
|
11462
11824
|
struct ggml_tensor * dst) {
|
|
11825
|
+
|
|
11826
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11827
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
11828
|
+
const struct ggml_tensor * src2 = dst->src[2];
|
|
11829
|
+
|
|
11463
11830
|
assert(ggml_is_contiguous(dst));
|
|
11464
11831
|
assert(ggml_are_same_shape(src0, dst));
|
|
11465
11832
|
|
|
11466
|
-
if (params->type ==
|
|
11833
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11467
11834
|
return;
|
|
11468
11835
|
}
|
|
11469
11836
|
|
|
11470
|
-
float scale
|
|
11471
|
-
|
|
11837
|
+
float scale = 1.0f;
|
|
11838
|
+
float max_bias = 0.0f;
|
|
11839
|
+
|
|
11840
|
+
memcpy(&scale, (float *) dst->op_params + 0, sizeof(float));
|
|
11841
|
+
memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
|
|
11472
11842
|
|
|
11473
11843
|
// TODO: handle transposed/permuted matrices
|
|
11474
11844
|
|
|
11475
11845
|
const int ith = params->ith;
|
|
11476
11846
|
const int nth = params->nth;
|
|
11477
11847
|
|
|
11848
|
+
GGML_TENSOR_UNARY_OP_LOCALS
|
|
11849
|
+
|
|
11478
11850
|
const int64_t ne11 = src1 ? src1->ne[1] : 1;
|
|
11479
11851
|
|
|
11852
|
+
// TODO: is this supposed to be ceil instead of floor?
|
|
11853
|
+
// https://huggingface.co/mosaicml/mpt-7b/blob/main/attention.py#L370
|
|
11854
|
+
const uint32_t n_head_kv = ne02;
|
|
11855
|
+
const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2(n_head_kv));
|
|
11856
|
+
|
|
11857
|
+
const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
|
|
11858
|
+
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
|
|
11859
|
+
|
|
11480
11860
|
const int nc = src0->ne[0];
|
|
11481
11861
|
const int nr = ggml_nrows(src0);
|
|
11482
11862
|
|
|
@@ -11489,6 +11869,9 @@ static void ggml_compute_forward_soft_max_f32(
|
|
|
11489
11869
|
|
|
11490
11870
|
float * wp = (float *) params->wdata + (nc + CACHE_LINE_SIZE_F32) * ith;
|
|
11491
11871
|
|
|
11872
|
+
// when max_bias <= 0.0f, src2 is not used and we default it to src0 to avoid branching
|
|
11873
|
+
float * pos = src2 ? (float *) src2->data : src0->data;
|
|
11874
|
+
|
|
11492
11875
|
for (int i1 = ir0; i1 < ir1; i1++) {
|
|
11493
11876
|
float * sp = (float *)((char *) src0->data + i1*src0->nb[1]);
|
|
11494
11877
|
float * dp = (float *)((char *) dst->data + i1*dst->nb[1]);
|
|
@@ -11502,6 +11885,16 @@ static void ggml_compute_forward_soft_max_f32(
|
|
|
11502
11885
|
ggml_vec_acc_f32(nc, wp, mp);
|
|
11503
11886
|
}
|
|
11504
11887
|
|
|
11888
|
+
// ALiBi bias
|
|
11889
|
+
if (max_bias > 0.0f) {
|
|
11890
|
+
const uint32_t h = (i1/ne01)%ne02; // head
|
|
11891
|
+
const float slope = h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1);
|
|
11892
|
+
|
|
11893
|
+
for (int i = 0; i < nc; i++) {
|
|
11894
|
+
wp[i] = wp[i] + slope*pos[i];
|
|
11895
|
+
}
|
|
11896
|
+
}
|
|
11897
|
+
|
|
11505
11898
|
#ifndef NDEBUG
|
|
11506
11899
|
for (int i = 0; i < nc; ++i) {
|
|
11507
11900
|
//printf("p[%d] = %f\n", i, p[i]);
|
|
@@ -11544,13 +11937,14 @@ static void ggml_compute_forward_soft_max_f32(
|
|
|
11544
11937
|
|
|
11545
11938
|
static void ggml_compute_forward_soft_max(
|
|
11546
11939
|
const struct ggml_compute_params * params,
|
|
11547
|
-
const struct ggml_tensor * src0,
|
|
11548
|
-
const struct ggml_tensor * src1,
|
|
11549
11940
|
struct ggml_tensor * dst) {
|
|
11941
|
+
|
|
11942
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11943
|
+
|
|
11550
11944
|
switch (src0->type) {
|
|
11551
11945
|
case GGML_TYPE_F32:
|
|
11552
11946
|
{
|
|
11553
|
-
ggml_compute_forward_soft_max_f32(params,
|
|
11947
|
+
ggml_compute_forward_soft_max_f32(params, dst);
|
|
11554
11948
|
} break;
|
|
11555
11949
|
default:
|
|
11556
11950
|
{
|
|
@@ -11563,16 +11957,18 @@ static void ggml_compute_forward_soft_max(
|
|
|
11563
11957
|
|
|
11564
11958
|
static void ggml_compute_forward_soft_max_back_f32(
|
|
11565
11959
|
const struct ggml_compute_params * params,
|
|
11566
|
-
const struct ggml_tensor * src0,
|
|
11567
|
-
const struct ggml_tensor * src1,
|
|
11568
11960
|
struct ggml_tensor * dst) {
|
|
11961
|
+
|
|
11962
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
11963
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
11964
|
+
|
|
11569
11965
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
|
11570
11966
|
GGML_ASSERT(ggml_is_contiguous(src1));
|
|
11571
11967
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
|
11572
11968
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
11573
11969
|
GGML_ASSERT(ggml_are_same_shape(src1, dst));
|
|
11574
11970
|
|
|
11575
|
-
if (params->type ==
|
|
11971
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11576
11972
|
return;
|
|
11577
11973
|
}
|
|
11578
11974
|
|
|
@@ -11640,13 +12036,14 @@ static void ggml_compute_forward_soft_max_back_f32(
|
|
|
11640
12036
|
|
|
11641
12037
|
static void ggml_compute_forward_soft_max_back(
|
|
11642
12038
|
const struct ggml_compute_params * params,
|
|
11643
|
-
const struct ggml_tensor * src0,
|
|
11644
|
-
const struct ggml_tensor * src1,
|
|
11645
12039
|
struct ggml_tensor * dst) {
|
|
12040
|
+
|
|
12041
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12042
|
+
|
|
11646
12043
|
switch (src0->type) {
|
|
11647
12044
|
case GGML_TYPE_F32:
|
|
11648
12045
|
{
|
|
11649
|
-
ggml_compute_forward_soft_max_back_f32(params,
|
|
12046
|
+
ggml_compute_forward_soft_max_back_f32(params, dst);
|
|
11650
12047
|
} break;
|
|
11651
12048
|
default:
|
|
11652
12049
|
{
|
|
@@ -11659,11 +12056,13 @@ static void ggml_compute_forward_soft_max_back(
|
|
|
11659
12056
|
|
|
11660
12057
|
static void ggml_compute_forward_alibi_f32(
|
|
11661
12058
|
const struct ggml_compute_params * params,
|
|
11662
|
-
const struct ggml_tensor * src0,
|
|
11663
12059
|
struct ggml_tensor * dst) {
|
|
12060
|
+
|
|
12061
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12062
|
+
|
|
11664
12063
|
assert(params->ith == 0);
|
|
11665
12064
|
|
|
11666
|
-
if (params->type ==
|
|
12065
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11667
12066
|
return;
|
|
11668
12067
|
}
|
|
11669
12068
|
|
|
@@ -11694,22 +12093,20 @@ static void ggml_compute_forward_alibi_f32(
|
|
|
11694
12093
|
const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
|
|
11695
12094
|
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
|
|
11696
12095
|
|
|
11697
|
-
for (int64_t
|
|
11698
|
-
|
|
11699
|
-
|
|
11700
|
-
float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
|
|
11701
|
-
float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2);
|
|
11702
|
-
|
|
11703
|
-
// TODO: k*nb2 or k*nb3
|
|
12096
|
+
for (int64_t k = 0; k < ne2_ne3; k++) {
|
|
12097
|
+
// TODO: k*nb2 or k*nb3
|
|
12098
|
+
float m_k;
|
|
11704
12099
|
|
|
11705
|
-
|
|
11706
|
-
|
|
11707
|
-
|
|
11708
|
-
|
|
11709
|
-
|
|
11710
|
-
m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
|
|
11711
|
-
}
|
|
12100
|
+
if (k < n_heads_log2_floor) {
|
|
12101
|
+
m_k = powf(m0, k + 1);
|
|
12102
|
+
} else {
|
|
12103
|
+
m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
|
|
12104
|
+
}
|
|
11712
12105
|
|
|
12106
|
+
for (int64_t i = 0; i < ne0; i++) {
|
|
12107
|
+
for (int64_t j = 0; j < ne1; j++) {
|
|
12108
|
+
float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
|
|
12109
|
+
float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2);
|
|
11713
12110
|
pdst[0] = i * m_k + src[0];
|
|
11714
12111
|
}
|
|
11715
12112
|
}
|
|
@@ -11718,11 +12115,13 @@ static void ggml_compute_forward_alibi_f32(
|
|
|
11718
12115
|
|
|
11719
12116
|
static void ggml_compute_forward_alibi_f16(
|
|
11720
12117
|
const struct ggml_compute_params * params,
|
|
11721
|
-
const struct ggml_tensor * src0,
|
|
11722
12118
|
struct ggml_tensor * dst) {
|
|
12119
|
+
|
|
12120
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12121
|
+
|
|
11723
12122
|
assert(params->ith == 0);
|
|
11724
12123
|
|
|
11725
|
-
if (params->type ==
|
|
12124
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11726
12125
|
return;
|
|
11727
12126
|
}
|
|
11728
12127
|
|
|
@@ -11754,21 +12153,20 @@ static void ggml_compute_forward_alibi_f16(
|
|
|
11754
12153
|
const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
|
|
11755
12154
|
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
|
|
11756
12155
|
|
|
11757
|
-
for (int
|
|
11758
|
-
|
|
11759
|
-
|
|
11760
|
-
ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
|
|
11761
|
-
float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2);
|
|
11762
|
-
|
|
11763
|
-
// TODO: k*nb2 or k*nb3
|
|
12156
|
+
for (int k = 0; k < ne2_ne3; k++) {
|
|
12157
|
+
// TODO: k*nb2 or k*nb3
|
|
12158
|
+
float m_k;
|
|
11764
12159
|
|
|
11765
|
-
|
|
12160
|
+
if (k < n_heads_log2_floor) {
|
|
12161
|
+
m_k = powf(m0, k + 1);
|
|
12162
|
+
} else {
|
|
12163
|
+
m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
|
|
12164
|
+
}
|
|
11766
12165
|
|
|
11767
|
-
|
|
11768
|
-
|
|
11769
|
-
|
|
11770
|
-
|
|
11771
|
-
}
|
|
12166
|
+
for (int i = 0; i < ne0; i++) {
|
|
12167
|
+
for (int j = 0; j < ne1; j++) {
|
|
12168
|
+
ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
|
|
12169
|
+
float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2);
|
|
11772
12170
|
|
|
11773
12171
|
// we return F32
|
|
11774
12172
|
pdst[0] = i * m_k + GGML_FP16_TO_FP32(src[0]);
|
|
@@ -11779,16 +12177,18 @@ static void ggml_compute_forward_alibi_f16(
|
|
|
11779
12177
|
|
|
11780
12178
|
static void ggml_compute_forward_alibi(
|
|
11781
12179
|
const struct ggml_compute_params * params,
|
|
11782
|
-
const struct ggml_tensor * src0,
|
|
11783
12180
|
struct ggml_tensor * dst) {
|
|
12181
|
+
|
|
12182
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12183
|
+
|
|
11784
12184
|
switch (src0->type) {
|
|
11785
12185
|
case GGML_TYPE_F16:
|
|
11786
12186
|
{
|
|
11787
|
-
ggml_compute_forward_alibi_f16(params,
|
|
12187
|
+
ggml_compute_forward_alibi_f16(params, dst);
|
|
11788
12188
|
} break;
|
|
11789
12189
|
case GGML_TYPE_F32:
|
|
11790
12190
|
{
|
|
11791
|
-
ggml_compute_forward_alibi_f32(params,
|
|
12191
|
+
ggml_compute_forward_alibi_f32(params, dst);
|
|
11792
12192
|
} break;
|
|
11793
12193
|
case GGML_TYPE_Q4_0:
|
|
11794
12194
|
case GGML_TYPE_Q4_1:
|
|
@@ -11804,6 +12204,11 @@ static void ggml_compute_forward_alibi(
|
|
|
11804
12204
|
case GGML_TYPE_IQ2_XXS:
|
|
11805
12205
|
case GGML_TYPE_IQ2_XS:
|
|
11806
12206
|
case GGML_TYPE_IQ3_XXS:
|
|
12207
|
+
case GGML_TYPE_IQ1_S:
|
|
12208
|
+
case GGML_TYPE_IQ4_NL:
|
|
12209
|
+
case GGML_TYPE_IQ4_XS:
|
|
12210
|
+
case GGML_TYPE_IQ3_S:
|
|
12211
|
+
case GGML_TYPE_IQ2_S:
|
|
11807
12212
|
case GGML_TYPE_Q8_K:
|
|
11808
12213
|
case GGML_TYPE_I8:
|
|
11809
12214
|
case GGML_TYPE_I16:
|
|
@@ -11819,11 +12224,13 @@ static void ggml_compute_forward_alibi(
|
|
|
11819
12224
|
|
|
11820
12225
|
static void ggml_compute_forward_clamp_f32(
|
|
11821
12226
|
const struct ggml_compute_params * params,
|
|
11822
|
-
const struct ggml_tensor * src0,
|
|
11823
12227
|
struct ggml_tensor * dst) {
|
|
12228
|
+
|
|
12229
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12230
|
+
|
|
11824
12231
|
assert(params->ith == 0);
|
|
11825
12232
|
|
|
11826
|
-
if (params->type ==
|
|
12233
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11827
12234
|
return;
|
|
11828
12235
|
}
|
|
11829
12236
|
|
|
@@ -11859,12 +12266,14 @@ static void ggml_compute_forward_clamp_f32(
|
|
|
11859
12266
|
|
|
11860
12267
|
static void ggml_compute_forward_clamp(
|
|
11861
12268
|
const struct ggml_compute_params * params,
|
|
11862
|
-
const struct ggml_tensor * src0,
|
|
11863
12269
|
struct ggml_tensor * dst) {
|
|
12270
|
+
|
|
12271
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12272
|
+
|
|
11864
12273
|
switch (src0->type) {
|
|
11865
12274
|
case GGML_TYPE_F32:
|
|
11866
12275
|
{
|
|
11867
|
-
ggml_compute_forward_clamp_f32(params,
|
|
12276
|
+
ggml_compute_forward_clamp_f32(params, dst);
|
|
11868
12277
|
} break;
|
|
11869
12278
|
case GGML_TYPE_F16:
|
|
11870
12279
|
case GGML_TYPE_Q4_0:
|
|
@@ -11881,6 +12290,11 @@ static void ggml_compute_forward_clamp(
|
|
|
11881
12290
|
case GGML_TYPE_IQ2_XXS:
|
|
11882
12291
|
case GGML_TYPE_IQ2_XS:
|
|
11883
12292
|
case GGML_TYPE_IQ3_XXS:
|
|
12293
|
+
case GGML_TYPE_IQ1_S:
|
|
12294
|
+
case GGML_TYPE_IQ4_NL:
|
|
12295
|
+
case GGML_TYPE_IQ4_XS:
|
|
12296
|
+
case GGML_TYPE_IQ3_S:
|
|
12297
|
+
case GGML_TYPE_IQ2_S:
|
|
11884
12298
|
case GGML_TYPE_Q8_K:
|
|
11885
12299
|
case GGML_TYPE_I8:
|
|
11886
12300
|
case GGML_TYPE_I16:
|
|
@@ -11952,11 +12366,13 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
|
|
|
11952
12366
|
|
|
11953
12367
|
static void ggml_compute_forward_rope_f32(
|
|
11954
12368
|
const struct ggml_compute_params * params,
|
|
11955
|
-
const struct ggml_tensor * src0,
|
|
11956
|
-
const struct ggml_tensor * src1,
|
|
11957
12369
|
struct ggml_tensor * dst,
|
|
11958
12370
|
const bool forward) {
|
|
11959
|
-
|
|
12371
|
+
|
|
12372
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12373
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
12374
|
+
|
|
12375
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
11960
12376
|
return;
|
|
11961
12377
|
}
|
|
11962
12378
|
|
|
@@ -12128,11 +12544,13 @@ static void ggml_compute_forward_rope_f32(
|
|
|
12128
12544
|
|
|
12129
12545
|
static void ggml_compute_forward_rope_f16(
|
|
12130
12546
|
const struct ggml_compute_params * params,
|
|
12131
|
-
const struct ggml_tensor * src0,
|
|
12132
|
-
const struct ggml_tensor * src1,
|
|
12133
12547
|
struct ggml_tensor * dst,
|
|
12134
12548
|
const bool forward) {
|
|
12135
|
-
|
|
12549
|
+
|
|
12550
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12551
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
12552
|
+
|
|
12553
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
12136
12554
|
return;
|
|
12137
12555
|
}
|
|
12138
12556
|
|
|
@@ -12293,17 +12711,18 @@ static void ggml_compute_forward_rope_f16(
|
|
|
12293
12711
|
|
|
12294
12712
|
static void ggml_compute_forward_rope(
|
|
12295
12713
|
const struct ggml_compute_params * params,
|
|
12296
|
-
const struct ggml_tensor * src0,
|
|
12297
|
-
const struct ggml_tensor * src1,
|
|
12298
12714
|
struct ggml_tensor * dst) {
|
|
12715
|
+
|
|
12716
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12717
|
+
|
|
12299
12718
|
switch (src0->type) {
|
|
12300
12719
|
case GGML_TYPE_F16:
|
|
12301
12720
|
{
|
|
12302
|
-
ggml_compute_forward_rope_f16(params,
|
|
12721
|
+
ggml_compute_forward_rope_f16(params, dst, true);
|
|
12303
12722
|
} break;
|
|
12304
12723
|
case GGML_TYPE_F32:
|
|
12305
12724
|
{
|
|
12306
|
-
ggml_compute_forward_rope_f32(params,
|
|
12725
|
+
ggml_compute_forward_rope_f32(params, dst, true);
|
|
12307
12726
|
} break;
|
|
12308
12727
|
default:
|
|
12309
12728
|
{
|
|
@@ -12316,17 +12735,18 @@ static void ggml_compute_forward_rope(
|
|
|
12316
12735
|
|
|
12317
12736
|
static void ggml_compute_forward_rope_back(
|
|
12318
12737
|
const struct ggml_compute_params * params,
|
|
12319
|
-
const struct ggml_tensor * src0,
|
|
12320
|
-
const struct ggml_tensor * src1,
|
|
12321
12738
|
struct ggml_tensor * dst) {
|
|
12739
|
+
|
|
12740
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12741
|
+
|
|
12322
12742
|
switch (src0->type) {
|
|
12323
12743
|
case GGML_TYPE_F16:
|
|
12324
12744
|
{
|
|
12325
|
-
ggml_compute_forward_rope_f16(params,
|
|
12745
|
+
ggml_compute_forward_rope_f16(params, dst, false);
|
|
12326
12746
|
} break;
|
|
12327
12747
|
case GGML_TYPE_F32:
|
|
12328
12748
|
{
|
|
12329
|
-
ggml_compute_forward_rope_f32(params,
|
|
12749
|
+
ggml_compute_forward_rope_f32(params, dst, false);
|
|
12330
12750
|
} break;
|
|
12331
12751
|
default:
|
|
12332
12752
|
{
|
|
@@ -12339,9 +12759,11 @@ static void ggml_compute_forward_rope_back(
|
|
|
12339
12759
|
|
|
12340
12760
|
static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
12341
12761
|
const struct ggml_compute_params * params,
|
|
12342
|
-
const struct ggml_tensor * src0,
|
|
12343
|
-
const struct ggml_tensor * src1,
|
|
12344
12762
|
struct ggml_tensor * dst) {
|
|
12763
|
+
|
|
12764
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12765
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
12766
|
+
|
|
12345
12767
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
12346
12768
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
12347
12769
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
|
@@ -12359,7 +12781,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
|
12359
12781
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
|
12360
12782
|
GGML_ASSERT(nb10 == sizeof(float));
|
|
12361
12783
|
|
|
12362
|
-
if (params->type ==
|
|
12784
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
12363
12785
|
if (ith != 0) {
|
|
12364
12786
|
return;
|
|
12365
12787
|
}
|
|
@@ -12399,7 +12821,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
|
12399
12821
|
return;
|
|
12400
12822
|
}
|
|
12401
12823
|
|
|
12402
|
-
if (params->type ==
|
|
12824
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
12403
12825
|
return;
|
|
12404
12826
|
}
|
|
12405
12827
|
|
|
@@ -12436,9 +12858,11 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
|
12436
12858
|
|
|
12437
12859
|
static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
12438
12860
|
const struct ggml_compute_params * params,
|
|
12439
|
-
const struct ggml_tensor * src0,
|
|
12440
|
-
const struct ggml_tensor * src1,
|
|
12441
12861
|
struct ggml_tensor * dst) {
|
|
12862
|
+
|
|
12863
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12864
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
12865
|
+
|
|
12442
12866
|
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
|
12443
12867
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
12444
12868
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
|
@@ -12456,7 +12880,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
|
12456
12880
|
GGML_ASSERT(nb00 == sizeof(float));
|
|
12457
12881
|
GGML_ASSERT(nb10 == sizeof(float));
|
|
12458
12882
|
|
|
12459
|
-
if (params->type ==
|
|
12883
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
12460
12884
|
if (ith != 0) {
|
|
12461
12885
|
return;
|
|
12462
12886
|
}
|
|
@@ -12496,7 +12920,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
|
12496
12920
|
return;
|
|
12497
12921
|
}
|
|
12498
12922
|
|
|
12499
|
-
if (params->type ==
|
|
12923
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
12500
12924
|
return;
|
|
12501
12925
|
}
|
|
12502
12926
|
|
|
@@ -12533,17 +12957,18 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
|
12533
12957
|
|
|
12534
12958
|
static void ggml_compute_forward_conv_transpose_1d(
|
|
12535
12959
|
const struct ggml_compute_params * params,
|
|
12536
|
-
const struct ggml_tensor * src0,
|
|
12537
|
-
const struct ggml_tensor * src1,
|
|
12538
12960
|
struct ggml_tensor * dst) {
|
|
12961
|
+
|
|
12962
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12963
|
+
|
|
12539
12964
|
switch (src0->type) {
|
|
12540
12965
|
case GGML_TYPE_F16:
|
|
12541
12966
|
{
|
|
12542
|
-
ggml_compute_forward_conv_transpose_1d_f16_f32(params,
|
|
12967
|
+
ggml_compute_forward_conv_transpose_1d_f16_f32(params, dst);
|
|
12543
12968
|
} break;
|
|
12544
12969
|
case GGML_TYPE_F32:
|
|
12545
12970
|
{
|
|
12546
|
-
ggml_compute_forward_conv_transpose_1d_f32(params,
|
|
12971
|
+
ggml_compute_forward_conv_transpose_1d_f32(params, dst);
|
|
12547
12972
|
} break;
|
|
12548
12973
|
default:
|
|
12549
12974
|
{
|
|
@@ -12557,9 +12982,11 @@ static void ggml_compute_forward_conv_transpose_1d(
|
|
|
12557
12982
|
// dst: result [N, OH, OW, IC*KH*KW]
|
|
12558
12983
|
static void ggml_compute_forward_im2col_f32(
|
|
12559
12984
|
const struct ggml_compute_params * params,
|
|
12560
|
-
const struct ggml_tensor * src0,
|
|
12561
|
-
const struct ggml_tensor * src1,
|
|
12562
12985
|
struct ggml_tensor * dst) {
|
|
12986
|
+
|
|
12987
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
12988
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
12989
|
+
|
|
12563
12990
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
12564
12991
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
12565
12992
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
|
@@ -12597,11 +13024,11 @@ static void ggml_compute_forward_im2col_f32(
|
|
|
12597
13024
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
|
12598
13025
|
GGML_ASSERT(nb10 == sizeof(float));
|
|
12599
13026
|
|
|
12600
|
-
if (params->type ==
|
|
13027
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
12601
13028
|
return;
|
|
12602
13029
|
}
|
|
12603
13030
|
|
|
12604
|
-
if (params->type ==
|
|
13031
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
12605
13032
|
return;
|
|
12606
13033
|
}
|
|
12607
13034
|
|
|
@@ -12643,9 +13070,11 @@ static void ggml_compute_forward_im2col_f32(
|
|
|
12643
13070
|
// dst: result [N, OH, OW, IC*KH*KW]
|
|
12644
13071
|
static void ggml_compute_forward_im2col_f16(
|
|
12645
13072
|
const struct ggml_compute_params * params,
|
|
12646
|
-
const struct ggml_tensor * src0,
|
|
12647
|
-
const struct ggml_tensor * src1,
|
|
12648
13073
|
struct ggml_tensor * dst) {
|
|
13074
|
+
|
|
13075
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
13076
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
13077
|
+
|
|
12649
13078
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
12650
13079
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
12651
13080
|
GGML_ASSERT( dst->type == GGML_TYPE_F16);
|
|
@@ -12683,11 +13112,11 @@ static void ggml_compute_forward_im2col_f16(
|
|
|
12683
13112
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
|
12684
13113
|
GGML_ASSERT(nb10 == sizeof(float));
|
|
12685
13114
|
|
|
12686
|
-
if (params->type ==
|
|
13115
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
12687
13116
|
return;
|
|
12688
13117
|
}
|
|
12689
13118
|
|
|
12690
|
-
if (params->type ==
|
|
13119
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
12691
13120
|
return;
|
|
12692
13121
|
}
|
|
12693
13122
|
|
|
@@ -12725,17 +13154,15 @@ static void ggml_compute_forward_im2col_f16(
|
|
|
12725
13154
|
|
|
12726
13155
|
static void ggml_compute_forward_im2col(
|
|
12727
13156
|
const struct ggml_compute_params * params,
|
|
12728
|
-
const struct ggml_tensor * src0,
|
|
12729
|
-
const struct ggml_tensor * src1,
|
|
12730
13157
|
struct ggml_tensor * dst) {
|
|
12731
13158
|
switch (dst->type) {
|
|
12732
13159
|
case GGML_TYPE_F16:
|
|
12733
13160
|
{
|
|
12734
|
-
ggml_compute_forward_im2col_f16(params,
|
|
13161
|
+
ggml_compute_forward_im2col_f16(params, dst);
|
|
12735
13162
|
} break;
|
|
12736
13163
|
case GGML_TYPE_F32:
|
|
12737
13164
|
{
|
|
12738
|
-
ggml_compute_forward_im2col_f32(params,
|
|
13165
|
+
ggml_compute_forward_im2col_f32(params, dst);
|
|
12739
13166
|
} break;
|
|
12740
13167
|
default:
|
|
12741
13168
|
{
|
|
@@ -12749,9 +13176,11 @@ static void ggml_compute_forward_im2col(
|
|
|
12749
13176
|
|
|
12750
13177
|
static void ggml_compute_forward_conv_transpose_2d(
|
|
12751
13178
|
const struct ggml_compute_params * params,
|
|
12752
|
-
const struct ggml_tensor * src0,
|
|
12753
|
-
const struct ggml_tensor * src1,
|
|
12754
13179
|
struct ggml_tensor * dst) {
|
|
13180
|
+
|
|
13181
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
13182
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
13183
|
+
|
|
12755
13184
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
12756
13185
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
12757
13186
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
|
@@ -12769,7 +13198,7 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
|
12769
13198
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
|
12770
13199
|
GGML_ASSERT(nb10 == sizeof(float));
|
|
12771
13200
|
|
|
12772
|
-
if (params->type ==
|
|
13201
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
12773
13202
|
if (ith != 0) {
|
|
12774
13203
|
return;
|
|
12775
13204
|
}
|
|
@@ -12811,7 +13240,7 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
|
12811
13240
|
return;
|
|
12812
13241
|
}
|
|
12813
13242
|
|
|
12814
|
-
if (params->type ==
|
|
13243
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
12815
13244
|
return;
|
|
12816
13245
|
}
|
|
12817
13246
|
|
|
@@ -12855,13 +13284,15 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
|
12855
13284
|
static void ggml_compute_forward_pool_1d_sk_p0(
|
|
12856
13285
|
const struct ggml_compute_params * params,
|
|
12857
13286
|
const enum ggml_op_pool op,
|
|
12858
|
-
const struct ggml_tensor * src,
|
|
12859
13287
|
const int k,
|
|
12860
13288
|
struct ggml_tensor * dst) {
|
|
13289
|
+
|
|
13290
|
+
const struct ggml_tensor * src = dst->src[0];
|
|
13291
|
+
|
|
12861
13292
|
assert(src->type == GGML_TYPE_F32);
|
|
12862
13293
|
assert(params->ith == 0);
|
|
12863
13294
|
|
|
12864
|
-
if (params->type ==
|
|
13295
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
12865
13296
|
return;
|
|
12866
13297
|
}
|
|
12867
13298
|
|
|
@@ -12906,7 +13337,6 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
|
12906
13337
|
|
|
12907
13338
|
static void ggml_compute_forward_pool_1d(
|
|
12908
13339
|
const struct ggml_compute_params * params,
|
|
12909
|
-
const struct ggml_tensor * src0,
|
|
12910
13340
|
struct ggml_tensor * dst) {
|
|
12911
13341
|
|
|
12912
13342
|
const int32_t * opts = (const int32_t *)dst->op_params;
|
|
@@ -12917,19 +13347,21 @@ static void ggml_compute_forward_pool_1d(
|
|
|
12917
13347
|
GGML_ASSERT(p0 == 0); // padding not supported
|
|
12918
13348
|
GGML_ASSERT(k0 == s0); // only s = k supported
|
|
12919
13349
|
|
|
12920
|
-
ggml_compute_forward_pool_1d_sk_p0(params, op,
|
|
13350
|
+
ggml_compute_forward_pool_1d_sk_p0(params, op, k0, dst);
|
|
12921
13351
|
}
|
|
12922
13352
|
|
|
12923
13353
|
// ggml_compute_forward_pool_2d
|
|
12924
13354
|
|
|
12925
13355
|
static void ggml_compute_forward_pool_2d(
|
|
12926
13356
|
const struct ggml_compute_params * params,
|
|
12927
|
-
const struct ggml_tensor * src,
|
|
12928
13357
|
struct ggml_tensor * dst) {
|
|
13358
|
+
|
|
13359
|
+
const struct ggml_tensor * src = dst->src[0];
|
|
13360
|
+
|
|
12929
13361
|
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
|
12930
13362
|
GGML_ASSERT(params->ith == 0);
|
|
12931
13363
|
|
|
12932
|
-
if (params->type ==
|
|
13364
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
12933
13365
|
return;
|
|
12934
13366
|
}
|
|
12935
13367
|
|
|
@@ -12998,10 +13430,11 @@ static void ggml_compute_forward_pool_2d(
|
|
|
12998
13430
|
|
|
12999
13431
|
static void ggml_compute_forward_upscale_f32(
|
|
13000
13432
|
const struct ggml_compute_params * params,
|
|
13001
|
-
const struct ggml_tensor * src0,
|
|
13002
13433
|
struct ggml_tensor * dst) {
|
|
13003
13434
|
|
|
13004
|
-
|
|
13435
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
13436
|
+
|
|
13437
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
13005
13438
|
return;
|
|
13006
13439
|
}
|
|
13007
13440
|
|
|
@@ -13037,12 +13470,14 @@ static void ggml_compute_forward_upscale_f32(
|
|
|
13037
13470
|
|
|
13038
13471
|
static void ggml_compute_forward_upscale(
|
|
13039
13472
|
const struct ggml_compute_params * params,
|
|
13040
|
-
const struct ggml_tensor * src0,
|
|
13041
13473
|
struct ggml_tensor * dst) {
|
|
13474
|
+
|
|
13475
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
13476
|
+
|
|
13042
13477
|
switch (src0->type) {
|
|
13043
13478
|
case GGML_TYPE_F32:
|
|
13044
13479
|
{
|
|
13045
|
-
ggml_compute_forward_upscale_f32(params,
|
|
13480
|
+
ggml_compute_forward_upscale_f32(params, dst);
|
|
13046
13481
|
} break;
|
|
13047
13482
|
default:
|
|
13048
13483
|
{
|
|
@@ -13055,10 +13490,11 @@ static void ggml_compute_forward_upscale(
|
|
|
13055
13490
|
|
|
13056
13491
|
static void ggml_compute_forward_pad_f32(
|
|
13057
13492
|
const struct ggml_compute_params * params,
|
|
13058
|
-
const struct ggml_tensor * src0,
|
|
13059
13493
|
struct ggml_tensor * dst) {
|
|
13060
13494
|
|
|
13061
|
-
|
|
13495
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
13496
|
+
|
|
13497
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
13062
13498
|
return;
|
|
13063
13499
|
}
|
|
13064
13500
|
|
|
@@ -13095,12 +13531,14 @@ static void ggml_compute_forward_pad_f32(
|
|
|
13095
13531
|
|
|
13096
13532
|
static void ggml_compute_forward_pad(
|
|
13097
13533
|
const struct ggml_compute_params * params,
|
|
13098
|
-
const struct ggml_tensor * src0,
|
|
13099
13534
|
struct ggml_tensor * dst) {
|
|
13535
|
+
|
|
13536
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
13537
|
+
|
|
13100
13538
|
switch (src0->type) {
|
|
13101
13539
|
case GGML_TYPE_F32:
|
|
13102
13540
|
{
|
|
13103
|
-
ggml_compute_forward_pad_f32(params,
|
|
13541
|
+
ggml_compute_forward_pad_f32(params, dst);
|
|
13104
13542
|
} break;
|
|
13105
13543
|
default:
|
|
13106
13544
|
{
|
|
@@ -13113,10 +13551,11 @@ static void ggml_compute_forward_pad(
|
|
|
13113
13551
|
|
|
13114
13552
|
static void ggml_compute_forward_argsort_f32(
|
|
13115
13553
|
const struct ggml_compute_params * params,
|
|
13116
|
-
const struct ggml_tensor * src0,
|
|
13117
13554
|
struct ggml_tensor * dst) {
|
|
13118
13555
|
|
|
13119
|
-
|
|
13556
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
13557
|
+
|
|
13558
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
13120
13559
|
return;
|
|
13121
13560
|
}
|
|
13122
13561
|
|
|
@@ -13142,8 +13581,8 @@ static void ggml_compute_forward_argsort_f32(
|
|
|
13142
13581
|
// C doesn't have a functional sort, so we do a bubble sort instead
|
|
13143
13582
|
for (int64_t j = 0; j < ne0; j++) {
|
|
13144
13583
|
for (int64_t k = j + 1; k < ne0; k++) {
|
|
13145
|
-
if ((order ==
|
|
13146
|
-
(order ==
|
|
13584
|
+
if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
|
|
13585
|
+
(order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
|
|
13147
13586
|
int32_t tmp = dst_data[j];
|
|
13148
13587
|
dst_data[j] = dst_data[k];
|
|
13149
13588
|
dst_data[k] = tmp;
|
|
@@ -13155,13 +13594,14 @@ static void ggml_compute_forward_argsort_f32(
|
|
|
13155
13594
|
|
|
13156
13595
|
static void ggml_compute_forward_argsort(
|
|
13157
13596
|
const struct ggml_compute_params * params,
|
|
13158
|
-
const struct ggml_tensor * src0,
|
|
13159
13597
|
struct ggml_tensor * dst) {
|
|
13160
13598
|
|
|
13599
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
13600
|
+
|
|
13161
13601
|
switch (src0->type) {
|
|
13162
13602
|
case GGML_TYPE_F32:
|
|
13163
13603
|
{
|
|
13164
|
-
ggml_compute_forward_argsort_f32(params,
|
|
13604
|
+
ggml_compute_forward_argsort_f32(params, dst);
|
|
13165
13605
|
} break;
|
|
13166
13606
|
default:
|
|
13167
13607
|
{
|
|
@@ -13174,11 +13614,13 @@ static void ggml_compute_forward_argsort(
|
|
|
13174
13614
|
|
|
13175
13615
|
static void ggml_compute_forward_flash_attn_f32(
|
|
13176
13616
|
const struct ggml_compute_params * params,
|
|
13177
|
-
const struct ggml_tensor * q,
|
|
13178
|
-
const struct ggml_tensor * k,
|
|
13179
|
-
const struct ggml_tensor * v,
|
|
13180
13617
|
const bool masked,
|
|
13181
13618
|
struct ggml_tensor * dst) {
|
|
13619
|
+
|
|
13620
|
+
const struct ggml_tensor * q = dst->src[0];
|
|
13621
|
+
const struct ggml_tensor * k = dst->src[1];
|
|
13622
|
+
const struct ggml_tensor * v = dst->src[2];
|
|
13623
|
+
|
|
13182
13624
|
int64_t t0 = ggml_perf_time_us();
|
|
13183
13625
|
UNUSED(t0);
|
|
13184
13626
|
|
|
@@ -13223,11 +13665,11 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
|
13223
13665
|
GGML_ASSERT(nb1 <= nb2);
|
|
13224
13666
|
GGML_ASSERT(nb2 <= nb3);
|
|
13225
13667
|
|
|
13226
|
-
if (params->type ==
|
|
13668
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
13227
13669
|
return;
|
|
13228
13670
|
}
|
|
13229
13671
|
|
|
13230
|
-
if (params->type ==
|
|
13672
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
13231
13673
|
return;
|
|
13232
13674
|
}
|
|
13233
13675
|
|
|
@@ -13364,11 +13806,13 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
|
13364
13806
|
|
|
13365
13807
|
static void ggml_compute_forward_flash_attn_f16(
|
|
13366
13808
|
const struct ggml_compute_params * params,
|
|
13367
|
-
const struct ggml_tensor * q,
|
|
13368
|
-
const struct ggml_tensor * k,
|
|
13369
|
-
const struct ggml_tensor * v,
|
|
13370
13809
|
const bool masked,
|
|
13371
13810
|
struct ggml_tensor * dst) {
|
|
13811
|
+
|
|
13812
|
+
const struct ggml_tensor * q = dst->src[0];
|
|
13813
|
+
const struct ggml_tensor * k = dst->src[1];
|
|
13814
|
+
const struct ggml_tensor * v = dst->src[2];
|
|
13815
|
+
|
|
13372
13816
|
int64_t t0 = ggml_perf_time_us();
|
|
13373
13817
|
UNUSED(t0);
|
|
13374
13818
|
|
|
@@ -13413,11 +13857,11 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
|
13413
13857
|
GGML_ASSERT(nb1 <= nb2);
|
|
13414
13858
|
GGML_ASSERT(nb2 <= nb3);
|
|
13415
13859
|
|
|
13416
|
-
if (params->type ==
|
|
13860
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
13417
13861
|
return;
|
|
13418
13862
|
}
|
|
13419
13863
|
|
|
13420
|
-
if (params->type ==
|
|
13864
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
13421
13865
|
return;
|
|
13422
13866
|
}
|
|
13423
13867
|
|
|
@@ -13590,19 +14034,19 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
|
13590
14034
|
|
|
13591
14035
|
static void ggml_compute_forward_flash_attn(
|
|
13592
14036
|
const struct ggml_compute_params * params,
|
|
13593
|
-
const struct ggml_tensor * q,
|
|
13594
|
-
const struct ggml_tensor * k,
|
|
13595
|
-
const struct ggml_tensor * v,
|
|
13596
14037
|
const bool masked,
|
|
13597
14038
|
struct ggml_tensor * dst) {
|
|
14039
|
+
|
|
14040
|
+
const struct ggml_tensor * q = dst->src[0];
|
|
14041
|
+
|
|
13598
14042
|
switch (q->type) {
|
|
13599
14043
|
case GGML_TYPE_F16:
|
|
13600
14044
|
{
|
|
13601
|
-
ggml_compute_forward_flash_attn_f16(params,
|
|
14045
|
+
ggml_compute_forward_flash_attn_f16(params, masked, dst);
|
|
13602
14046
|
} break;
|
|
13603
14047
|
case GGML_TYPE_F32:
|
|
13604
14048
|
{
|
|
13605
|
-
ggml_compute_forward_flash_attn_f32(params,
|
|
14049
|
+
ggml_compute_forward_flash_attn_f32(params, masked, dst);
|
|
13606
14050
|
} break;
|
|
13607
14051
|
default:
|
|
13608
14052
|
{
|
|
@@ -13615,12 +14059,14 @@ static void ggml_compute_forward_flash_attn(
|
|
|
13615
14059
|
|
|
13616
14060
|
static void ggml_compute_forward_flash_ff_f16(
|
|
13617
14061
|
const struct ggml_compute_params * params,
|
|
13618
|
-
const struct ggml_tensor * a, // F16
|
|
13619
|
-
const struct ggml_tensor * b0, // F16 fc_w
|
|
13620
|
-
const struct ggml_tensor * b1, // F32 fc_b
|
|
13621
|
-
const struct ggml_tensor * c0, // F16 proj_w
|
|
13622
|
-
const struct ggml_tensor * c1, // F32 proj_b
|
|
13623
14062
|
struct ggml_tensor * dst) {
|
|
14063
|
+
|
|
14064
|
+
const struct ggml_tensor * a = dst->src[0]; // F16
|
|
14065
|
+
const struct ggml_tensor * b0 = dst->src[1]; // F16 fc_w
|
|
14066
|
+
const struct ggml_tensor * b1 = dst->src[2]; // F32 fc_b
|
|
14067
|
+
const struct ggml_tensor * c0 = dst->src[3]; // F16 proj_w
|
|
14068
|
+
const struct ggml_tensor * c1 = dst->src[4]; // F32 proj_b
|
|
14069
|
+
|
|
13624
14070
|
int64_t t0 = ggml_perf_time_us();
|
|
13625
14071
|
UNUSED(t0);
|
|
13626
14072
|
|
|
@@ -13670,11 +14116,11 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
|
13670
14116
|
GGML_ASSERT(nb1 <= nb2);
|
|
13671
14117
|
GGML_ASSERT(nb2 <= nb3);
|
|
13672
14118
|
|
|
13673
|
-
if (params->type ==
|
|
14119
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
13674
14120
|
return;
|
|
13675
14121
|
}
|
|
13676
14122
|
|
|
13677
|
-
if (params->type ==
|
|
14123
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
13678
14124
|
return;
|
|
13679
14125
|
}
|
|
13680
14126
|
|
|
@@ -13748,16 +14194,14 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
|
13748
14194
|
|
|
13749
14195
|
static void ggml_compute_forward_flash_ff(
|
|
13750
14196
|
const struct ggml_compute_params * params,
|
|
13751
|
-
const struct ggml_tensor * a,
|
|
13752
|
-
const struct ggml_tensor * b0,
|
|
13753
|
-
const struct ggml_tensor * b1,
|
|
13754
|
-
const struct ggml_tensor * c0,
|
|
13755
|
-
const struct ggml_tensor * c1,
|
|
13756
14197
|
struct ggml_tensor * dst) {
|
|
14198
|
+
|
|
14199
|
+
const struct ggml_tensor * b0 = dst->src[1];
|
|
14200
|
+
|
|
13757
14201
|
switch (b0->type) {
|
|
13758
14202
|
case GGML_TYPE_F16:
|
|
13759
14203
|
{
|
|
13760
|
-
ggml_compute_forward_flash_ff_f16(params,
|
|
14204
|
+
ggml_compute_forward_flash_ff_f16(params, dst);
|
|
13761
14205
|
} break;
|
|
13762
14206
|
case GGML_TYPE_F32:
|
|
13763
14207
|
{
|
|
@@ -13774,12 +14218,14 @@ static void ggml_compute_forward_flash_ff(
|
|
|
13774
14218
|
|
|
13775
14219
|
static void ggml_compute_forward_flash_attn_back_f32(
|
|
13776
14220
|
const struct ggml_compute_params * params,
|
|
13777
|
-
const struct ggml_tensor * q,
|
|
13778
|
-
const struct ggml_tensor * k,
|
|
13779
|
-
const struct ggml_tensor * v,
|
|
13780
|
-
const struct ggml_tensor * d,
|
|
13781
14221
|
const bool masked,
|
|
13782
14222
|
struct ggml_tensor * dst) {
|
|
14223
|
+
|
|
14224
|
+
const struct ggml_tensor * q = dst->src[0];
|
|
14225
|
+
const struct ggml_tensor * k = dst->src[1];
|
|
14226
|
+
const struct ggml_tensor * v = dst->src[2];
|
|
14227
|
+
const struct ggml_tensor * d = dst->src[3];
|
|
14228
|
+
|
|
13783
14229
|
int64_t t0 = ggml_perf_time_us();
|
|
13784
14230
|
UNUSED(t0);
|
|
13785
14231
|
|
|
@@ -13829,14 +14275,14 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
|
13829
14275
|
GGML_ASSERT(nb1 <= nb2);
|
|
13830
14276
|
GGML_ASSERT(nb2 <= nb3);
|
|
13831
14277
|
|
|
13832
|
-
if (params->type ==
|
|
14278
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
13833
14279
|
if (ith == 0) {
|
|
13834
14280
|
memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
|
|
13835
14281
|
}
|
|
13836
14282
|
return;
|
|
13837
14283
|
}
|
|
13838
14284
|
|
|
13839
|
-
if (params->type ==
|
|
14285
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
13840
14286
|
return;
|
|
13841
14287
|
}
|
|
13842
14288
|
|
|
@@ -14127,16 +14573,15 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
|
14127
14573
|
|
|
14128
14574
|
static void ggml_compute_forward_flash_attn_back(
|
|
14129
14575
|
const struct ggml_compute_params * params,
|
|
14130
|
-
const struct ggml_tensor * q,
|
|
14131
|
-
const struct ggml_tensor * k,
|
|
14132
|
-
const struct ggml_tensor * v,
|
|
14133
|
-
const struct ggml_tensor * d,
|
|
14134
14576
|
const bool masked,
|
|
14135
14577
|
struct ggml_tensor * dst) {
|
|
14578
|
+
|
|
14579
|
+
const struct ggml_tensor * q = dst->src[0];
|
|
14580
|
+
|
|
14136
14581
|
switch (q->type) {
|
|
14137
14582
|
case GGML_TYPE_F32:
|
|
14138
14583
|
{
|
|
14139
|
-
ggml_compute_forward_flash_attn_back_f32(params,
|
|
14584
|
+
ggml_compute_forward_flash_attn_back_f32(params, masked, dst);
|
|
14140
14585
|
} break;
|
|
14141
14586
|
default:
|
|
14142
14587
|
{
|
|
@@ -14149,9 +14594,11 @@ static void ggml_compute_forward_flash_attn_back(
|
|
|
14149
14594
|
|
|
14150
14595
|
static void ggml_compute_forward_win_part_f32(
|
|
14151
14596
|
const struct ggml_compute_params * params,
|
|
14152
|
-
const struct ggml_tensor * src0,
|
|
14153
14597
|
struct ggml_tensor * dst) {
|
|
14154
|
-
|
|
14598
|
+
|
|
14599
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14600
|
+
|
|
14601
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14155
14602
|
return;
|
|
14156
14603
|
}
|
|
14157
14604
|
|
|
@@ -14193,12 +14640,14 @@ static void ggml_compute_forward_win_part_f32(
|
|
|
14193
14640
|
|
|
14194
14641
|
static void ggml_compute_forward_win_part(
|
|
14195
14642
|
const struct ggml_compute_params * params,
|
|
14196
|
-
const struct ggml_tensor * src0,
|
|
14197
14643
|
struct ggml_tensor * dst) {
|
|
14644
|
+
|
|
14645
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14646
|
+
|
|
14198
14647
|
switch (src0->type) {
|
|
14199
14648
|
case GGML_TYPE_F32:
|
|
14200
14649
|
{
|
|
14201
|
-
ggml_compute_forward_win_part_f32(params,
|
|
14650
|
+
ggml_compute_forward_win_part_f32(params, dst);
|
|
14202
14651
|
} break;
|
|
14203
14652
|
default:
|
|
14204
14653
|
{
|
|
@@ -14211,9 +14660,11 @@ static void ggml_compute_forward_win_part(
|
|
|
14211
14660
|
|
|
14212
14661
|
static void ggml_compute_forward_win_unpart_f32(
|
|
14213
14662
|
const struct ggml_compute_params * params,
|
|
14214
|
-
const struct ggml_tensor * src0,
|
|
14215
14663
|
struct ggml_tensor * dst) {
|
|
14216
|
-
|
|
14664
|
+
|
|
14665
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14666
|
+
|
|
14667
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14217
14668
|
return;
|
|
14218
14669
|
}
|
|
14219
14670
|
|
|
@@ -14253,12 +14704,14 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
|
14253
14704
|
|
|
14254
14705
|
static void ggml_compute_forward_win_unpart(
|
|
14255
14706
|
const struct ggml_compute_params * params,
|
|
14256
|
-
const struct ggml_tensor * src0,
|
|
14257
14707
|
struct ggml_tensor * dst) {
|
|
14708
|
+
|
|
14709
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14710
|
+
|
|
14258
14711
|
switch (src0->type) {
|
|
14259
14712
|
case GGML_TYPE_F32:
|
|
14260
14713
|
{
|
|
14261
|
-
ggml_compute_forward_win_unpart_f32(params,
|
|
14714
|
+
ggml_compute_forward_win_unpart_f32(params, dst);
|
|
14262
14715
|
} break;
|
|
14263
14716
|
default:
|
|
14264
14717
|
{
|
|
@@ -14271,58 +14724,58 @@ static void ggml_compute_forward_win_unpart(
|
|
|
14271
14724
|
|
|
14272
14725
|
static void ggml_compute_forward_unary(
|
|
14273
14726
|
const struct ggml_compute_params * params,
|
|
14274
|
-
const struct ggml_tensor * src0,
|
|
14275
14727
|
struct ggml_tensor * dst) {
|
|
14728
|
+
|
|
14276
14729
|
const enum ggml_unary_op op = ggml_get_unary_op(dst);
|
|
14277
14730
|
|
|
14278
14731
|
switch (op) {
|
|
14279
14732
|
case GGML_UNARY_OP_ABS:
|
|
14280
14733
|
{
|
|
14281
|
-
ggml_compute_forward_abs(params,
|
|
14734
|
+
ggml_compute_forward_abs(params, dst);
|
|
14282
14735
|
} break;
|
|
14283
14736
|
case GGML_UNARY_OP_SGN:
|
|
14284
14737
|
{
|
|
14285
|
-
ggml_compute_forward_sgn(params,
|
|
14738
|
+
ggml_compute_forward_sgn(params, dst);
|
|
14286
14739
|
} break;
|
|
14287
14740
|
case GGML_UNARY_OP_NEG:
|
|
14288
14741
|
{
|
|
14289
|
-
ggml_compute_forward_neg(params,
|
|
14742
|
+
ggml_compute_forward_neg(params, dst);
|
|
14290
14743
|
} break;
|
|
14291
14744
|
case GGML_UNARY_OP_STEP:
|
|
14292
14745
|
{
|
|
14293
|
-
ggml_compute_forward_step(params,
|
|
14746
|
+
ggml_compute_forward_step(params, dst);
|
|
14294
14747
|
} break;
|
|
14295
14748
|
case GGML_UNARY_OP_TANH:
|
|
14296
14749
|
{
|
|
14297
|
-
ggml_compute_forward_tanh(params,
|
|
14750
|
+
ggml_compute_forward_tanh(params, dst);
|
|
14298
14751
|
} break;
|
|
14299
14752
|
case GGML_UNARY_OP_ELU:
|
|
14300
14753
|
{
|
|
14301
|
-
ggml_compute_forward_elu(params,
|
|
14754
|
+
ggml_compute_forward_elu(params, dst);
|
|
14302
14755
|
} break;
|
|
14303
14756
|
case GGML_UNARY_OP_RELU:
|
|
14304
14757
|
{
|
|
14305
|
-
ggml_compute_forward_relu(params,
|
|
14758
|
+
ggml_compute_forward_relu(params, dst);
|
|
14306
14759
|
} break;
|
|
14307
14760
|
case GGML_UNARY_OP_GELU:
|
|
14308
14761
|
{
|
|
14309
|
-
ggml_compute_forward_gelu(params,
|
|
14762
|
+
ggml_compute_forward_gelu(params, dst);
|
|
14310
14763
|
} break;
|
|
14311
14764
|
case GGML_UNARY_OP_GELU_QUICK:
|
|
14312
14765
|
{
|
|
14313
|
-
ggml_compute_forward_gelu_quick(params,
|
|
14766
|
+
ggml_compute_forward_gelu_quick(params, dst);
|
|
14314
14767
|
} break;
|
|
14315
14768
|
case GGML_UNARY_OP_SILU:
|
|
14316
14769
|
{
|
|
14317
|
-
ggml_compute_forward_silu(params,
|
|
14770
|
+
ggml_compute_forward_silu(params, dst);
|
|
14318
14771
|
} break;
|
|
14319
14772
|
case GGML_UNARY_OP_HARDSWISH:
|
|
14320
14773
|
{
|
|
14321
|
-
ggml_compute_forward_hardswish(params,
|
|
14774
|
+
ggml_compute_forward_hardswish(params, dst);
|
|
14322
14775
|
} break;
|
|
14323
14776
|
case GGML_UNARY_OP_HARDSIGMOID:
|
|
14324
14777
|
{
|
|
14325
|
-
ggml_compute_forward_hardsigmoid(params,
|
|
14778
|
+
ggml_compute_forward_hardsigmoid(params, dst);
|
|
14326
14779
|
} break;
|
|
14327
14780
|
default:
|
|
14328
14781
|
{
|
|
@@ -14335,9 +14788,11 @@ static void ggml_compute_forward_unary(
|
|
|
14335
14788
|
|
|
14336
14789
|
static void ggml_compute_forward_get_rel_pos_f16(
|
|
14337
14790
|
const struct ggml_compute_params * params,
|
|
14338
|
-
const struct ggml_tensor * src0,
|
|
14339
14791
|
struct ggml_tensor * dst) {
|
|
14340
|
-
|
|
14792
|
+
|
|
14793
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14794
|
+
|
|
14795
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14341
14796
|
return;
|
|
14342
14797
|
}
|
|
14343
14798
|
|
|
@@ -14362,12 +14817,14 @@ static void ggml_compute_forward_get_rel_pos_f16(
|
|
|
14362
14817
|
|
|
14363
14818
|
static void ggml_compute_forward_get_rel_pos(
|
|
14364
14819
|
const struct ggml_compute_params * params,
|
|
14365
|
-
const struct ggml_tensor * src0,
|
|
14366
14820
|
struct ggml_tensor * dst) {
|
|
14821
|
+
|
|
14822
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14823
|
+
|
|
14367
14824
|
switch (src0->type) {
|
|
14368
14825
|
case GGML_TYPE_F16:
|
|
14369
14826
|
{
|
|
14370
|
-
ggml_compute_forward_get_rel_pos_f16(params,
|
|
14827
|
+
ggml_compute_forward_get_rel_pos_f16(params, dst);
|
|
14371
14828
|
} break;
|
|
14372
14829
|
default:
|
|
14373
14830
|
{
|
|
@@ -14380,20 +14837,21 @@ static void ggml_compute_forward_get_rel_pos(
|
|
|
14380
14837
|
|
|
14381
14838
|
static void ggml_compute_forward_add_rel_pos_f32(
|
|
14382
14839
|
const struct ggml_compute_params * params,
|
|
14383
|
-
const struct ggml_tensor * src0,
|
|
14384
|
-
const struct ggml_tensor * src1,
|
|
14385
|
-
const struct ggml_tensor * src2,
|
|
14386
14840
|
struct ggml_tensor * dst) {
|
|
14387
14841
|
|
|
14842
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14843
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
14844
|
+
const struct ggml_tensor * src2 = dst->src[2];
|
|
14845
|
+
|
|
14388
14846
|
const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
|
|
14389
|
-
if (!inplace && params->type ==
|
|
14847
|
+
if (!inplace && params->type == GGML_TASK_TYPE_INIT) {
|
|
14390
14848
|
if (params->ith != 0) {
|
|
14391
14849
|
return;
|
|
14392
14850
|
}
|
|
14393
14851
|
memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
|
|
14394
14852
|
return;
|
|
14395
14853
|
}
|
|
14396
|
-
if (params->type ==
|
|
14854
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14397
14855
|
return;
|
|
14398
14856
|
}
|
|
14399
14857
|
|
|
@@ -14448,14 +14906,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
|
|
|
14448
14906
|
|
|
14449
14907
|
static void ggml_compute_forward_add_rel_pos(
|
|
14450
14908
|
const struct ggml_compute_params * params,
|
|
14451
|
-
const struct ggml_tensor * src0,
|
|
14452
|
-
const struct ggml_tensor * src1,
|
|
14453
|
-
const struct ggml_tensor * src2,
|
|
14454
14909
|
struct ggml_tensor * dst) {
|
|
14910
|
+
|
|
14911
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14912
|
+
|
|
14455
14913
|
switch (src0->type) {
|
|
14456
14914
|
case GGML_TYPE_F32:
|
|
14457
14915
|
{
|
|
14458
|
-
ggml_compute_forward_add_rel_pos_f32(params,
|
|
14916
|
+
ggml_compute_forward_add_rel_pos_f32(params, dst);
|
|
14459
14917
|
} break;
|
|
14460
14918
|
default:
|
|
14461
14919
|
{
|
|
@@ -14468,12 +14926,14 @@ static void ggml_compute_forward_add_rel_pos(
|
|
|
14468
14926
|
|
|
14469
14927
|
static void ggml_compute_forward_map_unary_f32(
|
|
14470
14928
|
const struct ggml_compute_params * params,
|
|
14471
|
-
const struct ggml_tensor * src0,
|
|
14472
14929
|
struct ggml_tensor * dst,
|
|
14473
14930
|
const ggml_unary_op_f32_t fun) {
|
|
14931
|
+
|
|
14932
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14933
|
+
|
|
14474
14934
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
|
14475
14935
|
|
|
14476
|
-
if (params->type ==
|
|
14936
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14477
14937
|
return;
|
|
14478
14938
|
}
|
|
14479
14939
|
|
|
@@ -14492,13 +14952,15 @@ static void ggml_compute_forward_map_unary_f32(
|
|
|
14492
14952
|
|
|
14493
14953
|
static void ggml_compute_forward_map_unary(
|
|
14494
14954
|
const struct ggml_compute_params * params,
|
|
14495
|
-
const struct ggml_tensor * src0,
|
|
14496
14955
|
struct ggml_tensor * dst,
|
|
14497
14956
|
const ggml_unary_op_f32_t fun) {
|
|
14957
|
+
|
|
14958
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14959
|
+
|
|
14498
14960
|
switch (src0->type) {
|
|
14499
14961
|
case GGML_TYPE_F32:
|
|
14500
14962
|
{
|
|
14501
|
-
ggml_compute_forward_map_unary_f32(params,
|
|
14963
|
+
ggml_compute_forward_map_unary_f32(params, dst, fun);
|
|
14502
14964
|
} break;
|
|
14503
14965
|
default:
|
|
14504
14966
|
{
|
|
@@ -14511,14 +14973,16 @@ static void ggml_compute_forward_map_unary(
|
|
|
14511
14973
|
|
|
14512
14974
|
static void ggml_compute_forward_map_binary_f32(
|
|
14513
14975
|
const struct ggml_compute_params * params,
|
|
14514
|
-
const struct ggml_tensor * src0,
|
|
14515
|
-
const struct ggml_tensor * src1,
|
|
14516
14976
|
struct ggml_tensor * dst,
|
|
14517
14977
|
const ggml_binary_op_f32_t fun) {
|
|
14978
|
+
|
|
14979
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
14980
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
14981
|
+
|
|
14518
14982
|
assert(params->ith == 0);
|
|
14519
14983
|
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
|
14520
14984
|
|
|
14521
|
-
if (params->type ==
|
|
14985
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14522
14986
|
return;
|
|
14523
14987
|
}
|
|
14524
14988
|
|
|
@@ -14539,14 +15003,15 @@ static void ggml_compute_forward_map_binary_f32(
|
|
|
14539
15003
|
|
|
14540
15004
|
static void ggml_compute_forward_map_binary(
|
|
14541
15005
|
const struct ggml_compute_params * params,
|
|
14542
|
-
const struct ggml_tensor * src0,
|
|
14543
|
-
const struct ggml_tensor * src1,
|
|
14544
15006
|
struct ggml_tensor * dst,
|
|
14545
15007
|
const ggml_binary_op_f32_t fun) {
|
|
15008
|
+
|
|
15009
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
15010
|
+
|
|
14546
15011
|
switch (src0->type) {
|
|
14547
15012
|
case GGML_TYPE_F32:
|
|
14548
15013
|
{
|
|
14549
|
-
ggml_compute_forward_map_binary_f32(params,
|
|
15014
|
+
ggml_compute_forward_map_binary_f32(params, dst, fun);
|
|
14550
15015
|
} break;
|
|
14551
15016
|
default:
|
|
14552
15017
|
{
|
|
@@ -14559,12 +15024,14 @@ static void ggml_compute_forward_map_binary(
|
|
|
14559
15024
|
|
|
14560
15025
|
static void ggml_compute_forward_map_custom1_f32(
|
|
14561
15026
|
const struct ggml_compute_params * params,
|
|
14562
|
-
const struct ggml_tensor * a,
|
|
14563
15027
|
struct ggml_tensor * dst,
|
|
14564
15028
|
const ggml_custom1_op_f32_t fun) {
|
|
15029
|
+
|
|
15030
|
+
const struct ggml_tensor * a = dst->src[0];
|
|
15031
|
+
|
|
14565
15032
|
assert(params->ith == 0);
|
|
14566
15033
|
|
|
14567
|
-
if (params->type ==
|
|
15034
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14568
15035
|
return;
|
|
14569
15036
|
}
|
|
14570
15037
|
|
|
@@ -14575,13 +15042,15 @@ static void ggml_compute_forward_map_custom1_f32(
|
|
|
14575
15042
|
|
|
14576
15043
|
static void ggml_compute_forward_map_custom2_f32(
|
|
14577
15044
|
const struct ggml_compute_params * params,
|
|
14578
|
-
const struct ggml_tensor * a,
|
|
14579
|
-
const struct ggml_tensor * b,
|
|
14580
15045
|
struct ggml_tensor * dst,
|
|
14581
15046
|
const ggml_custom2_op_f32_t fun) {
|
|
15047
|
+
|
|
15048
|
+
const struct ggml_tensor * a = dst->src[0];
|
|
15049
|
+
const struct ggml_tensor * b = dst->src[1];
|
|
15050
|
+
|
|
14582
15051
|
assert(params->ith == 0);
|
|
14583
15052
|
|
|
14584
|
-
if (params->type ==
|
|
15053
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14585
15054
|
return;
|
|
14586
15055
|
}
|
|
14587
15056
|
|
|
@@ -14592,14 +15061,16 @@ static void ggml_compute_forward_map_custom2_f32(
|
|
|
14592
15061
|
|
|
14593
15062
|
static void ggml_compute_forward_map_custom3_f32(
|
|
14594
15063
|
const struct ggml_compute_params * params,
|
|
14595
|
-
const struct ggml_tensor * a,
|
|
14596
|
-
const struct ggml_tensor * b,
|
|
14597
|
-
const struct ggml_tensor * c,
|
|
14598
15064
|
struct ggml_tensor * dst,
|
|
14599
15065
|
const ggml_custom3_op_f32_t fun) {
|
|
15066
|
+
|
|
15067
|
+
const struct ggml_tensor * a = dst->src[0];
|
|
15068
|
+
const struct ggml_tensor * b = dst->src[1];
|
|
15069
|
+
const struct ggml_tensor * c = dst->src[1];
|
|
15070
|
+
|
|
14600
15071
|
assert(params->ith == 0);
|
|
14601
15072
|
|
|
14602
|
-
if (params->type ==
|
|
15073
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14603
15074
|
return;
|
|
14604
15075
|
}
|
|
14605
15076
|
|
|
@@ -14610,57 +15081,68 @@ static void ggml_compute_forward_map_custom3_f32(
|
|
|
14610
15081
|
|
|
14611
15082
|
static void ggml_compute_forward_map_custom1(
|
|
14612
15083
|
const struct ggml_compute_params * params,
|
|
14613
|
-
const struct ggml_tensor * a,
|
|
14614
15084
|
struct ggml_tensor * dst) {
|
|
14615
|
-
|
|
15085
|
+
|
|
15086
|
+
const struct ggml_tensor * a = dst->src[0];
|
|
15087
|
+
|
|
15088
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14616
15089
|
return;
|
|
14617
15090
|
}
|
|
14618
15091
|
|
|
14619
|
-
struct ggml_map_custom1_op_params
|
|
15092
|
+
struct ggml_map_custom1_op_params p;
|
|
15093
|
+
memcpy(&p, dst->op_params, sizeof(p));
|
|
14620
15094
|
|
|
14621
|
-
p
|
|
15095
|
+
p.fun(dst, a, params->ith, params->nth, p.userdata);
|
|
14622
15096
|
}
|
|
14623
15097
|
|
|
14624
15098
|
// ggml_compute_forward_map_custom2
|
|
14625
15099
|
|
|
14626
15100
|
static void ggml_compute_forward_map_custom2(
|
|
14627
15101
|
const struct ggml_compute_params * params,
|
|
14628
|
-
const struct ggml_tensor * a,
|
|
14629
|
-
const struct ggml_tensor * b,
|
|
14630
15102
|
struct ggml_tensor * dst) {
|
|
14631
|
-
|
|
15103
|
+
|
|
15104
|
+
const struct ggml_tensor * a = dst->src[0];
|
|
15105
|
+
const struct ggml_tensor * b = dst->src[1];
|
|
15106
|
+
|
|
15107
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14632
15108
|
return;
|
|
14633
15109
|
}
|
|
14634
15110
|
|
|
14635
|
-
struct ggml_map_custom2_op_params
|
|
15111
|
+
struct ggml_map_custom2_op_params p;
|
|
15112
|
+
memcpy(&p, dst->op_params, sizeof(p));
|
|
14636
15113
|
|
|
14637
|
-
p
|
|
15114
|
+
p.fun(dst, a, b, params->ith, params->nth, p.userdata);
|
|
14638
15115
|
}
|
|
14639
15116
|
|
|
14640
15117
|
// ggml_compute_forward_map_custom3
|
|
14641
15118
|
|
|
14642
15119
|
static void ggml_compute_forward_map_custom3(
|
|
14643
15120
|
const struct ggml_compute_params * params,
|
|
14644
|
-
const struct ggml_tensor * a,
|
|
14645
|
-
const struct ggml_tensor * b,
|
|
14646
|
-
const struct ggml_tensor * c,
|
|
14647
15121
|
struct ggml_tensor * dst) {
|
|
14648
|
-
|
|
15122
|
+
|
|
15123
|
+
const struct ggml_tensor * a = dst->src[0];
|
|
15124
|
+
const struct ggml_tensor * b = dst->src[1];
|
|
15125
|
+
const struct ggml_tensor * c = dst->src[2];
|
|
15126
|
+
|
|
15127
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14649
15128
|
return;
|
|
14650
15129
|
}
|
|
14651
15130
|
|
|
14652
|
-
struct ggml_map_custom3_op_params
|
|
15131
|
+
struct ggml_map_custom3_op_params p;
|
|
15132
|
+
memcpy(&p, dst->op_params, sizeof(p));
|
|
14653
15133
|
|
|
14654
|
-
p
|
|
15134
|
+
p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
|
|
14655
15135
|
}
|
|
14656
15136
|
|
|
14657
15137
|
// ggml_compute_forward_cross_entropy_loss
|
|
14658
15138
|
|
|
14659
15139
|
static void ggml_compute_forward_cross_entropy_loss_f32(
|
|
14660
15140
|
const struct ggml_compute_params * params,
|
|
14661
|
-
const struct ggml_tensor * src0,
|
|
14662
|
-
const struct ggml_tensor * src1,
|
|
14663
15141
|
struct ggml_tensor * dst) {
|
|
15142
|
+
|
|
15143
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
15144
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
15145
|
+
|
|
14664
15146
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
|
14665
15147
|
GGML_ASSERT(ggml_is_contiguous(src1));
|
|
14666
15148
|
GGML_ASSERT(ggml_is_scalar(dst));
|
|
@@ -14677,14 +15159,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
|
|
|
14677
15159
|
|
|
14678
15160
|
GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
|
|
14679
15161
|
|
|
14680
|
-
if (params->type ==
|
|
15162
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
|
14681
15163
|
if (ith == 0) {
|
|
14682
15164
|
memset(sums, 0, sizeof(float) * (nth + nth * nc));
|
|
14683
15165
|
}
|
|
14684
15166
|
return;
|
|
14685
15167
|
}
|
|
14686
15168
|
|
|
14687
|
-
if (params->type ==
|
|
15169
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14688
15170
|
if (ith == 0) {
|
|
14689
15171
|
float * dp = (float *) dst->data;
|
|
14690
15172
|
ggml_vec_sum_f32(nth, dp, sums);
|
|
@@ -14764,13 +15246,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
|
|
|
14764
15246
|
|
|
14765
15247
|
static void ggml_compute_forward_cross_entropy_loss(
|
|
14766
15248
|
const struct ggml_compute_params * params,
|
|
14767
|
-
const struct ggml_tensor * src0,
|
|
14768
|
-
const struct ggml_tensor * src1,
|
|
14769
15249
|
struct ggml_tensor * dst) {
|
|
15250
|
+
|
|
15251
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
15252
|
+
|
|
14770
15253
|
switch (src0->type) {
|
|
14771
15254
|
case GGML_TYPE_F32:
|
|
14772
15255
|
{
|
|
14773
|
-
ggml_compute_forward_cross_entropy_loss_f32(params,
|
|
15256
|
+
ggml_compute_forward_cross_entropy_loss_f32(params, dst);
|
|
14774
15257
|
} break;
|
|
14775
15258
|
default:
|
|
14776
15259
|
{
|
|
@@ -14783,10 +15266,12 @@ static void ggml_compute_forward_cross_entropy_loss(
|
|
|
14783
15266
|
|
|
14784
15267
|
static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
|
14785
15268
|
const struct ggml_compute_params * params,
|
|
14786
|
-
const struct ggml_tensor * src0,
|
|
14787
|
-
const struct ggml_tensor * src1,
|
|
14788
|
-
const struct ggml_tensor * opt0,
|
|
14789
15269
|
struct ggml_tensor * dst) {
|
|
15270
|
+
|
|
15271
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
15272
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
|
15273
|
+
const struct ggml_tensor * opt0 = dst->src[2];
|
|
15274
|
+
|
|
14790
15275
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
|
14791
15276
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
|
14792
15277
|
GGML_ASSERT(ggml_is_contiguous(src1));
|
|
@@ -14796,7 +15281,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
|
|
14796
15281
|
const int64_t ith = params->ith;
|
|
14797
15282
|
const int64_t nth = params->nth;
|
|
14798
15283
|
|
|
14799
|
-
if (params->type ==
|
|
15284
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
|
14800
15285
|
return;
|
|
14801
15286
|
}
|
|
14802
15287
|
|
|
@@ -14873,14 +15358,14 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
|
|
14873
15358
|
|
|
14874
15359
|
static void ggml_compute_forward_cross_entropy_loss_back(
|
|
14875
15360
|
const struct ggml_compute_params * params,
|
|
14876
|
-
const struct ggml_tensor * src0,
|
|
14877
|
-
const struct ggml_tensor * src1,
|
|
14878
|
-
const struct ggml_tensor * opt0,
|
|
14879
15361
|
struct ggml_tensor * dst) {
|
|
15362
|
+
|
|
15363
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
15364
|
+
|
|
14880
15365
|
switch (src0->type) {
|
|
14881
15366
|
case GGML_TYPE_F32:
|
|
14882
15367
|
{
|
|
14883
|
-
ggml_compute_forward_cross_entropy_loss_back_f32(params,
|
|
15368
|
+
ggml_compute_forward_cross_entropy_loss_back_f32(params, dst);
|
|
14884
15369
|
} break;
|
|
14885
15370
|
default:
|
|
14886
15371
|
{
|
|
@@ -14903,8 +15388,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
14903
15388
|
if (skip_cpu) {
|
|
14904
15389
|
return;
|
|
14905
15390
|
}
|
|
14906
|
-
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
|
|
14907
|
-
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
|
|
15391
|
+
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
|
15392
|
+
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
|
14908
15393
|
#elif defined(GGML_USE_VULKAN)
|
|
14909
15394
|
const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
|
|
14910
15395
|
#ifdef GGML_VULKAN_CHECK_RESULTS
|
|
@@ -14915,8 +15400,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
14915
15400
|
if (skip_cpu) {
|
|
14916
15401
|
return;
|
|
14917
15402
|
}
|
|
14918
|
-
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
|
|
14919
|
-
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
|
|
15403
|
+
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
|
15404
|
+
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
|
14920
15405
|
#endif // GGML_USE_CUBLAS
|
|
14921
15406
|
|
|
14922
15407
|
#ifdef GGML_USE_SYCL
|
|
@@ -14928,312 +15413,312 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
14928
15413
|
switch (tensor->op) {
|
|
14929
15414
|
case GGML_OP_DUP:
|
|
14930
15415
|
{
|
|
14931
|
-
ggml_compute_forward_dup(params, tensor
|
|
15416
|
+
ggml_compute_forward_dup(params, tensor);
|
|
14932
15417
|
} break;
|
|
14933
15418
|
case GGML_OP_ADD:
|
|
14934
15419
|
{
|
|
14935
|
-
ggml_compute_forward_add(params, tensor
|
|
15420
|
+
ggml_compute_forward_add(params, tensor);
|
|
14936
15421
|
} break;
|
|
14937
15422
|
case GGML_OP_ADD1:
|
|
14938
15423
|
{
|
|
14939
|
-
ggml_compute_forward_add1(params, tensor
|
|
15424
|
+
ggml_compute_forward_add1(params, tensor);
|
|
14940
15425
|
} break;
|
|
14941
15426
|
case GGML_OP_ACC:
|
|
14942
15427
|
{
|
|
14943
|
-
ggml_compute_forward_acc(params, tensor
|
|
15428
|
+
ggml_compute_forward_acc(params, tensor);
|
|
14944
15429
|
} break;
|
|
14945
15430
|
case GGML_OP_SUB:
|
|
14946
15431
|
{
|
|
14947
|
-
ggml_compute_forward_sub(params, tensor
|
|
15432
|
+
ggml_compute_forward_sub(params, tensor);
|
|
14948
15433
|
} break;
|
|
14949
15434
|
case GGML_OP_MUL:
|
|
14950
15435
|
{
|
|
14951
|
-
ggml_compute_forward_mul(params, tensor
|
|
15436
|
+
ggml_compute_forward_mul(params, tensor);
|
|
14952
15437
|
} break;
|
|
14953
15438
|
case GGML_OP_DIV:
|
|
14954
15439
|
{
|
|
14955
|
-
ggml_compute_forward_div(params, tensor
|
|
15440
|
+
ggml_compute_forward_div(params, tensor);
|
|
14956
15441
|
} break;
|
|
14957
15442
|
case GGML_OP_SQR:
|
|
14958
15443
|
{
|
|
14959
|
-
ggml_compute_forward_sqr(params, tensor
|
|
15444
|
+
ggml_compute_forward_sqr(params, tensor);
|
|
14960
15445
|
} break;
|
|
14961
15446
|
case GGML_OP_SQRT:
|
|
14962
15447
|
{
|
|
14963
|
-
ggml_compute_forward_sqrt(params, tensor
|
|
15448
|
+
ggml_compute_forward_sqrt(params, tensor);
|
|
14964
15449
|
} break;
|
|
14965
15450
|
case GGML_OP_LOG:
|
|
14966
15451
|
{
|
|
14967
|
-
ggml_compute_forward_log(params, tensor
|
|
15452
|
+
ggml_compute_forward_log(params, tensor);
|
|
14968
15453
|
} break;
|
|
14969
15454
|
case GGML_OP_SUM:
|
|
14970
15455
|
{
|
|
14971
|
-
ggml_compute_forward_sum(params, tensor
|
|
15456
|
+
ggml_compute_forward_sum(params, tensor);
|
|
14972
15457
|
} break;
|
|
14973
15458
|
case GGML_OP_SUM_ROWS:
|
|
14974
15459
|
{
|
|
14975
|
-
ggml_compute_forward_sum_rows(params, tensor
|
|
15460
|
+
ggml_compute_forward_sum_rows(params, tensor);
|
|
14976
15461
|
} break;
|
|
14977
15462
|
case GGML_OP_MEAN:
|
|
14978
15463
|
{
|
|
14979
|
-
ggml_compute_forward_mean(params, tensor
|
|
15464
|
+
ggml_compute_forward_mean(params, tensor);
|
|
14980
15465
|
} break;
|
|
14981
15466
|
case GGML_OP_ARGMAX:
|
|
14982
15467
|
{
|
|
14983
|
-
ggml_compute_forward_argmax(params, tensor
|
|
15468
|
+
ggml_compute_forward_argmax(params, tensor);
|
|
14984
15469
|
} break;
|
|
14985
15470
|
case GGML_OP_REPEAT:
|
|
14986
15471
|
{
|
|
14987
|
-
ggml_compute_forward_repeat(params, tensor
|
|
15472
|
+
ggml_compute_forward_repeat(params, tensor);
|
|
14988
15473
|
} break;
|
|
14989
15474
|
case GGML_OP_REPEAT_BACK:
|
|
14990
15475
|
{
|
|
14991
|
-
ggml_compute_forward_repeat_back(params, tensor
|
|
15476
|
+
ggml_compute_forward_repeat_back(params, tensor);
|
|
14992
15477
|
} break;
|
|
14993
15478
|
case GGML_OP_CONCAT:
|
|
14994
15479
|
{
|
|
14995
|
-
ggml_compute_forward_concat(params, tensor
|
|
15480
|
+
ggml_compute_forward_concat(params, tensor);
|
|
14996
15481
|
} break;
|
|
14997
15482
|
case GGML_OP_SILU_BACK:
|
|
14998
15483
|
{
|
|
14999
|
-
ggml_compute_forward_silu_back(params, tensor
|
|
15484
|
+
ggml_compute_forward_silu_back(params, tensor);
|
|
15000
15485
|
} break;
|
|
15001
15486
|
case GGML_OP_NORM:
|
|
15002
15487
|
{
|
|
15003
|
-
ggml_compute_forward_norm(params, tensor
|
|
15488
|
+
ggml_compute_forward_norm(params, tensor);
|
|
15004
15489
|
} break;
|
|
15005
15490
|
case GGML_OP_RMS_NORM:
|
|
15006
15491
|
{
|
|
15007
|
-
ggml_compute_forward_rms_norm(params, tensor
|
|
15492
|
+
ggml_compute_forward_rms_norm(params, tensor);
|
|
15008
15493
|
} break;
|
|
15009
15494
|
case GGML_OP_RMS_NORM_BACK:
|
|
15010
15495
|
{
|
|
15011
|
-
ggml_compute_forward_rms_norm_back(params, tensor
|
|
15496
|
+
ggml_compute_forward_rms_norm_back(params, tensor);
|
|
15012
15497
|
} break;
|
|
15013
15498
|
case GGML_OP_GROUP_NORM:
|
|
15014
15499
|
{
|
|
15015
|
-
ggml_compute_forward_group_norm(params, tensor
|
|
15500
|
+
ggml_compute_forward_group_norm(params, tensor);
|
|
15016
15501
|
} break;
|
|
15017
15502
|
case GGML_OP_MUL_MAT:
|
|
15018
15503
|
{
|
|
15019
|
-
ggml_compute_forward_mul_mat(params, tensor
|
|
15504
|
+
ggml_compute_forward_mul_mat(params, tensor);
|
|
15020
15505
|
} break;
|
|
15021
15506
|
case GGML_OP_MUL_MAT_ID:
|
|
15022
15507
|
{
|
|
15023
|
-
ggml_compute_forward_mul_mat_id(params, tensor
|
|
15508
|
+
ggml_compute_forward_mul_mat_id(params, tensor);
|
|
15024
15509
|
} break;
|
|
15025
15510
|
case GGML_OP_OUT_PROD:
|
|
15026
15511
|
{
|
|
15027
|
-
ggml_compute_forward_out_prod(params, tensor
|
|
15512
|
+
ggml_compute_forward_out_prod(params, tensor);
|
|
15028
15513
|
} break;
|
|
15029
15514
|
case GGML_OP_SCALE:
|
|
15030
15515
|
{
|
|
15031
|
-
ggml_compute_forward_scale(params, tensor
|
|
15516
|
+
ggml_compute_forward_scale(params, tensor);
|
|
15032
15517
|
} break;
|
|
15033
15518
|
case GGML_OP_SET:
|
|
15034
15519
|
{
|
|
15035
|
-
ggml_compute_forward_set(params, tensor
|
|
15520
|
+
ggml_compute_forward_set(params, tensor);
|
|
15036
15521
|
} break;
|
|
15037
15522
|
case GGML_OP_CPY:
|
|
15038
15523
|
{
|
|
15039
|
-
ggml_compute_forward_cpy(params, tensor
|
|
15524
|
+
ggml_compute_forward_cpy(params, tensor);
|
|
15040
15525
|
} break;
|
|
15041
15526
|
case GGML_OP_CONT:
|
|
15042
15527
|
{
|
|
15043
|
-
ggml_compute_forward_cont(params, tensor
|
|
15528
|
+
ggml_compute_forward_cont(params, tensor);
|
|
15044
15529
|
} break;
|
|
15045
15530
|
case GGML_OP_RESHAPE:
|
|
15046
15531
|
{
|
|
15047
|
-
ggml_compute_forward_reshape(params, tensor
|
|
15532
|
+
ggml_compute_forward_reshape(params, tensor);
|
|
15048
15533
|
} break;
|
|
15049
15534
|
case GGML_OP_VIEW:
|
|
15050
15535
|
{
|
|
15051
|
-
ggml_compute_forward_view(params, tensor
|
|
15536
|
+
ggml_compute_forward_view(params, tensor);
|
|
15052
15537
|
} break;
|
|
15053
15538
|
case GGML_OP_PERMUTE:
|
|
15054
15539
|
{
|
|
15055
|
-
ggml_compute_forward_permute(params, tensor
|
|
15540
|
+
ggml_compute_forward_permute(params, tensor);
|
|
15056
15541
|
} break;
|
|
15057
15542
|
case GGML_OP_TRANSPOSE:
|
|
15058
15543
|
{
|
|
15059
|
-
ggml_compute_forward_transpose(params, tensor
|
|
15544
|
+
ggml_compute_forward_transpose(params, tensor);
|
|
15060
15545
|
} break;
|
|
15061
15546
|
case GGML_OP_GET_ROWS:
|
|
15062
15547
|
{
|
|
15063
|
-
ggml_compute_forward_get_rows(params, tensor
|
|
15548
|
+
ggml_compute_forward_get_rows(params, tensor);
|
|
15064
15549
|
} break;
|
|
15065
15550
|
case GGML_OP_GET_ROWS_BACK:
|
|
15066
15551
|
{
|
|
15067
|
-
ggml_compute_forward_get_rows_back(params, tensor
|
|
15552
|
+
ggml_compute_forward_get_rows_back(params, tensor);
|
|
15068
15553
|
} break;
|
|
15069
15554
|
case GGML_OP_DIAG:
|
|
15070
15555
|
{
|
|
15071
|
-
ggml_compute_forward_diag(params, tensor
|
|
15556
|
+
ggml_compute_forward_diag(params, tensor);
|
|
15072
15557
|
} break;
|
|
15073
15558
|
case GGML_OP_DIAG_MASK_INF:
|
|
15074
15559
|
{
|
|
15075
|
-
ggml_compute_forward_diag_mask_inf(params, tensor
|
|
15560
|
+
ggml_compute_forward_diag_mask_inf(params, tensor);
|
|
15076
15561
|
} break;
|
|
15077
15562
|
case GGML_OP_DIAG_MASK_ZERO:
|
|
15078
15563
|
{
|
|
15079
|
-
ggml_compute_forward_diag_mask_zero(params, tensor
|
|
15564
|
+
ggml_compute_forward_diag_mask_zero(params, tensor);
|
|
15080
15565
|
} break;
|
|
15081
15566
|
case GGML_OP_SOFT_MAX:
|
|
15082
15567
|
{
|
|
15083
|
-
ggml_compute_forward_soft_max(params, tensor
|
|
15568
|
+
ggml_compute_forward_soft_max(params, tensor);
|
|
15084
15569
|
} break;
|
|
15085
15570
|
case GGML_OP_SOFT_MAX_BACK:
|
|
15086
15571
|
{
|
|
15087
|
-
ggml_compute_forward_soft_max_back(params, tensor
|
|
15572
|
+
ggml_compute_forward_soft_max_back(params, tensor);
|
|
15088
15573
|
} break;
|
|
15089
15574
|
case GGML_OP_ROPE:
|
|
15090
15575
|
{
|
|
15091
|
-
ggml_compute_forward_rope(params, tensor
|
|
15576
|
+
ggml_compute_forward_rope(params, tensor);
|
|
15092
15577
|
} break;
|
|
15093
15578
|
case GGML_OP_ROPE_BACK:
|
|
15094
15579
|
{
|
|
15095
|
-
ggml_compute_forward_rope_back(params, tensor
|
|
15580
|
+
ggml_compute_forward_rope_back(params, tensor);
|
|
15096
15581
|
} break;
|
|
15097
15582
|
case GGML_OP_ALIBI:
|
|
15098
15583
|
{
|
|
15099
|
-
ggml_compute_forward_alibi(params, tensor
|
|
15584
|
+
ggml_compute_forward_alibi(params, tensor);
|
|
15100
15585
|
} break;
|
|
15101
15586
|
case GGML_OP_CLAMP:
|
|
15102
15587
|
{
|
|
15103
|
-
ggml_compute_forward_clamp(params, tensor
|
|
15588
|
+
ggml_compute_forward_clamp(params, tensor);
|
|
15104
15589
|
} break;
|
|
15105
15590
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
|
15106
15591
|
{
|
|
15107
|
-
ggml_compute_forward_conv_transpose_1d(params, tensor
|
|
15592
|
+
ggml_compute_forward_conv_transpose_1d(params, tensor);
|
|
15108
15593
|
} break;
|
|
15109
15594
|
case GGML_OP_IM2COL:
|
|
15110
15595
|
{
|
|
15111
|
-
ggml_compute_forward_im2col(params, tensor
|
|
15596
|
+
ggml_compute_forward_im2col(params, tensor);
|
|
15112
15597
|
} break;
|
|
15113
15598
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
|
15114
15599
|
{
|
|
15115
|
-
ggml_compute_forward_conv_transpose_2d(params, tensor
|
|
15600
|
+
ggml_compute_forward_conv_transpose_2d(params, tensor);
|
|
15116
15601
|
} break;
|
|
15117
15602
|
case GGML_OP_POOL_1D:
|
|
15118
15603
|
{
|
|
15119
|
-
ggml_compute_forward_pool_1d(params, tensor
|
|
15604
|
+
ggml_compute_forward_pool_1d(params, tensor);
|
|
15120
15605
|
} break;
|
|
15121
15606
|
case GGML_OP_POOL_2D:
|
|
15122
15607
|
{
|
|
15123
|
-
ggml_compute_forward_pool_2d(params, tensor
|
|
15608
|
+
ggml_compute_forward_pool_2d(params, tensor);
|
|
15124
15609
|
} break;
|
|
15125
15610
|
case GGML_OP_UPSCALE:
|
|
15126
15611
|
{
|
|
15127
|
-
ggml_compute_forward_upscale(params, tensor
|
|
15612
|
+
ggml_compute_forward_upscale(params, tensor);
|
|
15128
15613
|
} break;
|
|
15129
15614
|
case GGML_OP_PAD:
|
|
15130
15615
|
{
|
|
15131
|
-
ggml_compute_forward_pad(params, tensor
|
|
15616
|
+
ggml_compute_forward_pad(params, tensor);
|
|
15132
15617
|
} break;
|
|
15133
15618
|
case GGML_OP_ARGSORT:
|
|
15134
15619
|
{
|
|
15135
|
-
ggml_compute_forward_argsort(params, tensor
|
|
15620
|
+
ggml_compute_forward_argsort(params, tensor);
|
|
15136
15621
|
} break;
|
|
15137
15622
|
case GGML_OP_LEAKY_RELU:
|
|
15138
15623
|
{
|
|
15139
|
-
ggml_compute_forward_leaky_relu(params, tensor
|
|
15624
|
+
ggml_compute_forward_leaky_relu(params, tensor);
|
|
15140
15625
|
} break;
|
|
15141
15626
|
case GGML_OP_FLASH_ATTN:
|
|
15142
15627
|
{
|
|
15143
15628
|
const int32_t t = ggml_get_op_params_i32(tensor, 0);
|
|
15144
15629
|
GGML_ASSERT(t == 0 || t == 1);
|
|
15145
15630
|
const bool masked = t != 0;
|
|
15146
|
-
ggml_compute_forward_flash_attn(params,
|
|
15631
|
+
ggml_compute_forward_flash_attn(params, masked, tensor);
|
|
15147
15632
|
} break;
|
|
15148
15633
|
case GGML_OP_FLASH_FF:
|
|
15149
15634
|
{
|
|
15150
|
-
ggml_compute_forward_flash_ff(params, tensor
|
|
15635
|
+
ggml_compute_forward_flash_ff(params, tensor);
|
|
15151
15636
|
} break;
|
|
15152
15637
|
case GGML_OP_FLASH_ATTN_BACK:
|
|
15153
15638
|
{
|
|
15154
15639
|
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
|
15155
15640
|
GGML_ASSERT(t == 0 || t == 1);
|
|
15156
15641
|
bool masked = t != 0;
|
|
15157
|
-
ggml_compute_forward_flash_attn_back(params,
|
|
15642
|
+
ggml_compute_forward_flash_attn_back(params, masked, tensor);
|
|
15158
15643
|
} break;
|
|
15159
15644
|
case GGML_OP_WIN_PART:
|
|
15160
15645
|
{
|
|
15161
|
-
ggml_compute_forward_win_part(params, tensor
|
|
15646
|
+
ggml_compute_forward_win_part(params, tensor);
|
|
15162
15647
|
} break;
|
|
15163
15648
|
case GGML_OP_WIN_UNPART:
|
|
15164
15649
|
{
|
|
15165
|
-
ggml_compute_forward_win_unpart(params, tensor
|
|
15650
|
+
ggml_compute_forward_win_unpart(params, tensor);
|
|
15166
15651
|
} break;
|
|
15167
15652
|
case GGML_OP_UNARY:
|
|
15168
15653
|
{
|
|
15169
|
-
ggml_compute_forward_unary(params, tensor
|
|
15654
|
+
ggml_compute_forward_unary(params, tensor);
|
|
15170
15655
|
} break;
|
|
15171
15656
|
case GGML_OP_GET_REL_POS:
|
|
15172
15657
|
{
|
|
15173
|
-
ggml_compute_forward_get_rel_pos(params, tensor
|
|
15658
|
+
ggml_compute_forward_get_rel_pos(params, tensor);
|
|
15174
15659
|
} break;
|
|
15175
15660
|
case GGML_OP_ADD_REL_POS:
|
|
15176
15661
|
{
|
|
15177
|
-
ggml_compute_forward_add_rel_pos(params, tensor
|
|
15662
|
+
ggml_compute_forward_add_rel_pos(params, tensor);
|
|
15178
15663
|
} break;
|
|
15179
15664
|
case GGML_OP_MAP_UNARY:
|
|
15180
15665
|
{
|
|
15181
15666
|
ggml_unary_op_f32_t fun;
|
|
15182
15667
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
|
15183
|
-
ggml_compute_forward_map_unary(params, tensor
|
|
15668
|
+
ggml_compute_forward_map_unary(params, tensor, fun);
|
|
15184
15669
|
}
|
|
15185
15670
|
break;
|
|
15186
15671
|
case GGML_OP_MAP_BINARY:
|
|
15187
15672
|
{
|
|
15188
15673
|
ggml_binary_op_f32_t fun;
|
|
15189
15674
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
|
15190
|
-
ggml_compute_forward_map_binary(params, tensor
|
|
15675
|
+
ggml_compute_forward_map_binary(params, tensor, fun);
|
|
15191
15676
|
}
|
|
15192
15677
|
break;
|
|
15193
15678
|
case GGML_OP_MAP_CUSTOM1_F32:
|
|
15194
15679
|
{
|
|
15195
15680
|
ggml_custom1_op_f32_t fun;
|
|
15196
15681
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
|
15197
|
-
ggml_compute_forward_map_custom1_f32(params, tensor
|
|
15682
|
+
ggml_compute_forward_map_custom1_f32(params, tensor, fun);
|
|
15198
15683
|
}
|
|
15199
15684
|
break;
|
|
15200
15685
|
case GGML_OP_MAP_CUSTOM2_F32:
|
|
15201
15686
|
{
|
|
15202
15687
|
ggml_custom2_op_f32_t fun;
|
|
15203
15688
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
|
15204
|
-
ggml_compute_forward_map_custom2_f32(params, tensor
|
|
15689
|
+
ggml_compute_forward_map_custom2_f32(params, tensor, fun);
|
|
15205
15690
|
}
|
|
15206
15691
|
break;
|
|
15207
15692
|
case GGML_OP_MAP_CUSTOM3_F32:
|
|
15208
15693
|
{
|
|
15209
15694
|
ggml_custom3_op_f32_t fun;
|
|
15210
15695
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
|
15211
|
-
ggml_compute_forward_map_custom3_f32(params, tensor
|
|
15696
|
+
ggml_compute_forward_map_custom3_f32(params, tensor, fun);
|
|
15212
15697
|
}
|
|
15213
15698
|
break;
|
|
15214
15699
|
case GGML_OP_MAP_CUSTOM1:
|
|
15215
15700
|
{
|
|
15216
|
-
ggml_compute_forward_map_custom1(params, tensor
|
|
15701
|
+
ggml_compute_forward_map_custom1(params, tensor);
|
|
15217
15702
|
}
|
|
15218
15703
|
break;
|
|
15219
15704
|
case GGML_OP_MAP_CUSTOM2:
|
|
15220
15705
|
{
|
|
15221
|
-
ggml_compute_forward_map_custom2(params, tensor
|
|
15706
|
+
ggml_compute_forward_map_custom2(params, tensor);
|
|
15222
15707
|
}
|
|
15223
15708
|
break;
|
|
15224
15709
|
case GGML_OP_MAP_CUSTOM3:
|
|
15225
15710
|
{
|
|
15226
|
-
ggml_compute_forward_map_custom3(params, tensor
|
|
15711
|
+
ggml_compute_forward_map_custom3(params, tensor);
|
|
15227
15712
|
}
|
|
15228
15713
|
break;
|
|
15229
15714
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
|
15230
15715
|
{
|
|
15231
|
-
ggml_compute_forward_cross_entropy_loss(params, tensor
|
|
15716
|
+
ggml_compute_forward_cross_entropy_loss(params, tensor);
|
|
15232
15717
|
}
|
|
15233
15718
|
break;
|
|
15234
15719
|
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
|
|
15235
15720
|
{
|
|
15236
|
-
ggml_compute_forward_cross_entropy_loss_back(params, tensor
|
|
15721
|
+
ggml_compute_forward_cross_entropy_loss_back(params, tensor);
|
|
15237
15722
|
}
|
|
15238
15723
|
break;
|
|
15239
15724
|
case GGML_OP_NONE:
|
|
@@ -16462,7 +16947,7 @@ size_t ggml_graph_overhead(void) {
|
|
|
16462
16947
|
|
|
16463
16948
|
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
|
|
16464
16949
|
const size_t obj_size = ggml_graph_nbytes(size, grads);
|
|
16465
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
|
16950
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
|
|
16466
16951
|
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
|
16467
16952
|
|
|
16468
16953
|
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
|
|
@@ -16637,27 +17122,47 @@ typedef pthread_t ggml_thread_t;
|
|
|
16637
17122
|
#endif
|
|
16638
17123
|
|
|
16639
17124
|
// Android's libc implementation "bionic" does not support setting affinity
|
|
16640
|
-
#if defined(
|
|
16641
|
-
static void set_numa_thread_affinity(int thread_n
|
|
17125
|
+
#if defined(__gnu_linux__)
|
|
17126
|
+
static void set_numa_thread_affinity(int thread_n) {
|
|
16642
17127
|
if (!ggml_is_numa()) {
|
|
16643
17128
|
return;
|
|
16644
17129
|
}
|
|
16645
17130
|
|
|
16646
|
-
|
|
16647
|
-
|
|
16648
|
-
struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
|
|
17131
|
+
int node_num;
|
|
17132
|
+
int rv;
|
|
16649
17133
|
size_t setsize = CPU_ALLOC_SIZE(g_state.numa.total_cpus);
|
|
16650
17134
|
|
|
17135
|
+
switch(g_state.numa.numa_strategy) {
|
|
17136
|
+
case GGML_NUMA_STRATEGY_DISTRIBUTE:
|
|
17137
|
+
// run thread on node_num thread_n / (threads per node)
|
|
17138
|
+
node_num = thread_n % g_state.numa.n_nodes;
|
|
17139
|
+
break;
|
|
17140
|
+
case GGML_NUMA_STRATEGY_ISOLATE:
|
|
17141
|
+
// run thread on current_node
|
|
17142
|
+
node_num = g_state.numa.current_node;
|
|
17143
|
+
break;
|
|
17144
|
+
case GGML_NUMA_STRATEGY_NUMACTL:
|
|
17145
|
+
// use the cpuset that numactl gave us
|
|
17146
|
+
rv = pthread_setaffinity_np(pthread_self(), setsize, &g_state.numa.cpuset);
|
|
17147
|
+
if (rv) {
|
|
17148
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv));
|
|
17149
|
+
}
|
|
17150
|
+
return;
|
|
17151
|
+
default:
|
|
17152
|
+
return;
|
|
17153
|
+
}
|
|
17154
|
+
|
|
17155
|
+
struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
|
|
17156
|
+
|
|
16651
17157
|
cpu_set_t * cpus = CPU_ALLOC(g_state.numa.total_cpus);
|
|
16652
17158
|
CPU_ZERO_S(setsize, cpus);
|
|
16653
17159
|
for (size_t i = 0; i < node->n_cpus; ++i) {
|
|
16654
17160
|
CPU_SET_S(node->cpus[i], setsize, cpus);
|
|
16655
17161
|
}
|
|
16656
17162
|
|
|
16657
|
-
|
|
17163
|
+
rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
|
|
16658
17164
|
if (rv) {
|
|
16659
|
-
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
|
|
16660
|
-
strerror(rv));
|
|
17165
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
|
|
16661
17166
|
}
|
|
16662
17167
|
|
|
16663
17168
|
CPU_FREE(cpus);
|
|
@@ -16678,8 +17183,7 @@ static void clear_numa_thread_affinity(void) {
|
|
|
16678
17183
|
|
|
16679
17184
|
int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
|
|
16680
17185
|
if (rv) {
|
|
16681
|
-
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
|
|
16682
|
-
strerror(rv));
|
|
17186
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
|
|
16683
17187
|
}
|
|
16684
17188
|
|
|
16685
17189
|
CPU_FREE(cpus);
|
|
@@ -16687,7 +17191,7 @@ static void clear_numa_thread_affinity(void) {
|
|
|
16687
17191
|
#else
|
|
16688
17192
|
// TODO: Windows etc.
|
|
16689
17193
|
// (the linux implementation may also work on BSD, someone should test)
|
|
16690
|
-
static void set_numa_thread_affinity(int thread_n
|
|
17194
|
+
static void set_numa_thread_affinity(int thread_n) { UNUSED(thread_n); }
|
|
16691
17195
|
static void clear_numa_thread_affinity(void) {}
|
|
16692
17196
|
#endif
|
|
16693
17197
|
|
|
@@ -16893,29 +17397,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
16893
17397
|
} break;
|
|
16894
17398
|
case GGML_OP_MAP_CUSTOM1:
|
|
16895
17399
|
{
|
|
16896
|
-
struct ggml_map_custom1_op_params
|
|
16897
|
-
|
|
17400
|
+
struct ggml_map_custom1_op_params p;
|
|
17401
|
+
memcpy(&p, node->op_params, sizeof(p));
|
|
17402
|
+
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
|
16898
17403
|
n_tasks = n_threads;
|
|
16899
17404
|
} else {
|
|
16900
|
-
n_tasks = MIN(p
|
|
17405
|
+
n_tasks = MIN(p.n_tasks, n_threads);
|
|
16901
17406
|
}
|
|
16902
17407
|
} break;
|
|
16903
17408
|
case GGML_OP_MAP_CUSTOM2:
|
|
16904
17409
|
{
|
|
16905
|
-
struct ggml_map_custom2_op_params
|
|
16906
|
-
|
|
17410
|
+
struct ggml_map_custom2_op_params p;
|
|
17411
|
+
memcpy(&p, node->op_params, sizeof(p));
|
|
17412
|
+
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
|
16907
17413
|
n_tasks = n_threads;
|
|
16908
17414
|
} else {
|
|
16909
|
-
n_tasks = MIN(p
|
|
17415
|
+
n_tasks = MIN(p.n_tasks, n_threads);
|
|
16910
17416
|
}
|
|
16911
17417
|
} break;
|
|
16912
17418
|
case GGML_OP_MAP_CUSTOM3:
|
|
16913
17419
|
{
|
|
16914
|
-
struct ggml_map_custom3_op_params
|
|
16915
|
-
|
|
17420
|
+
struct ggml_map_custom3_op_params p;
|
|
17421
|
+
memcpy(&p, node->op_params, sizeof(p));
|
|
17422
|
+
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
|
16916
17423
|
n_tasks = n_threads;
|
|
16917
17424
|
} else {
|
|
16918
|
-
n_tasks = MIN(p
|
|
17425
|
+
n_tasks = MIN(p.n_tasks, n_threads);
|
|
16919
17426
|
}
|
|
16920
17427
|
} break;
|
|
16921
17428
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
|
@@ -16987,10 +17494,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
16987
17494
|
|
|
16988
17495
|
const int n_threads = state->shared->n_threads;
|
|
16989
17496
|
|
|
16990
|
-
set_numa_thread_affinity(state->ith
|
|
17497
|
+
set_numa_thread_affinity(state->ith);
|
|
16991
17498
|
|
|
16992
17499
|
int node_n = -1;
|
|
16993
|
-
int task_phase =
|
|
17500
|
+
int task_phase = GGML_TASK_TYPE_FINALIZE;
|
|
16994
17501
|
|
|
16995
17502
|
while (true) {
|
|
16996
17503
|
if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
|
|
@@ -17002,7 +17509,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
17002
17509
|
// all other threads are finished and spinning
|
|
17003
17510
|
// do finalize and init here so we don't have synchronize again
|
|
17004
17511
|
struct ggml_compute_params params = {
|
|
17005
|
-
/*.type =*/
|
|
17512
|
+
/*.type =*/ GGML_TASK_TYPE_FINALIZE,
|
|
17006
17513
|
/*.ith =*/ 0,
|
|
17007
17514
|
/*.nth =*/ 0,
|
|
17008
17515
|
/*.wsize =*/ cplan->work_size,
|
|
@@ -17033,17 +17540,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
17033
17540
|
if (n_tasks == 1) {
|
|
17034
17541
|
/* INIT */
|
|
17035
17542
|
if (GGML_OP_HAS_INIT[node->op]) {
|
|
17036
|
-
params.type =
|
|
17543
|
+
params.type = GGML_TASK_TYPE_INIT;
|
|
17037
17544
|
ggml_compute_forward(¶ms, node);
|
|
17038
17545
|
}
|
|
17039
17546
|
|
|
17040
17547
|
// TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
|
|
17041
17548
|
// they do something more efficient than spinning (?)
|
|
17042
|
-
params.type =
|
|
17549
|
+
params.type = GGML_TASK_TYPE_COMPUTE;
|
|
17043
17550
|
ggml_compute_forward(¶ms, node);
|
|
17044
17551
|
|
|
17045
17552
|
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
|
17046
|
-
params.type =
|
|
17553
|
+
params.type = GGML_TASK_TYPE_FINALIZE;
|
|
17047
17554
|
ggml_compute_forward(¶ms, node);
|
|
17048
17555
|
}
|
|
17049
17556
|
|
|
@@ -17057,7 +17564,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
17057
17564
|
}
|
|
17058
17565
|
}
|
|
17059
17566
|
|
|
17060
|
-
task_phase =
|
|
17567
|
+
task_phase = GGML_TASK_TYPE_INIT;
|
|
17061
17568
|
atomic_store(&state->shared->n_active, n_threads);
|
|
17062
17569
|
atomic_store(&state->shared->node_n, node_n);
|
|
17063
17570
|
atomic_store(&state->shared->node_task, task_phase);
|
|
@@ -17074,7 +17581,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
17074
17581
|
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
|
17075
17582
|
|
|
17076
17583
|
struct ggml_compute_params params = {
|
|
17077
|
-
/*.type =*/
|
|
17584
|
+
/*.type =*/ GGML_TASK_TYPE_INIT,
|
|
17078
17585
|
/*.ith =*/ state->ith,
|
|
17079
17586
|
/*.nth =*/ n_tasks,
|
|
17080
17587
|
/*.wsize =*/ cplan->work_size,
|
|
@@ -17088,7 +17595,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
17088
17595
|
}
|
|
17089
17596
|
|
|
17090
17597
|
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
|
|
17091
|
-
task_phase =
|
|
17598
|
+
task_phase = GGML_TASK_TYPE_COMPUTE;
|
|
17092
17599
|
atomic_store(&state->shared->n_active, n_threads);
|
|
17093
17600
|
atomic_store(&state->shared->node_task, task_phase);
|
|
17094
17601
|
}
|
|
@@ -17103,12 +17610,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
17103
17610
|
}
|
|
17104
17611
|
|
|
17105
17612
|
if (state->ith < n_tasks) {
|
|
17106
|
-
params.type =
|
|
17613
|
+
params.type = GGML_TASK_TYPE_COMPUTE;
|
|
17107
17614
|
ggml_compute_forward(¶ms, node);
|
|
17108
17615
|
}
|
|
17109
17616
|
|
|
17110
17617
|
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
|
|
17111
|
-
task_phase =
|
|
17618
|
+
task_phase = GGML_TASK_TYPE_FINALIZE;
|
|
17112
17619
|
atomic_store(&state->shared->n_active, n_threads);
|
|
17113
17620
|
atomic_store(&state->shared->node_task, task_phase);
|
|
17114
17621
|
}
|
|
@@ -17344,7 +17851,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
|
17344
17851
|
/*.n_threads =*/ n_threads,
|
|
17345
17852
|
/*.n_active =*/ n_threads,
|
|
17346
17853
|
/*.node_n =*/ -1,
|
|
17347
|
-
/*.node_task =*/
|
|
17854
|
+
/*.node_task =*/ GGML_TASK_TYPE_FINALIZE,
|
|
17348
17855
|
/*.abort_callback =*/ NULL,
|
|
17349
17856
|
/*.abort_callback_data =*/ NULL,
|
|
17350
17857
|
};
|
|
@@ -17412,7 +17919,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
|
17412
17919
|
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
|
17413
17920
|
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
|
17414
17921
|
|
|
17415
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
|
17922
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
|
|
17416
17923
|
|
|
17417
17924
|
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
|
17418
17925
|
|
|
@@ -17793,7 +18300,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|
|
17793
18300
|
|
|
17794
18301
|
ptr += ggml_nbytes(tensor);
|
|
17795
18302
|
|
|
17796
|
-
fprintf(stderr, "%s: loaded leaf %
|
|
18303
|
+
fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
|
17797
18304
|
}
|
|
17798
18305
|
}
|
|
17799
18306
|
|
|
@@ -17896,7 +18403,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|
|
17896
18403
|
|
|
17897
18404
|
result->nodes[i] = tensor;
|
|
17898
18405
|
|
|
17899
|
-
fprintf(stderr, "%s: loaded node %
|
|
18406
|
+
fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
|
17900
18407
|
}
|
|
17901
18408
|
}
|
|
17902
18409
|
}
|
|
@@ -18220,7 +18727,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
|
18220
18727
|
float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
|
|
18221
18728
|
|
|
18222
18729
|
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
|
|
18223
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
|
18730
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
|
|
18224
18731
|
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
|
18225
18732
|
|
|
18226
18733
|
bool cancel = false;
|
|
@@ -18232,7 +18739,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
|
18232
18739
|
if (callback) {
|
|
18233
18740
|
callback(callback_data, accum_step, &sched, &cancel);
|
|
18234
18741
|
if (cancel) {
|
|
18235
|
-
return
|
|
18742
|
+
return GGML_OPT_RESULT_CANCEL;
|
|
18236
18743
|
}
|
|
18237
18744
|
}
|
|
18238
18745
|
// ggml_graph_reset (gf);
|
|
@@ -18323,7 +18830,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
|
18323
18830
|
if (callback) {
|
|
18324
18831
|
callback(callback_data, accum_step, &sched, &cancel);
|
|
18325
18832
|
if (cancel) {
|
|
18326
|
-
return
|
|
18833
|
+
return GGML_OPT_RESULT_CANCEL;;
|
|
18327
18834
|
}
|
|
18328
18835
|
}
|
|
18329
18836
|
// ggml_graph_reset (gf);
|
|
@@ -18340,7 +18847,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
|
18340
18847
|
if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
|
|
18341
18848
|
GGML_PRINT_DEBUG("converged\n");
|
|
18342
18849
|
|
|
18343
|
-
return
|
|
18850
|
+
return GGML_OPT_RESULT_OK;
|
|
18344
18851
|
}
|
|
18345
18852
|
|
|
18346
18853
|
// delta-based convergence test
|
|
@@ -18350,7 +18857,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
|
18350
18857
|
const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;
|
|
18351
18858
|
|
|
18352
18859
|
if (fabsf(rate) < params.delta) {
|
|
18353
|
-
return
|
|
18860
|
+
return GGML_OPT_RESULT_OK;
|
|
18354
18861
|
}
|
|
18355
18862
|
}
|
|
18356
18863
|
|
|
@@ -18366,7 +18873,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
|
18366
18873
|
++n_no_improvement[0];
|
|
18367
18874
|
|
|
18368
18875
|
if (n_no_improvement[0] >= params.max_no_improvement) {
|
|
18369
|
-
return
|
|
18876
|
+
return GGML_OPT_RESULT_OK;
|
|
18370
18877
|
}
|
|
18371
18878
|
}
|
|
18372
18879
|
}
|
|
@@ -18384,7 +18891,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
|
18384
18891
|
}
|
|
18385
18892
|
}
|
|
18386
18893
|
|
|
18387
|
-
return
|
|
18894
|
+
return GGML_OPT_RESULT_DID_NOT_CONVERGE;
|
|
18388
18895
|
}
|
|
18389
18896
|
|
|
18390
18897
|
//
|
|
@@ -18465,7 +18972,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
|
18465
18972
|
float sched = 0;
|
|
18466
18973
|
callback(callback_data, accum_step, &sched, cancel);
|
|
18467
18974
|
if (*cancel) {
|
|
18468
|
-
return
|
|
18975
|
+
return GGML_OPT_RESULT_CANCEL;
|
|
18469
18976
|
}
|
|
18470
18977
|
}
|
|
18471
18978
|
// ggml_graph_reset (gf);
|
|
@@ -18521,7 +19028,9 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
|
18521
19028
|
(*step) *= width;
|
|
18522
19029
|
}
|
|
18523
19030
|
|
|
18524
|
-
|
|
19031
|
+
GGML_ASSERT(false && "line search failed");
|
|
19032
|
+
|
|
19033
|
+
return GGML_LINESEARCH_FAIL;
|
|
18525
19034
|
}
|
|
18526
19035
|
|
|
18527
19036
|
static enum ggml_opt_result ggml_opt_lbfgs(
|
|
@@ -18536,7 +19045,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18536
19045
|
if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
|
|
18537
19046
|
params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
|
|
18538
19047
|
if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
|
|
18539
|
-
return
|
|
19048
|
+
return GGML_OPT_RESULT_INVALID_WOLFE;
|
|
18540
19049
|
}
|
|
18541
19050
|
}
|
|
18542
19051
|
|
|
@@ -18565,7 +19074,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18565
19074
|
}
|
|
18566
19075
|
|
|
18567
19076
|
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
|
|
18568
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
|
19077
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
|
|
18569
19078
|
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
|
18570
19079
|
|
|
18571
19080
|
float * x = opt->lbfgs.x->data; // current parameters
|
|
@@ -18606,7 +19115,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18606
19115
|
float sched = 0;
|
|
18607
19116
|
callback(callback_data, accum_step, &sched, &cancel);
|
|
18608
19117
|
if (cancel) {
|
|
18609
|
-
return
|
|
19118
|
+
return GGML_OPT_RESULT_CANCEL;
|
|
18610
19119
|
}
|
|
18611
19120
|
}
|
|
18612
19121
|
// ggml_graph_reset (gf);
|
|
@@ -18634,7 +19143,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18634
19143
|
|
|
18635
19144
|
// already optimized
|
|
18636
19145
|
if (gnorm/xnorm <= params.lbfgs.eps) {
|
|
18637
|
-
return
|
|
19146
|
+
return GGML_OPT_RESULT_OK;
|
|
18638
19147
|
}
|
|
18639
19148
|
|
|
18640
19149
|
if (opt->just_initialized) {
|
|
@@ -18679,7 +19188,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18679
19188
|
// way to test and don't want to break something with so many changes lined up
|
|
18680
19189
|
ls = linesearch_backtracking(¶ms, nx, x, &fx, g, d, step, xp, f, gb, &cplan, np, ps, &cancel, callback, callback_data);
|
|
18681
19190
|
if (cancel) {
|
|
18682
|
-
return
|
|
19191
|
+
return GGML_OPT_RESULT_CANCEL;
|
|
18683
19192
|
}
|
|
18684
19193
|
|
|
18685
19194
|
if (ls < 0) {
|
|
@@ -18702,7 +19211,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18702
19211
|
}
|
|
18703
19212
|
if (gnorm/xnorm <= params.lbfgs.eps) {
|
|
18704
19213
|
// converged
|
|
18705
|
-
return
|
|
19214
|
+
return GGML_OPT_RESULT_OK;
|
|
18706
19215
|
}
|
|
18707
19216
|
|
|
18708
19217
|
// delta-based convergence test
|
|
@@ -18712,7 +19221,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18712
19221
|
const float rate = (pf[k[0]%params.past] - fx)/fx;
|
|
18713
19222
|
|
|
18714
19223
|
if (fabsf(rate) < params.delta) {
|
|
18715
|
-
return
|
|
19224
|
+
return GGML_OPT_RESULT_OK;
|
|
18716
19225
|
}
|
|
18717
19226
|
}
|
|
18718
19227
|
|
|
@@ -18728,14 +19237,14 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18728
19237
|
n_no_improvement[0]++;
|
|
18729
19238
|
|
|
18730
19239
|
if (n_no_improvement[0] >= params.max_no_improvement) {
|
|
18731
|
-
return
|
|
19240
|
+
return GGML_OPT_RESULT_OK;
|
|
18732
19241
|
}
|
|
18733
19242
|
}
|
|
18734
19243
|
}
|
|
18735
19244
|
|
|
18736
19245
|
if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) {
|
|
18737
19246
|
// reached the maximum number of iterations
|
|
18738
|
-
return
|
|
19247
|
+
return GGML_OPT_RESULT_DID_NOT_CONVERGE;
|
|
18739
19248
|
}
|
|
18740
19249
|
|
|
18741
19250
|
// update vectors s and y:
|
|
@@ -18789,17 +19298,19 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
|
18789
19298
|
step[0] = 1.0;
|
|
18790
19299
|
}
|
|
18791
19300
|
|
|
18792
|
-
|
|
19301
|
+
GGML_ASSERT(false && "lbfgs failed");
|
|
19302
|
+
|
|
19303
|
+
return GGML_OPT_RESULT_DID_NOT_CONVERGE;
|
|
18793
19304
|
}
|
|
18794
19305
|
|
|
18795
19306
|
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
|
18796
19307
|
struct ggml_opt_params result;
|
|
18797
19308
|
|
|
18798
19309
|
switch (type) {
|
|
18799
|
-
case
|
|
19310
|
+
case GGML_OPT_TYPE_ADAM:
|
|
18800
19311
|
{
|
|
18801
19312
|
result = (struct ggml_opt_params) {
|
|
18802
|
-
.type =
|
|
19313
|
+
.type = GGML_OPT_TYPE_ADAM,
|
|
18803
19314
|
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
|
|
18804
19315
|
.n_threads = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
|
|
18805
19316
|
.past = 0,
|
|
@@ -18827,10 +19338,10 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
|
|
18827
19338
|
},
|
|
18828
19339
|
};
|
|
18829
19340
|
} break;
|
|
18830
|
-
case
|
|
19341
|
+
case GGML_OPT_TYPE_LBFGS:
|
|
18831
19342
|
{
|
|
18832
19343
|
result = (struct ggml_opt_params) {
|
|
18833
|
-
.type =
|
|
19344
|
+
.type = GGML_OPT_TYPE_LBFGS,
|
|
18834
19345
|
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
|
|
18835
19346
|
.n_threads = 1,
|
|
18836
19347
|
.past = 0,
|
|
@@ -18875,12 +19386,12 @@ GGML_API void ggml_opt_init(
|
|
|
18875
19386
|
opt->just_initialized = true;
|
|
18876
19387
|
if (opt->ctx == NULL) {
|
|
18877
19388
|
struct ggml_init_params ctx_opt_params;
|
|
18878
|
-
if (opt->params.type ==
|
|
19389
|
+
if (opt->params.type == GGML_OPT_TYPE_ADAM) {
|
|
18879
19390
|
ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
|
|
18880
19391
|
if (opt->params.past > 0) {
|
|
18881
19392
|
ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
|
|
18882
19393
|
}
|
|
18883
|
-
} else if (opt->params.type ==
|
|
19394
|
+
} else if (opt->params.type == GGML_OPT_TYPE_LBFGS) {
|
|
18884
19395
|
ctx_opt_params.mem_size = GGML_MEM_ALIGN*9 + ggml_tensor_overhead()*9 + ggml_type_size(GGML_TYPE_F32)*(nx*5 + opt->params.lbfgs.m*2 + nx*opt->params.lbfgs.m*2);
|
|
18885
19396
|
if (opt->params.past > 0) {
|
|
18886
19397
|
ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
|
|
@@ -18892,7 +19403,7 @@ GGML_API void ggml_opt_init(
|
|
|
18892
19403
|
opt->ctx = ggml_init(ctx_opt_params);
|
|
18893
19404
|
}
|
|
18894
19405
|
switch (opt->params.type) {
|
|
18895
|
-
case
|
|
19406
|
+
case GGML_OPT_TYPE_ADAM:
|
|
18896
19407
|
{
|
|
18897
19408
|
opt->adam.g = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
|
|
18898
19409
|
opt->adam.m = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
|
|
@@ -18906,7 +19417,7 @@ GGML_API void ggml_opt_init(
|
|
|
18906
19417
|
ggml_set_zero(opt->adam.pf);
|
|
18907
19418
|
}
|
|
18908
19419
|
} break;
|
|
18909
|
-
case
|
|
19420
|
+
case GGML_OPT_TYPE_LBFGS:
|
|
18910
19421
|
{
|
|
18911
19422
|
opt->lbfgs.x = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
|
|
18912
19423
|
opt->lbfgs.xp = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
|
|
@@ -18950,13 +19461,13 @@ enum ggml_opt_result ggml_opt(
|
|
|
18950
19461
|
|
|
18951
19462
|
ctx = ggml_init(params_ctx);
|
|
18952
19463
|
if (ctx == NULL) {
|
|
18953
|
-
return
|
|
19464
|
+
return GGML_OPT_RESULT_NO_CONTEXT;
|
|
18954
19465
|
}
|
|
18955
19466
|
|
|
18956
19467
|
free_ctx = true;
|
|
18957
19468
|
}
|
|
18958
19469
|
|
|
18959
|
-
enum ggml_opt_result result =
|
|
19470
|
+
enum ggml_opt_result result = GGML_OPT_RESULT_OK;
|
|
18960
19471
|
|
|
18961
19472
|
struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
|
|
18962
19473
|
|
|
@@ -18995,14 +19506,14 @@ enum ggml_opt_result ggml_opt_resume_g(
|
|
|
18995
19506
|
void * callback_data) {
|
|
18996
19507
|
|
|
18997
19508
|
// build forward + backward compute graphs
|
|
18998
|
-
enum ggml_opt_result result =
|
|
19509
|
+
enum ggml_opt_result result = GGML_OPT_RESULT_OK;
|
|
18999
19510
|
|
|
19000
19511
|
switch (opt->params.type) {
|
|
19001
|
-
case
|
|
19512
|
+
case GGML_OPT_TYPE_ADAM:
|
|
19002
19513
|
{
|
|
19003
19514
|
result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
|
|
19004
19515
|
} break;
|
|
19005
|
-
case
|
|
19516
|
+
case GGML_OPT_TYPE_LBFGS:
|
|
19006
19517
|
{
|
|
19007
19518
|
result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
|
|
19008
19519
|
} break;
|
|
@@ -19037,9 +19548,12 @@ void ggml_quantize_init(enum ggml_type type) {
|
|
|
19037
19548
|
ggml_critical_section_start();
|
|
19038
19549
|
|
|
19039
19550
|
switch (type) {
|
|
19040
|
-
case GGML_TYPE_IQ2_XXS:
|
|
19041
|
-
case GGML_TYPE_IQ2_XS:
|
|
19551
|
+
case GGML_TYPE_IQ2_XXS:
|
|
19552
|
+
case GGML_TYPE_IQ2_XS:
|
|
19553
|
+
case GGML_TYPE_IQ2_S:
|
|
19554
|
+
case GGML_TYPE_IQ1_S: iq2xs_init_impl(type); break;
|
|
19042
19555
|
case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
|
|
19556
|
+
case GGML_TYPE_IQ3_S: iq3xs_init_impl(512); break;
|
|
19043
19557
|
default: // nothing
|
|
19044
19558
|
break;
|
|
19045
19559
|
}
|
|
@@ -19050,8 +19564,10 @@ void ggml_quantize_init(enum ggml_type type) {
|
|
|
19050
19564
|
void ggml_quantize_free(void) {
|
|
19051
19565
|
ggml_critical_section_start();
|
|
19052
19566
|
|
|
19053
|
-
iq2xs_free_impl(
|
|
19054
|
-
iq2xs_free_impl(
|
|
19567
|
+
iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
|
|
19568
|
+
iq2xs_free_impl(GGML_TYPE_IQ2_XS);
|
|
19569
|
+
iq2xs_free_impl(GGML_TYPE_IQ1_S);
|
|
19570
|
+
iq3xs_free_impl(256);
|
|
19055
19571
|
|
|
19056
19572
|
ggml_critical_section_end();
|
|
19057
19573
|
}
|
|
@@ -19186,7 +19702,8 @@ size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t *
|
|
|
19186
19702
|
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
|
|
19187
19703
|
return
|
|
19188
19704
|
type == GGML_TYPE_IQ2_XXS ||
|
|
19189
|
-
type == GGML_TYPE_IQ2_XS
|
|
19705
|
+
type == GGML_TYPE_IQ2_XS ||
|
|
19706
|
+
type == GGML_TYPE_IQ1_S;
|
|
19190
19707
|
}
|
|
19191
19708
|
|
|
19192
19709
|
size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start,
|
|
@@ -19311,6 +19828,56 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|
|
19311
19828
|
result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
19312
19829
|
GGML_ASSERT(result == row_size * nrows);
|
|
19313
19830
|
} break;
|
|
19831
|
+
case GGML_TYPE_IQ3_S:
|
|
19832
|
+
{
|
|
19833
|
+
GGML_ASSERT(start % QK_K == 0);
|
|
19834
|
+
GGML_ASSERT(start % n_per_row == 0);
|
|
19835
|
+
size_t start_row = start / n_per_row;
|
|
19836
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
|
19837
|
+
result = quantize_iq3_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
19838
|
+
GGML_ASSERT(result == row_size * nrows);
|
|
19839
|
+
} break;
|
|
19840
|
+
case GGML_TYPE_IQ2_S:
|
|
19841
|
+
{
|
|
19842
|
+
GGML_ASSERT(start % QK_K == 0);
|
|
19843
|
+
GGML_ASSERT(start % n_per_row == 0);
|
|
19844
|
+
size_t start_row = start / n_per_row;
|
|
19845
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
|
19846
|
+
result = quantize_iq2_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
19847
|
+
GGML_ASSERT(result == row_size * nrows);
|
|
19848
|
+
} break;
|
|
19849
|
+
case GGML_TYPE_IQ1_S:
|
|
19850
|
+
{
|
|
19851
|
+
GGML_ASSERT(start % QK_K == 0);
|
|
19852
|
+
GGML_ASSERT(start % n_per_row == 0);
|
|
19853
|
+
size_t start_row = start / n_per_row;
|
|
19854
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
|
19855
|
+
result = quantize_iq1_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
19856
|
+
GGML_ASSERT(result == row_size * nrows);
|
|
19857
|
+
} break;
|
|
19858
|
+
case GGML_TYPE_IQ4_NL:
|
|
19859
|
+
#if QK_K == 64
|
|
19860
|
+
case GGML_TYPE_IQ4_XS:
|
|
19861
|
+
#endif
|
|
19862
|
+
{
|
|
19863
|
+
GGML_ASSERT(start % QK4_NL == 0);
|
|
19864
|
+
GGML_ASSERT(start % n_per_row == 0);
|
|
19865
|
+
size_t start_row = start / n_per_row;
|
|
19866
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
|
19867
|
+
result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
19868
|
+
GGML_ASSERT(result == row_size * nrows);
|
|
19869
|
+
} break;
|
|
19870
|
+
#if QK_K != 64
|
|
19871
|
+
case GGML_TYPE_IQ4_XS:
|
|
19872
|
+
{
|
|
19873
|
+
GGML_ASSERT(start % QK_K == 0);
|
|
19874
|
+
GGML_ASSERT(start % n_per_row == 0);
|
|
19875
|
+
size_t start_row = start / n_per_row;
|
|
19876
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
|
19877
|
+
result = quantize_iq4_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
19878
|
+
GGML_ASSERT(result == row_size * nrows);
|
|
19879
|
+
} break;
|
|
19880
|
+
#endif
|
|
19314
19881
|
case GGML_TYPE_F16:
|
|
19315
19882
|
{
|
|
19316
19883
|
size_t elemsize = sizeof(ggml_fp16_t);
|