llama_cpp 0.12.6 → 0.12.7
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/llama_cpp/llama_cpp.cpp +21 -10
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +8 -1
- data/vendor/tmp/llama.cpp/Makefile +43 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +73 -43
- data/vendor/tmp/llama.cpp/ggml-backend.c +18 -9
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +560 -346
- data/vendor/tmp/llama.cpp/ggml-impl.h +20 -7
- data/vendor/tmp/llama.cpp/ggml-metal.m +99 -11
- data/vendor/tmp/llama.cpp/ggml-metal.metal +608 -9
- data/vendor/tmp/llama.cpp/ggml-quants.c +908 -54
- data/vendor/tmp/llama.cpp/ggml-quants.h +25 -2
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +81 -203
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +124 -52
- data/vendor/tmp/llama.cpp/ggml.c +948 -504
- data/vendor/tmp/llama.cpp/ggml.h +24 -11
- data/vendor/tmp/llama.cpp/llama.cpp +688 -163
- data/vendor/tmp/llama.cpp/llama.h +37 -1
- data/vendor/tmp/llama.cpp/scripts/get-flags.mk +1 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -23,6 +23,9 @@
 #include <limits.h>
 #include <stdarg.h>
 #include <signal.h>
+#if defined(__gnu_linux__)
+#include <syscall.h>
+#endif
 
 #ifdef GGML_USE_METAL
 #include <unistd.h>
@@ -270,6 +273,8 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #include <Accelerate/Accelerate.h>
 #if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
 #include "ggml-opencl.h"
+#elif defined(GGML_USE_VULKAN)
+#include "ggml-vulkan.h"
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #if defined(GGML_BLAS_USE_MKL)
@@ -318,7 +323,7 @@ float ggml_table_f32_f16[1 << 16];
 // note: do not use these inside ggml.c
 // these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
-    return
+    return GGML_FP16_TO_FP32(x);
 }
 
 ggml_fp16_t ggml_fp32_to_fp16(float x) {
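These are the scalar fp16 conversion helpers exported in ggml.h (the comment above warns against using them inside ggml.c itself, where the macros are used directly). A minimal round-trip sketch, assuming this vendored ggml is built and on the include path:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        ggml_fp16_t h = ggml_fp32_to_fp16(0.5f); // fp32 -> fp16
        printf("%f\n", ggml_fp16_to_fp32(h));    // fp16 -> fp32, prints 0.500000
        return 0;
    }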
@@ -673,6 +678,30 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot_type             = GGML_TYPE_Q8_K,
         .nrows                    = 1,
     },
+    [GGML_TYPE_IQ1_S] = {
+        .type_name                = "iq1_s",
+        .blck_size                = QK_K,
+        .type_size                = sizeof(block_iq1_s),
+        .is_quantized             = true,
+        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
+        .from_float               = NULL,
+        .from_float_reference     = NULL,
+        .vec_dot                  = ggml_vec_dot_iq1_s_q8_K,
+        .vec_dot_type             = GGML_TYPE_Q8_K,
+        .nrows                    = 1,
+    },
+    [GGML_TYPE_IQ4_NL] = {
+        .type_name                = "iq4_nl",
+        .blck_size                = QK4_NL,
+        .type_size                = sizeof(block_iq4_nl),
+        .is_quantized             = true,
+        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
+        .from_float               = quantize_row_iq4_nl,
+        .from_float_reference     = (ggml_from_float_t)quantize_row_iq4_nl_reference,
+        .vec_dot                  = ggml_vec_dot_iq4_nl_q8_0,
+        .vec_dot_type             = GGML_TYPE_Q8_0,
+        .nrows                    = 1,
+    },
     [GGML_TYPE_Q8_K] = {
         .type_name                = "q8_K",
         .blck_size                = QK_K,
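The two new type_traits entries are what let the rest of ggml introspect and dispatch on the IQ1_S and IQ4_NL quantizations. A small sketch that queries them through the public API (assuming the ggml.h from this vendored tree is on the include path):

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        const enum ggml_type types[] = { GGML_TYPE_IQ1_S, GGML_TYPE_IQ4_NL };
        for (int i = 0; i < 2; ++i) {
            printf("%-7s block=%d bytes=%zu quantized=%d\n",
                   ggml_type_name(types[i]),
                   ggml_blck_size(types[i]),
                   (size_t) ggml_type_size(types[i]),
                   (int) ggml_is_quantized(types[i]));
        }
        return 0;
    }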
@@ -769,7 +798,7 @@ inline static float vaddvq_f32(float32x4_t v) {
 #define GGML_F16x8              float16x8_t
 #define GGML_F16x8_ZERO         vdupq_n_f16(0.0f)
 #define GGML_F16x8_SET1(x)      vdupq_n_f16(x)
-#define GGML_F16x8_LOAD         vld1q_f16
+#define GGML_F16x8_LOAD(x)      vld1q_f16((const __fp16 *)(x))
 #define GGML_F16x8_STORE        vst1q_f16
 #define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
 #define GGML_F16x8_ADD          vaddq_f16
@@ -812,7 +841,7 @@ inline static float vaddvq_f32(float32x4_t v) {
 #define GGML_F32Cx4              float32x4_t
 #define GGML_F32Cx4_ZERO         vdupq_n_f32(0.0f)
 #define GGML_F32Cx4_SET1(x)      vdupq_n_f32(x)
-#define GGML_F32Cx4_LOAD(x)      vcvt_f32_f16(vld1_f16(x))
+#define GGML_F32Cx4_LOAD(x)      vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
 #define GGML_F32Cx4_STORE(x, y)  vst1_f16(x, vcvt_f16_f32(y))
 #define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
 #define GGML_F32Cx4_ADD          vaddq_f32
@@ -868,7 +897,7 @@ do { \
     const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), \
                                  _mm256_extractf128_ps(x[0], 1)); \
     const __m128 t1 = _mm_hadd_ps(t0, t0);                  \
-    res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1));               \
+    res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t1, t1));  \
 } while (0)
 // TODO: is this optimal ?
 
@@ -1149,7 +1178,7 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) {
         x[i] = _mm_add_ps(x[i], x[offset+i]); \
     }                                         \
     const __m128 t0 = _mm_hadd_ps(x[0], x[0]);              \
-    res = _mm_cvtss_f32(_mm_hadd_ps(t0, t0));               \
+    res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t0, t0));  \
 }
 // TODO: is this optimal ?
 
@@ -1954,9 +1983,16 @@ struct ggml_numa_node {
 };
 
 struct ggml_numa_nodes {
+    enum ggml_numa_strategy numa_strategy;
     struct ggml_numa_node nodes[GGML_NUMA_MAX_NODES];
     uint32_t n_nodes;
     uint32_t total_cpus; // hardware threads on system
+    uint32_t current_node; // node on which main process is execting
+#if defined(__gnu_linux__)
+    cpu_set_t cpuset; // cpuset from numactl
+#else
+    uint32_t cpuset; // no NUMA support outside of Linux at this time. Use a portable datatype
+#endif
 };
 
 //
@@ -1990,18 +2026,40 @@ inline static void ggml_critical_section_end(void) {
     atomic_fetch_sub(&g_state_barrier, 1);
 }
 
-void ggml_numa_init(void) {
+#if defined(__gnu_linux__)
+static cpu_set_t ggml_get_numa_affinity(void) {
+    cpu_set_t cpuset;
+    pthread_t thread;
+    thread = pthread_self();
+    CPU_ZERO(&cpuset);
+    pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+    return cpuset;
+}
+#else
+static uint32_t ggml_get_numa_affinity(void) {
+    return 0; // no NUMA support
+}
+#endif
+
+void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
     if (g_state.numa.n_nodes > 0) {
         fprintf(stderr, "ggml_numa_init: NUMA already initialized\n");
 
         return;
     }
 
-#ifdef __linux__
+#if defined(__gnu_linux__)
     struct stat st;
     char path[256];
     int rv;
 
+    // set numa scheme
+    g_state.numa.numa_strategy = numa_flag;
+
+    GGML_PRINT_DEBUG("numa strategy %u\n",g_state.numa.numa_strategy);
+
+    g_state.numa.cpuset = ggml_get_numa_affinity();
+
     // enumerate nodes
     while (g_state.numa.n_nodes < GGML_NUMA_MAX_NODES) {
         rv = snprintf(path, sizeof(path), "/sys/devices/system/node/node%u", g_state.numa.n_nodes);
@@ -2020,11 +2078,23 @@ void ggml_numa_init(void) {
 
     GGML_PRINT_DEBUG("found %u numa nodes, %u CPUs\n", g_state.numa.n_nodes, g_state.numa.total_cpus);
 
-    if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1) {
+    // figure out which node we're on
+    uint current_cpu;
+    int getcpu_ret = 0;
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 28)
+    getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
+#else
+    // old glibc doesn't have a wrapper for this call. Fall back on direct syscall
+    getcpu_ret = syscall(SYS_getcpu,&current_cpu,&g_state.numa.current_node);
+#endif
+
+    if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
         g_state.numa.n_nodes = 0;
         return;
     }
 
+    GGML_PRINT_DEBUG("found our process on numa node %u, CPU %u\n", g_state.numa.current_node, current_cpu);
+
     for (uint32_t n = 0; n < g_state.numa.n_nodes; ++n) {
         struct ggml_numa_node * node = &g_state.numa.nodes[n];
         GGML_PRINT_DEBUG("CPUs on node %u:", n);
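The glibc version check above exists because the getcpu() wrapper only appeared in glibc 2.29; older systems must issue the raw syscall. A standalone, Linux-only sketch of the same fallback (same macros, simplified error handling):

    #define _GNU_SOURCE
    #include <sched.h>       // getcpu() on glibc >= 2.29
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void) {
        unsigned int cpu = 0, node = 0;
        int rc;
    #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 28)
        rc = getcpu(&cpu, &node);                     // glibc wrapper
    #else
        rc = syscall(SYS_getcpu, &cpu, &node, NULL);  // direct syscall fallback
    #endif
        if (rc == 0) {
            printf("running on CPU %u, NUMA node %u\n", cpu, node);
        }
        return rc == 0 ? 0 : 1;
    }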
@@ -2051,6 +2121,7 @@ void ggml_numa_init(void) {
         }
     }
 #else
+    GGML_UNUSED(numa_flag);
     // TODO
 #endif
 }
@@ -2231,6 +2302,8 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS; break;
         case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;  break;
         case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS; break;
+        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;   break;
+        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;  break;
         case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT;   break;
         case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT;   break;
     }
@@ -3184,7 +3257,7 @@ const char * ggml_get_name(const struct ggml_tensor * tensor) {
 }
 
 struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
-    strncpy(tensor->name, name, sizeof(tensor->name));
+    strncpy(tensor->name, name, sizeof(tensor->name) - 1);
     tensor->name[sizeof(tensor->name) - 1] = '\0';
     return tensor;
 }
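The one-byte adjustment matters because strncpy() does not write a terminator when the source string is at least as long as the limit; copying sizeof(name) - 1 bytes and storing the '\0' explicitly guarantees a valid (possibly truncated) C string. A self-contained illustration with a hypothetical 8-byte buffer:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        char name[8];
        strncpy(name, "0123456789", sizeof(name) - 1); // copies at most 7 bytes
        name[sizeof(name) - 1] = '\0';                 // termination now guaranteed
        printf("%s\n", name);                          // prints "0123456"
        return 0;
    }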
@@ -5060,16 +5133,28 @@ static struct ggml_tensor * ggml_soft_max_impl(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * mask,
+        struct ggml_tensor  * pos,
         float                 scale,
+        float                 max_bias,
         bool                  inplace) {
     GGML_ASSERT(ggml_is_contiguous(a));
+
     if (mask) {
         GGML_ASSERT(ggml_is_contiguous(mask));
-        GGML_ASSERT(mask->ne[2] == 1);
-        GGML_ASSERT(mask->ne[3] == 1);
+        GGML_ASSERT(ggml_is_matrix(mask));
         GGML_ASSERT(ggml_can_repeat_rows(mask, a));
     }
 
+    if (pos) {
+        GGML_ASSERT(ggml_is_vector(pos));
+        GGML_ASSERT(pos->type == GGML_TYPE_F32);
+        GGML_ASSERT(pos->ne[0] == a->ne[0]);
+    }
+
+    if (max_bias > 0.0f) {
+        GGML_ASSERT(pos);
+    }
+
     bool is_node = false;
 
     if (a->grad) {
@@ -5078,13 +5163,14 @@ static struct ggml_tensor * ggml_soft_max_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    float params[] = { scale };
+    float params[] = { scale, max_bias };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op   = GGML_OP_SOFT_MAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = mask;
+    result->src[2] = pos;
 
     return result;
 }
@@ -5092,21 +5178,23 @@ static struct ggml_tensor * ggml_soft_max_impl(
 struct ggml_tensor * ggml_soft_max(
         struct ggml_context * ctx,
         struct ggml_tensor  * a) {
-    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, false);
+    return ggml_soft_max_impl(ctx, a, NULL, NULL, 1.0f, 0.0f, false);
 }
 
 struct ggml_tensor * ggml_soft_max_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor  * a) {
-    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, true);
+    return ggml_soft_max_impl(ctx, a, NULL, NULL, 1.0f, 0.0f, true);
 }
 
 struct ggml_tensor * ggml_soft_max_ext(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         struct ggml_tensor  * mask,
-        float                 scale) {
-    return ggml_soft_max_impl(ctx, a, mask, scale, false);
+        struct ggml_tensor  * pos,
+        float                 scale,
+        float                 max_bias) {
+    return ggml_soft_max_impl(ctx, a, mask, pos, scale, max_bias, false);
 }
 
 // ggml_soft_max_back
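For context, the widened ggml_soft_max_ext() is the ALiBi-enabled entry point: mask and pos may be NULL, and a max_bias of 0.0f disables the positional bias (the asserts added to ggml_soft_max_impl above enforce that pos is present whenever max_bias > 0). A hedged usage sketch, with the tensor shapes assumed rather than taken from this diff:

    #include "ggml.h"

    // scores: [n_kv, n_tokens], mask: [n_kv, n_tokens] or NULL, pos: [n_kv] F32
    static struct ggml_tensor * soft_max_alibi(
            struct ggml_context * ctx,
            struct ggml_tensor  * scores,
            struct ggml_tensor  * mask,
            struct ggml_tensor  * pos,
            float                 scale,
            float                 max_bias) {
        return ggml_soft_max_ext(ctx, scores, mask, pos, scale, max_bias);
    }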
@@ -5556,7 +5644,9 @@ struct ggml_tensor * ggml_conv_2d(
             ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
             ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
 
-    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2],
+    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
+    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
+
 
     return result;
 }
@@ -6562,8 +6652,10 @@ void ggml_set_param(
 
 static void ggml_compute_forward_dup_same_cont(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
     GGML_ASSERT(src0->type == dst->type);
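Most of the remaining hunks in this file repeat one mechanical refactor: the per-op compute functions stop taking their source tensors as parameters and read them off dst instead, since a ggml tensor already carries its operands in dst->src[]. A toy sketch of the idea (hypothetical struct, not the real ggml_tensor):

    #include <stdio.h>

    struct tensor { const struct tensor * src[2]; float val; };

    static void forward_neg(struct tensor * dst) {
        const struct tensor * src0 = dst->src[0]; // operand read off dst, as in the diff
        dst->val = -src0->val;
    }

    int main(void) {
        struct tensor a   = { { NULL, NULL }, 3.0f };
        struct tensor out = { { &a,   NULL }, 0.0f };
        forward_neg(&out);
        printf("%f\n", out.val); // -3.000000
        return 0;
    }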
@@ -6594,8 +6686,10 @@ static void ggml_compute_forward_dup_same_cont(
 }
 static void ggml_compute_forward_dup_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -6608,7 +6702,7 @@ static void ggml_compute_forward_dup_f16(
     const int nth = params->nth; // number of threads
 
     if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params, src0, dst);
+        ggml_compute_forward_dup_same_cont(params, dst);
         return;
     }
 
@@ -6865,8 +6959,10 @@ static void ggml_compute_forward_dup_f16(
 
 static void ggml_compute_forward_dup_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -6879,7 +6975,7 @@ static void ggml_compute_forward_dup_f32(
     const int nth = params->nth; // number of threads
 
     if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params, src0, dst);
+        ggml_compute_forward_dup_same_cont(params, dst);
         return;
     }
 
@@ -7115,8 +7211,10 @@ static void ggml_compute_forward_dup_f32(
 // A simplified version of ggml_compute_forward_dup that doesn't do float upcasting, and just plain old memcpy.
 static void ggml_compute_forward_dup_bytes(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
     GGML_ASSERT(src0->type == dst->type);
 
@@ -7125,7 +7223,7 @@ static void ggml_compute_forward_dup_bytes(
     }
 
     if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
-        ggml_compute_forward_dup_same_cont(params, src0, dst);
+        ggml_compute_forward_dup_same_cont(params, dst);
         return;
     }
 
@@ -7264,21 +7362,23 @@ static void ggml_compute_forward_dup_bytes(
 
 static void ggml_compute_forward_dup(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (src0->type == dst->type) {
-        ggml_compute_forward_dup_bytes(params, src0, dst);
+        ggml_compute_forward_dup_bytes(params, dst);
         return;
     }
 
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_dup_f16(params, src0, dst);
+                ggml_compute_forward_dup_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_dup_f32(params, src0, dst);
+                ggml_compute_forward_dup_f32(params, dst);
             } break;
         default:
             {
@@ -7291,9 +7391,11 @@ static void ggml_compute_forward_dup(
 
 static void ggml_compute_forward_add_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -7379,9 +7481,11 @@ static void ggml_compute_forward_add_f32(
 
 static void ggml_compute_forward_add_f16_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -7456,9 +7560,11 @@ static void ggml_compute_forward_add_f16_f32(
 
 static void ggml_compute_forward_add_f16_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -7510,9 +7616,11 @@ static void ggml_compute_forward_add_f16_f16(
 
 static void ggml_compute_forward_add_q_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -7588,14 +7696,16 @@ static void ggml_compute_forward_add_q_f32(
 
 static void ggml_compute_forward_add(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
                 if (src1->type == GGML_TYPE_F32) {
-                    ggml_compute_forward_add_f32(params, src0, src1, dst);
+                    ggml_compute_forward_add_f32(params, dst);
                 }
                 else {
                     GGML_ASSERT(false);
@@ -7604,10 +7714,10 @@ static void ggml_compute_forward_add(
         case GGML_TYPE_F16:
             {
                 if (src1->type == GGML_TYPE_F16) {
-                    ggml_compute_forward_add_f16_f16(params, src0, src1, dst);
+                    ggml_compute_forward_add_f16_f16(params, dst);
                 }
                 else if (src1->type == GGML_TYPE_F32) {
-                    ggml_compute_forward_add_f16_f32(params, src0, src1, dst);
+                    ggml_compute_forward_add_f16_f32(params, dst);
                 }
                 else {
                     GGML_ASSERT(false);
@@ -7626,8 +7736,10 @@ static void ggml_compute_forward_add(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
             {
-                ggml_compute_forward_add_q_f32(params, src0, src1, dst);
+                ggml_compute_forward_add_q_f32(params, dst);
             } break;
         default:
             {
@@ -7640,9 +7752,11 @@ static void ggml_compute_forward_add(
 
 static void ggml_compute_forward_add1_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_scalar(src1));
 
@@ -7692,9 +7806,11 @@ static void ggml_compute_forward_add1_f32(
 
 static void ggml_compute_forward_add1_f16_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_scalar(src1));
 
@@ -7742,9 +7858,11 @@ static void ggml_compute_forward_add1_f16_f32(
 
 static void ggml_compute_forward_add1_f16_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_scalar(src1));
 
@@ -7792,9 +7910,11 @@ static void ggml_compute_forward_add1_f16_f16(
 
 static void ggml_compute_forward_add1_q_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_scalar(src1));
 
@@ -7859,21 +7979,23 @@ static void ggml_compute_forward_add1_q_f32(
 
 static void ggml_compute_forward_add1(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_add1_f32(params, src0, src1, dst);
+                ggml_compute_forward_add1_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
             {
                 if (src1->type == GGML_TYPE_F16) {
-                    ggml_compute_forward_add1_f16_f16(params, src0, src1, dst);
+                    ggml_compute_forward_add1_f16_f16(params, dst);
                 }
                 else if (src1->type == GGML_TYPE_F32) {
-                    ggml_compute_forward_add1_f16_f32(params, src0, src1, dst);
+                    ggml_compute_forward_add1_f16_f32(params, dst);
                 }
                 else {
                     GGML_ASSERT(false);
@@ -7893,8 +8015,10 @@ static void ggml_compute_forward_add1(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
             {
-                ggml_compute_forward_add1_q_f32(params, src0, src1, dst);
+                ggml_compute_forward_add1_q_f32(params, dst);
             } break;
         default:
             {
@@ -7907,9 +8031,11 @@ static void ggml_compute_forward_add1(
 
 static void ggml_compute_forward_acc_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
 
@@ -7989,14 +8115,14 @@ static void ggml_compute_forward_acc_f32(
 
 static void ggml_compute_forward_acc(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_acc_f32(params, src0, src1, dst);
+                ggml_compute_forward_acc_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
         case GGML_TYPE_Q4_0:
@@ -8013,6 +8139,8 @@ static void ggml_compute_forward_acc(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
         default:
             {
                 GGML_ASSERT(false);
@@ -8024,9 +8152,11 @@ static void ggml_compute_forward_acc(
 
 static void ggml_compute_forward_sub_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
@@ -8084,13 +8214,14 @@ static void ggml_compute_forward_sub_f32(
 
 static void ggml_compute_forward_sub(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_sub_f32(params, src0, src1, dst);
+                ggml_compute_forward_sub_f32(params, dst);
             } break;
         default:
             {
@@ -8103,9 +8234,11 @@ static void ggml_compute_forward_sub(
 
 static void ggml_compute_forward_mul_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8186,15 +8319,17 @@ static void ggml_compute_forward_mul_f32(
 
 static void ggml_compute_forward_mul(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now");
 
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_mul_f32(params, src0, src1, dst);
+                ggml_compute_forward_mul_f32(params, dst);
             } break;
         default:
             {
@@ -8207,9 +8342,11 @@ static void ggml_compute_forward_mul(
 
 static void ggml_compute_forward_div_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8280,13 +8417,14 @@ static void ggml_compute_forward_div_f32(
 
 static void ggml_compute_forward_div(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_div_f32(params, src0, src1, dst);
+                ggml_compute_forward_div_f32(params, dst);
             } break;
         default:
             {
@@ -8299,8 +8437,10 @@ static void ggml_compute_forward_div(
 
 static void ggml_compute_forward_sqr_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -8323,12 +8463,14 @@ static void ggml_compute_forward_sqr_f32(
 
 static void ggml_compute_forward_sqr(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_sqr_f32(params, src0, dst);
+                ggml_compute_forward_sqr_f32(params, dst);
             } break;
         default:
             {
@@ -8341,8 +8483,10 @@ static void ggml_compute_forward_sqr(
 
 static void ggml_compute_forward_sqrt_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -8365,12 +8509,14 @@ static void ggml_compute_forward_sqrt_f32(
 
 static void ggml_compute_forward_sqrt(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_sqrt_f32(params, src0, dst);
+                ggml_compute_forward_sqrt_f32(params, dst);
             } break;
         default:
             {
@@ -8383,8 +8529,10 @@ static void ggml_compute_forward_sqrt(
 
 static void ggml_compute_forward_log_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
@@ -8407,12 +8555,14 @@ static void ggml_compute_forward_log_f32(
 
 static void ggml_compute_forward_log(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_log_f32(params, src0, dst);
+                ggml_compute_forward_log_f32(params, dst);
             } break;
         default:
             {
@@ -8425,8 +8575,10 @@ static void ggml_compute_forward_log(
 
 static void ggml_compute_forward_sum_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_is_scalar(dst));
 
@@ -8458,8 +8610,10 @@ static void ggml_compute_forward_sum_f32(
 
 static void ggml_compute_forward_sum_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_is_scalar(dst));
 
@@ -8490,16 +8644,18 @@ static void ggml_compute_forward_sum_f16(
 
 static void ggml_compute_forward_sum(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_sum_f32(params, src0, dst);
+                ggml_compute_forward_sum_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_sum_f16(params, src0, dst);
+                ggml_compute_forward_sum_f16(params, dst);
             } break;
         default:
             {
@@ -8512,8 +8668,10 @@ static void ggml_compute_forward_sum(
 
 static void ggml_compute_forward_sum_rows_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8545,12 +8703,14 @@ static void ggml_compute_forward_sum_rows_f32(
 
 static void ggml_compute_forward_sum_rows(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_sum_rows_f32(params, src0, dst);
+                ggml_compute_forward_sum_rows_f32(params, dst);
             } break;
         default:
             {
@@ -8563,8 +8723,10 @@ static void ggml_compute_forward_sum_rows(
 
 static void ggml_compute_forward_mean_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8600,12 +8762,14 @@ static void ggml_compute_forward_mean_f32(
 
 static void ggml_compute_forward_mean(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_mean_f32(params, src0, dst);
+                ggml_compute_forward_mean_f32(params, dst);
             } break;
         default:
             {
@@ -8618,8 +8782,10 @@ static void ggml_compute_forward_mean(
 
 static void ggml_compute_forward_argmax_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8646,12 +8812,14 @@ static void ggml_compute_forward_argmax_f32(
 
 static void ggml_compute_forward_argmax(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_argmax_f32(params, src0, dst);
+                ggml_compute_forward_argmax_f32(params, dst);
             } break;
         default:
             {
@@ -8664,8 +8832,10 @@ static void ggml_compute_forward_argmax(
 
 static void ggml_compute_forward_repeat_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_can_repeat(src0, dst));
 
@@ -8707,8 +8877,10 @@ static void ggml_compute_forward_repeat_f32(
 
 static void ggml_compute_forward_repeat_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_can_repeat(src0, dst));
 
@@ -8753,18 +8925,20 @@ static void ggml_compute_forward_repeat_f16(
 
 static void ggml_compute_forward_repeat(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
         case GGML_TYPE_I16:
             {
-                ggml_compute_forward_repeat_f16(params, src0, dst);
+                ggml_compute_forward_repeat_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
         case GGML_TYPE_I32:
             {
-                ggml_compute_forward_repeat_f32(params, src0, dst);
+                ggml_compute_forward_repeat_f32(params, dst);
             } break;
         default:
             {
@@ -8777,8 +8951,10 @@ static void ggml_compute_forward_repeat(
 
 static void ggml_compute_forward_repeat_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_can_repeat(dst, src0));
 
@@ -8834,12 +9010,14 @@ static void ggml_compute_forward_repeat_back_f32(
 
 static void ggml_compute_forward_repeat_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_repeat_back_f32(params, src0, dst);
+                ggml_compute_forward_repeat_back_f32(params, dst);
             } break;
         default:
             {
@@ -8852,10 +9030,11 @@ static void ggml_compute_forward_repeat_back(
 
 static void ggml_compute_forward_concat_f32(
     const struct ggml_compute_params * params,
-    const struct ggml_tensor * src0,
-    const struct ggml_tensor * src1,
     struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -8900,14 +9079,15 @@ static void ggml_compute_forward_concat_f32(
 
 static void ggml_compute_forward_concat(
     const struct ggml_compute_params* params,
-    const struct ggml_tensor* src0,
-    const struct ggml_tensor* src1,
     struct ggml_tensor* dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
         case GGML_TYPE_I32:
             {
-                ggml_compute_forward_concat_f32(params, src0, src1, dst);
+                ggml_compute_forward_concat_f32(params, dst);
             } break;
         default:
             {
|
 
 static void ggml_compute_forward_abs_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
|
 
 static void ggml_compute_forward_abs(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_abs_f32(params, src0, dst);
+                ggml_compute_forward_abs_f32(params, dst);
             } break;
         default:
             {
|
 
 static void ggml_compute_forward_sgn_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
|
 
 static void ggml_compute_forward_sgn(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_sgn_f32(params, src0, dst);
+                ggml_compute_forward_sgn_f32(params, dst);
             } break;
         default:
             {
@@ -9004,8 +9192,10 @@ static void ggml_compute_forward_sgn(
 
 static void ggml_compute_forward_neg_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -9028,12 +9218,14 @@ static void ggml_compute_forward_neg_f32(
 
 static void ggml_compute_forward_neg(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_neg_f32(params, src0, dst);
+                ggml_compute_forward_neg_f32(params, dst);
             } break;
         default:
             {
@@ -9046,8 +9238,10 @@ static void ggml_compute_forward_neg(
 
 static void ggml_compute_forward_step_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -9070,12 +9264,14 @@ static void ggml_compute_forward_step_f32(
 
 static void ggml_compute_forward_step(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_step_f32(params, src0, dst);
+                ggml_compute_forward_step_f32(params, dst);
             } break;
         default:
             {
@@ -9088,8 +9284,10 @@ static void ggml_compute_forward_step(
 
 static void ggml_compute_forward_tanh_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -9112,12 +9310,14 @@ static void ggml_compute_forward_tanh_f32(
 
 static void ggml_compute_forward_tanh(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_tanh_f32(params, src0, dst);
+                ggml_compute_forward_tanh_f32(params, dst);
             } break;
         default:
             {
@@ -9130,8 +9330,10 @@ static void ggml_compute_forward_tanh(
 
 static void ggml_compute_forward_elu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -9154,12 +9356,14 @@ static void ggml_compute_forward_elu_f32(
 
 static void ggml_compute_forward_elu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_elu_f32(params, src0, dst);
+                ggml_compute_forward_elu_f32(params, dst);
             } break;
         default:
             {
@@ -9172,8 +9376,10 @@ static void ggml_compute_forward_elu(
 
 static void ggml_compute_forward_relu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -9196,12 +9402,14 @@ static void ggml_compute_forward_relu_f32(
 
 static void ggml_compute_forward_relu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_relu_f32(params, src0, dst);
+                ggml_compute_forward_relu_f32(params, dst);
             } break;
         default:
             {
@@ -9214,8 +9422,10 @@ static void ggml_compute_forward_relu(
 
 static void ggml_compute_forward_gelu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9255,12 +9465,14 @@ static void ggml_compute_forward_gelu_f32(
 
 static void ggml_compute_forward_gelu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_gelu_f32(params, src0, dst);
+                ggml_compute_forward_gelu_f32(params, dst);
             } break;
         default:
             {
@@ -9273,8 +9485,10 @@ static void ggml_compute_forward_gelu(
 
 static void ggml_compute_forward_gelu_quick_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9314,12 +9528,14 @@ static void ggml_compute_forward_gelu_quick_f32(
 
 static void ggml_compute_forward_gelu_quick(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_gelu_quick_f32(params, src0, dst);
+                ggml_compute_forward_gelu_quick_f32(params, dst);
             } break;
         default:
             {
@@ -9332,8 +9548,10 @@ static void ggml_compute_forward_gelu_quick(
 
 static void ggml_compute_forward_silu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9373,12 +9591,14 @@ static void ggml_compute_forward_silu_f32(
 
 static void ggml_compute_forward_silu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_silu_f32(params, src0, dst);
+                ggml_compute_forward_silu_f32(params, dst);
             } break;
         default:
             {
@@ -9390,8 +9610,10 @@ static void ggml_compute_forward_silu(
 
 static void ggml_compute_forward_leaky_relu_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -9417,12 +9639,14 @@ static void ggml_compute_forward_leaky_relu_f32(
 
 static void ggml_compute_forward_leaky_relu(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_leaky_relu_f32(params, src0, dst);
+                ggml_compute_forward_leaky_relu_f32(params, dst);
             } break;
         default:
             {
@@ -9435,9 +9659,11 @@ static void ggml_compute_forward_leaky_relu(
 
 static void ggml_compute_forward_silu_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * grad,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * grad = dst->src[1];
+
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
@@ -9480,13 +9706,14 @@ static void ggml_compute_forward_silu_back_f32(
|
|
9480
9706
|
|
9481
9707
|
static void ggml_compute_forward_silu_back(
|
9482
9708
|
const struct ggml_compute_params * params,
|
9483
|
-
const struct ggml_tensor * src0,
|
9484
|
-
const struct ggml_tensor * grad,
|
9485
9709
|
struct ggml_tensor * dst) {
|
9710
|
+
|
9711
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9712
|
+
|
9486
9713
|
switch (src0->type) {
|
9487
9714
|
case GGML_TYPE_F32:
|
9488
9715
|
{
|
9489
|
-
ggml_compute_forward_silu_back_f32(params,
|
9716
|
+
ggml_compute_forward_silu_back_f32(params, dst);
|
9490
9717
|
} break;
|
9491
9718
|
default:
|
9492
9719
|
{
|
@@ -9498,8 +9725,10 @@ static void ggml_compute_forward_silu_back(
|
|
9498
9725
|
|
9499
9726
|
static void ggml_compute_forward_hardswish_f32(
|
9500
9727
|
const struct ggml_compute_params * params,
|
9501
|
-
const struct ggml_tensor * src0,
|
9502
9728
|
struct ggml_tensor * dst) {
|
9729
|
+
|
9730
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9731
|
+
|
9503
9732
|
assert(params->ith == 0);
|
9504
9733
|
assert(ggml_are_same_shape(src0, dst));
|
9505
9734
|
|
@@ -9521,12 +9750,14 @@ static void ggml_compute_forward_hardswish_f32(
|
|
9521
9750
|
}
|
9522
9751
|
static void ggml_compute_forward_hardswish(
|
9523
9752
|
const struct ggml_compute_params * params,
|
9524
|
-
const struct ggml_tensor * src0,
|
9525
9753
|
struct ggml_tensor * dst) {
|
9754
|
+
|
9755
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9756
|
+
|
9526
9757
|
switch (src0->type) {
|
9527
9758
|
case GGML_TYPE_F32:
|
9528
9759
|
{
|
9529
|
-
ggml_compute_forward_hardswish_f32(params,
|
9760
|
+
ggml_compute_forward_hardswish_f32(params, dst);
|
9530
9761
|
} break;
|
9531
9762
|
default:
|
9532
9763
|
{
|
@@ -9537,8 +9768,10 @@ static void ggml_compute_forward_hardswish(
|
|
9537
9768
|
|
9538
9769
|
static void ggml_compute_forward_hardsigmoid_f32(
|
9539
9770
|
const struct ggml_compute_params * params,
|
9540
|
-
const struct ggml_tensor * src0,
|
9541
9771
|
struct ggml_tensor * dst) {
|
9772
|
+
|
9773
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9774
|
+
|
9542
9775
|
assert(params->ith == 0);
|
9543
9776
|
assert(ggml_are_same_shape(src0, dst));
|
9544
9777
|
|
@@ -9561,12 +9794,14 @@ static void ggml_compute_forward_hardsigmoid_f32(
|
|
9561
9794
|
|
9562
9795
|
static void ggml_compute_forward_hardsigmoid(
|
9563
9796
|
const struct ggml_compute_params * params,
|
9564
|
-
const struct ggml_tensor * src0,
|
9565
9797
|
struct ggml_tensor * dst) {
|
9798
|
+
|
9799
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9800
|
+
|
9566
9801
|
switch (src0->type) {
|
9567
9802
|
case GGML_TYPE_F32:
|
9568
9803
|
{
|
9569
|
-
ggml_compute_forward_hardsigmoid_f32(params,
|
9804
|
+
ggml_compute_forward_hardsigmoid_f32(params, dst);
|
9570
9805
|
} break;
|
9571
9806
|
default:
|
9572
9807
|
{
|
@@ -9580,8 +9815,10 @@ static void ggml_compute_forward_hardsigmoid(
 
 static void ggml_compute_forward_norm_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9633,12 +9870,14 @@ static void ggml_compute_forward_norm_f32(
 
 static void ggml_compute_forward_norm(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_norm_f32(params, src0, dst);
+                ggml_compute_forward_norm_f32(params, dst);
             } break;
         default:
             {
@@ -9651,8 +9890,10 @@ static void ggml_compute_forward_norm(
 
 static void ggml_compute_forward_rms_norm_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9701,12 +9942,14 @@ static void ggml_compute_forward_rms_norm_f32(
 
 static void ggml_compute_forward_rms_norm(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rms_norm_f32(params, src0, dst);
+                ggml_compute_forward_rms_norm_f32(params, dst);
             } break;
         default:
             {
@@ -9717,9 +9960,11 @@ static void ggml_compute_forward_rms_norm(
 
 static void ggml_compute_forward_rms_norm_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9874,13 +10119,14 @@ static void ggml_compute_forward_rms_norm_back_f32(
 
 static void ggml_compute_forward_rms_norm_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rms_norm_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_rms_norm_back_f32(params, dst);
             } break;
         default:
             {
@@ -9893,8 +10139,10 @@ static void ggml_compute_forward_rms_norm_back(
 
 static void ggml_compute_forward_group_norm_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9965,12 +10213,14 @@ static void ggml_compute_forward_group_norm_f32(
 
 static void ggml_compute_forward_group_norm(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_group_norm_f32(params, src0, dst);
+                ggml_compute_forward_group_norm_f32(params, dst);
             } break;
         default:
             {
@@ -10016,9 +10266,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(struct ggml_tensor * dst) {
 
 static void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
@@ -10263,10 +10515,11 @@ static void ggml_compute_forward_mul_mat(
 
 static void ggml_compute_forward_mul_mat_id(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * ids,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * ids = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     const struct ggml_tensor * src0 = dst->src[2]; // only for GGML_TENSOR_BINARY_OP_LOCALS
 
     GGML_TENSOR_BINARY_OP_LOCALS
@@ -10457,9 +10710,11 @@ static void ggml_compute_forward_mul_mat_id(
 
 static void ggml_compute_forward_out_prod_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     // int64_t t0 = ggml_perf_time_us();
     // UNUSED(t0);
 
@@ -10649,9 +10904,11 @@ static void ggml_compute_forward_out_prod_f32(
 
 static void ggml_compute_forward_out_prod_q_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     // int64_t t0 = ggml_perf_time_us();
     // UNUSED(t0);
 
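`mul_mat_id` keeps `src0 = dst->src[2]` purely for the shape macro; the actual weight matrix is selected per row by the `ids` tensor. A hedged, standalone sketch of that mixture-of-experts indexing idea (the names and the one-output-per-row shape are illustrative, not ggml's implementation):

```c
// y[r] = W[ids[r]] * x[r]  -- one expert weight matrix chosen per input row
#include <stdint.h>

void mul_mat_id_sketch(const float * const * experts, const int32_t * ids,
                       const float * x, float * y, int n_rows, int n_cols) {
    for (int r = 0; r < n_rows; r++) {
        const float * W = experts[ids[r]];  // expert selected per row
        float acc = 0.0f;
        for (int c = 0; c < n_cols; c++) {
            acc += W[c] * x[r*n_cols + c];  // single output per row, for brevity
        }
        y[r] = acc;
    }
}
```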
@@ -10762,9 +11019,10 @@ static void ggml_compute_forward_out_prod_q_f32(
 
 static void ggml_compute_forward_out_prod(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -10779,17 +11037,19 @@ static void ggml_compute_forward_out_prod(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
             {
-                ggml_compute_forward_out_prod_q_f32(params, src0, src1, dst);
+                ggml_compute_forward_out_prod_q_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
             {
                 GGML_ASSERT(false); // todo
-                // ggml_compute_forward_out_prod_f16_f32(params, src0, src1, dst);
+                // ggml_compute_forward_out_prod_f16_f32(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_out_prod_f32(params, src0, src1, dst);
+                ggml_compute_forward_out_prod_f32(params, dst);
             } break;
         default:
             {
@@ -10802,8 +11062,10 @@ static void ggml_compute_forward_out_prod(
 
 static void ggml_compute_forward_scale_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_is_contiguous(src0));
     GGML_ASSERT(ggml_is_contiguous(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -10844,12 +11106,14 @@ static void ggml_compute_forward_scale_f32(
 
 static void ggml_compute_forward_scale(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_scale_f32(params, src0, dst);
+                ggml_compute_forward_scale_f32(params, dst);
             } break;
         default:
             {
@@ -10862,9 +11126,11 @@ static void ggml_compute_forward_scale(
 
 static void ggml_compute_forward_set_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
 
@@ -10935,14 +11201,14 @@ static void ggml_compute_forward_set_f32(
 
 static void ggml_compute_forward_set(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_set_f32(params, src0, src1, dst);
+                ggml_compute_forward_set_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
         case GGML_TYPE_Q4_0:
@@ -10959,6 +11225,8 @@ static void ggml_compute_forward_set(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
         default:
             {
                 GGML_ASSERT(false);
@@ -10970,29 +11238,25 @@ static void ggml_compute_forward_set(
 
 static void ggml_compute_forward_cpy(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
-    ggml_compute_forward_dup(params, src0, dst);
+    ggml_compute_forward_dup(params, dst);
 }
 
 // ggml_compute_forward_cont
 
 static void ggml_compute_forward_cont(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
-    ggml_compute_forward_dup(params, src0, dst);
+    ggml_compute_forward_dup(params, dst);
 }
 
 // ggml_compute_forward_reshape
 
 static void ggml_compute_forward_reshape(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
-    UNUSED(src0);
     UNUSED(dst);
 }
 
@@ -11000,39 +11264,41 @@ static void ggml_compute_forward_reshape(
 
 static void ggml_compute_forward_view(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0) {
+        const struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
-    UNUSED(src0);
+    UNUSED(dst);
 }
 
 // ggml_compute_forward_permute
 
 static void ggml_compute_forward_permute(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0) {
+        const struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
-    UNUSED(src0);
+    UNUSED(dst);
 }
 
 // ggml_compute_forward_transpose
 
 static void ggml_compute_forward_transpose(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0) {
+        const struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
-    UNUSED(src0);
+    UNUSED(dst);
 }
 
 // ggml_compute_forward_get_rows
 
 static void ggml_compute_forward_get_rows_q(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11068,9 +11334,11 @@ static void ggml_compute_forward_get_rows_q(
 
 static void ggml_compute_forward_get_rows_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11103,9 +11371,11 @@ static void ggml_compute_forward_get_rows_f16(
 
 static void ggml_compute_forward_get_rows_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11138,9 +11408,10 @@ static void ggml_compute_forward_get_rows_f32(
 
 static void ggml_compute_forward_get_rows(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -11156,17 +11427,19 @@ static void ggml_compute_forward_get_rows(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
             {
-                ggml_compute_forward_get_rows_q(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_q(params, dst);
             } break;
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_get_rows_f16(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
         case GGML_TYPE_I32:
             {
-                ggml_compute_forward_get_rows_f32(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_f32(params, dst);
             } break;
         default:
             {
@@ -11197,9 +11470,11 @@ static void ggml_compute_forward_get_rows(
 
 static void ggml_compute_forward_get_rows_back_f32_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_is_contiguous(dst));
 
@@ -11234,9 +11509,11 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
 
 static void ggml_compute_forward_get_rows_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_is_contiguous(dst));
 
@@ -11271,17 +11548,18 @@ static void ggml_compute_forward_get_rows_back_f32(
 
 static void ggml_compute_forward_get_rows_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_get_rows_back_f32_f16(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_back_f32_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_get_rows_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_get_rows_back_f32(params, dst);
             } break;
         default:
             {
@@ -11312,8 +11590,10 @@ static void ggml_compute_forward_get_rows_back(
 
 static void ggml_compute_forward_diag_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11352,12 +11632,14 @@ static void ggml_compute_forward_diag_f32(
 
 static void ggml_compute_forward_diag(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_diag_f32(params, src0, dst);
+                ggml_compute_forward_diag_f32(params, dst);
             } break;
         default:
             {
@@ -11370,10 +11652,11 @@ static void ggml_compute_forward_diag(
 
 static void ggml_compute_forward_diag_mask_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst,
         const float value) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+
     const int ith = params->ith;
     const int nth = params->nth;
 
@@ -11423,12 +11706,14 @@ static void ggml_compute_forward_diag_mask_f32(
 
 static void ggml_compute_forward_diag_mask_inf(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
+                ggml_compute_forward_diag_mask_f32(params, dst, -INFINITY);
             } break;
         default:
             {
@@ -11439,12 +11724,14 @@ static void ggml_compute_forward_diag_mask_inf(
 
 static void ggml_compute_forward_diag_mask_zero(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
+                ggml_compute_forward_diag_mask_f32(params, dst, 0);
             } break;
         default:
             {
@@ -11457,9 +11744,12 @@ static void ggml_compute_forward_diag_mask_zero(
 
 static void ggml_compute_forward_soft_max_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+    const struct ggml_tensor * src2 = dst->src[2];
+
     assert(ggml_is_contiguous(dst));
     assert(ggml_are_same_shape(src0, dst));
 
@@ -11467,16 +11757,29 @@ static void ggml_compute_forward_soft_max_f32(
         return;
     }
 
-    float scale = 1.0f;
-    memcpy(&scale, (float *) dst->op_params + 0, sizeof(float));
+    float scale    = 1.0f;
+    float max_bias = 0.0f;
+
+    memcpy(&scale,    (float *) dst->op_params + 0, sizeof(float));
+    memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
 
     // TODO: handle transposed/permuted matrices
 
     const int ith = params->ith;
     const int nth = params->nth;
 
+    GGML_TENSOR_UNARY_OP_LOCALS
+
     const int64_t ne11 = src1 ? src1->ne[1] : 1;
 
+    // TODO: is this supposed to be ceil instead of floor?
+    //   https://huggingface.co/mosaicml/mpt-7b/blob/main/attention.py#L370
+    const uint32_t n_head_kv   = ne02;
+    const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2(n_head_kv));
+
+    const float m0 = powf(2.0f, -(max_bias       ) / n_head_log2);
+    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
+
     const int nc = src0->ne[0];
     const int nr = ggml_nrows(src0);
 
@@ -11489,6 +11792,9 @@ static void ggml_compute_forward_soft_max_f32(
 
     float * wp = (float *) params->wdata + (nc + CACHE_LINE_SIZE_F32) * ith;
 
+    // when max_bias <= 0.0f, src2 is not used and we default it to src0 to avoid branching
+    float * pos = src2 ? (float *) src2->data : src0->data;
+
     for (int i1 = ir0; i1 < ir1; i1++) {
         float * sp = (float *)((char *) src0->data + i1*src0->nb[1]);
         float * dp = (float *)((char *)  dst->data +  i1*dst->nb[1]);
@@ -11502,6 +11808,16 @@ static void ggml_compute_forward_soft_max_f32(
             ggml_vec_acc_f32(nc, wp, mp);
         }
 
+        // ALiBi bias
+        if (max_bias > 0.0f) {
+            const uint32_t h = (i1/ne01)%ne02; // head
+            const float slope = h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1);
+
+            for (int i = 0; i < nc; i++) {
+                wp[i] = wp[i] + slope*pos[i];
+            }
+        }
+
 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
             //printf("p[%d] = %f\n", i, p[i]);
@@ -11544,13 +11860,14 @@ static void ggml_compute_forward_soft_max_f32(
 
 static void ggml_compute_forward_soft_max(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_soft_max_f32(params, src0, src1, dst);
+                ggml_compute_forward_soft_max_f32(params, dst);
             } break;
         default:
             {
@@ -11563,9 +11880,11 @@ static void ggml_compute_forward_soft_max(
 
 static void ggml_compute_forward_soft_max_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_is_contiguous(src0));
     GGML_ASSERT(ggml_is_contiguous(src1));
     GGML_ASSERT(ggml_is_contiguous(dst));
@@ -11640,13 +11959,14 @@ static void ggml_compute_forward_soft_max_back_f32(
 
 static void ggml_compute_forward_soft_max_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_soft_max_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_soft_max_back_f32(params, dst);
             } break;
         default:
             {
@@ -11659,8 +11979,10 @@ static void ggml_compute_forward_soft_max_back(
 
 static void ggml_compute_forward_alibi_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11694,22 +12016,20 @@ static void ggml_compute_forward_alibi_f32(
     const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
     const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
 
-    for (int64_t i = 0; i < ne0; i++) {
-        for (int64_t j = 0; j < ne1; j++) {
-            for (int64_t k = 0; k < ne2_ne3; k++) {
-                float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
-                float *      pdst = (float *)((char *)  dst->data + i*nb0 + j*nb1 + k*nb2);
-
-                // TODO: k*nb2 or k*nb3
-
-                float m_k;
+    for (int64_t k = 0; k < ne2_ne3; k++) {
+        // TODO: k*nb2 or k*nb3
+        float m_k;
 
-                if (k < n_heads_log2_floor) {
-                    m_k = powf(m0, k + 1);
-                } else {
-                    m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
-                }
+        if (k < n_heads_log2_floor) {
+            m_k = powf(m0, k + 1);
+        } else {
+            m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
+        }
 
+        for (int64_t i = 0; i < ne0; i++) {
+            for (int64_t j = 0; j < ne1; j++) {
+                float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
+                float *      pdst = (float *)((char *)  dst->data + i*nb0 + j*nb1 + k*nb2);
                 pdst[0] = i * m_k + src[0];
             }
         }
@@ -11718,8 +12038,10 @@ static void ggml_compute_forward_alibi_f32(
 
 static void ggml_compute_forward_alibi_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11754,21 +12076,20 @@ static void ggml_compute_forward_alibi_f16(
     const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
     const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
 
-    for (int i = 0; i < ne0; i++) {
-        for (int j = 0; j < ne1; j++) {
-            for (int k = 0; k < ne2_ne3; k++) {
-                ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
-                float *            pdst = (float *)((char *)  dst->data + i*nb0 + j*nb1 + k*nb2);
-
-                // TODO: k*nb2 or k*nb3
+    for (int k = 0; k < ne2_ne3; k++) {
+        // TODO: k*nb2 or k*nb3
+        float m_k;
 
-                float m_k;
+        if (k < n_heads_log2_floor) {
+            m_k = powf(m0, k + 1);
+        } else {
+            m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
+        }
 
-                if (k < n_heads_log2_floor) {
-                    m_k = powf(m0, k + 1);
-                } else {
-                    m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
-                }
+        for (int i = 0; i < ne0; i++) {
+            for (int j = 0; j < ne1; j++) {
+                ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
+                float *            pdst = (float *)((char *)  dst->data + i*nb0 + j*nb1 + k*nb2);
 
                 // we return F32
                 pdst[0] = i * m_k + GGML_FP16_TO_FP32(src[0]);
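The loop reorder in both alibi kernels makes `k` (the head index) the outermost loop, so the slope `m_k` is computed once per head rather than once per element; the inner `i`/`j` loops are left with a single multiply-add. The hoist in isolation (shapes and names illustrative):

```c
// Compute the per-head factor once, reuse it across the whole plane.
#include <math.h>

void add_head_bias(float * x, int ne0, int ne1, int n_heads, float m0) {
    for (int k = 0; k < n_heads; k++) {
        const float m_k = powf(m0, k + 1);   // hoisted: one powf per head
        for (int j = 0; j < ne1; j++) {
            for (int i = 0; i < ne0; i++) {
                x[(k*ne1 + j)*ne0 + i] += i * m_k;
            }
        }
    }
}
```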
@@ -11779,16 +12100,18 @@ static void ggml_compute_forward_alibi_f16(
 
 static void ggml_compute_forward_alibi(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_alibi_f16(params, src0, dst);
+                ggml_compute_forward_alibi_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_alibi_f32(params, src0, dst);
+                ggml_compute_forward_alibi_f32(params, dst);
             } break;
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -11804,6 +12127,8 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
         case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -11819,8 +12144,10 @@ static void ggml_compute_forward_alibi(
 
 static void ggml_compute_forward_clamp_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11859,12 +12186,14 @@ static void ggml_compute_forward_clamp_f32(
 
 static void ggml_compute_forward_clamp(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_clamp_f32(params, src0, dst);
+                ggml_compute_forward_clamp_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
         case GGML_TYPE_Q4_0:
@@ -11881,6 +12210,8 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
        case GGML_TYPE_IQ3_XXS:
+        case GGML_TYPE_IQ1_S:
+        case GGML_TYPE_IQ4_NL:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -11952,10 +12283,12 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
 
 static void ggml_compute_forward_rope_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst,
         const bool forward) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -12128,10 +12461,12 @@ static void ggml_compute_forward_rope_f32(
 
 static void ggml_compute_forward_rope_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst,
         const bool forward) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -12293,17 +12628,18 @@ static void ggml_compute_forward_rope_f16(
 
 static void ggml_compute_forward_rope(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
+                ggml_compute_forward_rope_f16(params, dst, true);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
+                ggml_compute_forward_rope_f32(params, dst, true);
             } break;
         default:
             {
@@ -12316,17 +12652,18 @@ static void ggml_compute_forward_rope(
 
 static void ggml_compute_forward_rope_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
+                ggml_compute_forward_rope_f16(params, dst, false);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
+                ggml_compute_forward_rope_f32(params, dst, false);
             } break;
         default:
             {
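`rope` and `rope_back` now share the same two kernels; only the trailing `forward` flag differs, flipping the sign of the rotation so the backward pass applies the inverse rotation. A standalone illustration of why one kernel suffices (not ggml's actual rope math, which also handles frequency scaling):

```c
// A 2-D rotation by theta is undone by rotating by -theta, so the backward
// pass only needs the sign of sin flipped.
#include <math.h>

void rotate_pair(float * x0, float * x1, float theta, int forward) {
    const float cos_t = cosf(theta);
    const float sin_t = (forward ? 1.0f : -1.0f) * sinf(theta); // inverse for the backward pass

    const float a = *x0, b = *x1;
    *x0 = a*cos_t - b*sin_t;
    *x1 = a*sin_t + b*cos_t;
}
```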
@@ -12339,9 +12676,11 @@ static void ggml_compute_forward_rope_back(
 
 static void ggml_compute_forward_conv_transpose_1d_f16_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12436,9 +12775,11 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
 
 static void ggml_compute_forward_conv_transpose_1d_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12533,17 +12874,18 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
 
 static void ggml_compute_forward_conv_transpose_1d(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_conv_transpose_1d_f16_f32(params, src0, src1, dst);
+                ggml_compute_forward_conv_transpose_1d_f16_f32(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_conv_transpose_1d_f32(params, src0, src1, dst);
+                ggml_compute_forward_conv_transpose_1d_f32(params, dst);
             } break;
         default:
             {
@@ -12557,9 +12899,11 @@ static void ggml_compute_forward_conv_transpose_1d(
 // dst: result [N, OH, OW, IC*KH*KW]
 static void ggml_compute_forward_im2col_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12643,9 +12987,11 @@ static void ggml_compute_forward_im2col_f32(
 // dst: result [N, OH, OW, IC*KH*KW]
 static void ggml_compute_forward_im2col_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F16);
@@ -12725,17 +13071,15 @@ static void ggml_compute_forward_im2col_f16(
 
 static void ggml_compute_forward_im2col(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
     switch (dst->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_im2col_f16(params, src0, src1, dst);
+                ggml_compute_forward_im2col_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_im2col_f32(params, src0, src1, dst);
+                ggml_compute_forward_im2col_f32(params, dst);
             } break;
         default:
             {
@@ -12749,9 +13093,11 @@ static void ggml_compute_forward_im2col(
 
 static void ggml_compute_forward_conv_transpose_2d(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12855,9 +13201,11 @@ static void ggml_compute_forward_conv_transpose_2d(
 static void ggml_compute_forward_pool_1d_sk_p0(
         const struct ggml_compute_params * params,
         const enum ggml_op_pool op,
-        const struct ggml_tensor * src,
         const int k,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src = dst->src[0];
+
     assert(src->type == GGML_TYPE_F32);
     assert(params->ith == 0);
 
@@ -12906,7 +13254,6 @@ static void ggml_compute_forward_pool_1d_sk_p0(
 
 static void ggml_compute_forward_pool_1d(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
 
     const int32_t * opts = (const int32_t *)dst->op_params;
@@ -12917,15 +13264,17 @@ static void ggml_compute_forward_pool_1d(
     GGML_ASSERT(p0 == 0); // padding not supported
     GGML_ASSERT(k0 == s0); // only s = k supported
 
-    ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst);
+    ggml_compute_forward_pool_1d_sk_p0(params, op, k0, dst);
 }
 
 // ggml_compute_forward_pool_2d
 
 static void ggml_compute_forward_pool_2d(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src = dst->src[0];
+
     GGML_ASSERT(src->type == GGML_TYPE_F32);
     GGML_ASSERT(params->ith == 0);
 
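`pool_1d` reads its scalar arguments out of `dst->op_params`, a small raw buffer attached to the tensor when the op was recorded. A hedged sketch of that decoding; the field order (op, k0, s0, p0) matches how the code above consumes them, but treat it as an assumption:

```c
// Scalar op arguments travel in dst->op_params as raw int32 slots.
#include <stdint.h>

struct pool_params { int32_t op, k0, s0, p0; };

static struct pool_params read_pool_params(const int32_t * opts) {
    // opts would be (const int32_t *) dst->op_params in the code above
    struct pool_params p = { opts[0], opts[1], opts[2], opts[3] };
    return p;
}
```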
@@ -12998,9 +13347,10 @@ static void ggml_compute_forward_pool_2d(
 
 static void ggml_compute_forward_upscale_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -13037,12 +13387,14 @@ static void ggml_compute_forward_upscale_f32(
 
 static void ggml_compute_forward_upscale(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_upscale_f32(params, src0, dst);
+                ggml_compute_forward_upscale_f32(params, dst);
             } break;
         default:
             {
@@ -13055,9 +13407,10 @@ static void ggml_compute_forward_upscale(
 
 static void ggml_compute_forward_pad_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -13095,12 +13448,14 @@ static void ggml_compute_forward_pad_f32(
 
 static void ggml_compute_forward_pad(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_pad_f32(params, src0, dst);
+                ggml_compute_forward_pad_f32(params, dst);
             } break;
         default:
             {
@@ -13113,9 +13468,10 @@ static void ggml_compute_forward_pad(
 
 static void ggml_compute_forward_argsort_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -13155,13 +13511,14 @@ static void ggml_compute_forward_argsort_f32(
 
 static void ggml_compute_forward_argsort(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_argsort_f32(params, src0, dst);
+                ggml_compute_forward_argsort_f32(params, dst);
             } break;
         default:
             {
@@ -13174,11 +13531,13 @@ static void ggml_compute_forward_argsort(
|
|
13174
13531
|
|
13175
13532
|
static void ggml_compute_forward_flash_attn_f32(
|
13176
13533
|
const struct ggml_compute_params * params,
|
13177
|
-
const struct ggml_tensor * q,
|
13178
|
-
const struct ggml_tensor * k,
|
13179
|
-
const struct ggml_tensor * v,
|
13180
13534
|
const bool masked,
|
13181
13535
|
struct ggml_tensor * dst) {
|
13536
|
+
|
13537
|
+
const struct ggml_tensor * q = dst->src[0];
|
13538
|
+
const struct ggml_tensor * k = dst->src[1];
|
13539
|
+
const struct ggml_tensor * v = dst->src[2];
|
13540
|
+
|
13182
13541
|
int64_t t0 = ggml_perf_time_us();
|
13183
13542
|
UNUSED(t0);
|
13184
13543
|
|
@@ -13364,11 +13723,13 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13364
13723
|
|
13365
13724
|
static void ggml_compute_forward_flash_attn_f16(
|
13366
13725
|
const struct ggml_compute_params * params,
|
13367
|
-
const struct ggml_tensor * q,
|
13368
|
-
const struct ggml_tensor * k,
|
13369
|
-
const struct ggml_tensor * v,
|
13370
13726
|
const bool masked,
|
13371
13727
|
struct ggml_tensor * dst) {
|
13728
|
+
|
13729
|
+
const struct ggml_tensor * q = dst->src[0];
|
13730
|
+
const struct ggml_tensor * k = dst->src[1];
|
13731
|
+
const struct ggml_tensor * v = dst->src[2];
|
13732
|
+
|
13372
13733
|
int64_t t0 = ggml_perf_time_us();
|
13373
13734
|
UNUSED(t0);
|
13374
13735
|
|
@@ -13590,19 +13951,19 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13590
13951
|
|
13591
13952
|
static void ggml_compute_forward_flash_attn(
|
13592
13953
|
const struct ggml_compute_params * params,
|
13593
|
-
const struct ggml_tensor * q,
|
13594
|
-
const struct ggml_tensor * k,
|
13595
|
-
const struct ggml_tensor * v,
|
13596
13954
|
const bool masked,
|
13597
13955
|
struct ggml_tensor * dst) {
|
13956
|
+
|
13957
|
+
const struct ggml_tensor * q = dst->src[0];
|
13958
|
+
|
13598
13959
|
switch (q->type) {
|
13599
13960
|
case GGML_TYPE_F16:
|
13600
13961
|
{
|
13601
|
-
ggml_compute_forward_flash_attn_f16(params,
|
13962
|
+
ggml_compute_forward_flash_attn_f16(params, masked, dst);
|
13602
13963
|
} break;
|
13603
13964
|
case GGML_TYPE_F32:
|
13604
13965
|
{
|
13605
|
-
-                ggml_compute_forward_flash_attn_f32(params, q, k, v, masked, dst);
+                ggml_compute_forward_flash_attn_f32(params, masked, dst);
             } break;
         default:
             {
@@ -13615,12 +13976,14 @@ static void ggml_compute_forward_flash_attn(
 
 static void ggml_compute_forward_flash_ff_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,  // F16
-        const struct ggml_tensor * b0, // F16 fc_w
-        const struct ggml_tensor * b1, // F32 fc_b
-        const struct ggml_tensor * c0, // F16 proj_w
-        const struct ggml_tensor * c1, // F32 proj_b
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * a  = dst->src[0]; // F16
+    const struct ggml_tensor * b0 = dst->src[1]; // F16 fc_w
+    const struct ggml_tensor * b1 = dst->src[2]; // F32 fc_b
+    const struct ggml_tensor * c0 = dst->src[3]; // F16 proj_w
+    const struct ggml_tensor * c1 = dst->src[4]; // F32 proj_b
+
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
@@ -13748,16 +14111,14 @@ static void ggml_compute_forward_flash_ff_f16(
 
 static void ggml_compute_forward_flash_ff(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b0,
-        const struct ggml_tensor * b1,
-        const struct ggml_tensor * c0,
-        const struct ggml_tensor * c1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * b0 = dst->src[1];
+
     switch (b0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_flash_ff_f16(params, a, b0, b1, c0, c1, dst);
+                ggml_compute_forward_flash_ff_f16(params, dst);
             } break;
         case GGML_TYPE_F32:
             {
@@ -13774,12 +14135,14 @@ static void ggml_compute_forward_flash_ff(
 
 static void ggml_compute_forward_flash_attn_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * q,
-        const struct ggml_tensor * k,
-        const struct ggml_tensor * v,
-        const struct ggml_tensor * d,
         const bool masked,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * q = dst->src[0];
+    const struct ggml_tensor * k = dst->src[1];
+    const struct ggml_tensor * v = dst->src[2];
+    const struct ggml_tensor * d = dst->src[3];
+
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
@@ -14127,16 +14490,15 @@ static void ggml_compute_forward_flash_attn_back_f32(
 
 static void ggml_compute_forward_flash_attn_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * q,
-        const struct ggml_tensor * k,
-        const struct ggml_tensor * v,
-        const struct ggml_tensor * d,
         const bool masked,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * q = dst->src[0];
+
     switch (q->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_flash_attn_back_f32(params, q, k, v, d, masked, dst);
+                ggml_compute_forward_flash_attn_back_f32(params, masked, dst);
             } break;
         default:
             {
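All of the hunks in this stretch apply one mechanical refactor: a compute-forward function no longer receives its operand tensors as explicit parameters, it pulls them out of dst->src[], so every op converges on the same (params, dst) signature. A minimal, self-contained sketch of the before/after shape (toy structs, not the real ggml types):

    #include <stdio.h>

    #define MAX_SRC 4

    // toy stand-ins for the real ggml structs, only to show the calling convention
    struct tensor {
        const char    * name;
        struct tensor * src[MAX_SRC]; // operands, wired up at graph-build time
    };

    // old shape: every operand is an explicit parameter
    static void forward_old(const struct tensor * a, const struct tensor * b, struct tensor * dst) {
        printf("old: %s = f(%s, %s)\n", dst->name, a->name, b->name);
    }

    // new shape: operands are recovered from dst->src[], so every op can share
    // one signature and the dispatcher needs no per-op argument list
    static void forward_new(struct tensor * dst) {
        const struct tensor * a = dst->src[0];
        const struct tensor * b = dst->src[1];
        printf("new: %s = f(%s, %s)\n", dst->name, a->name, b->name);
    }

    int main(void) {
        struct tensor a   = { "a",   { 0 } };
        struct tensor b   = { "b",   { 0 } };
        struct tensor dst = { "dst", { &a, &b } };
        forward_old(&a, &b, &dst);
        forward_new(&dst);
        return 0;
    }

The payoff shows up in the big dispatcher further down, where every case body collapses to a one-argument call.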
@@ -14149,8 +14511,10 @@ static void ggml_compute_forward_flash_attn_back(
 
 static void ggml_compute_forward_win_part_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14193,12 +14557,14 @@ static void ggml_compute_forward_win_part_f32(
 
 static void ggml_compute_forward_win_part(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_win_part_f32(params, src0, dst);
+                ggml_compute_forward_win_part_f32(params, dst);
             } break;
         default:
             {
@@ -14211,8 +14577,10 @@ static void ggml_compute_forward_win_part(
 
 static void ggml_compute_forward_win_unpart_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14253,12 +14621,14 @@ static void ggml_compute_forward_win_unpart_f32(
 
 static void ggml_compute_forward_win_unpart(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_win_unpart_f32(params, src0, dst);
+                ggml_compute_forward_win_unpart_f32(params, dst);
             } break;
         default:
             {
@@ -14271,58 +14641,58 @@ static void ggml_compute_forward_win_unpart(
 
 static void ggml_compute_forward_unary(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
     const enum ggml_unary_op op = ggml_get_unary_op(dst);
 
     switch (op) {
         case GGML_UNARY_OP_ABS:
             {
-                ggml_compute_forward_abs(params, src0, dst);
+                ggml_compute_forward_abs(params, dst);
             } break;
         case GGML_UNARY_OP_SGN:
             {
-                ggml_compute_forward_sgn(params, src0, dst);
+                ggml_compute_forward_sgn(params, dst);
             } break;
         case GGML_UNARY_OP_NEG:
             {
-                ggml_compute_forward_neg(params, src0, dst);
+                ggml_compute_forward_neg(params, dst);
             } break;
         case GGML_UNARY_OP_STEP:
             {
-                ggml_compute_forward_step(params, src0, dst);
+                ggml_compute_forward_step(params, dst);
             } break;
         case GGML_UNARY_OP_TANH:
             {
-                ggml_compute_forward_tanh(params, src0, dst);
+                ggml_compute_forward_tanh(params, dst);
             } break;
         case GGML_UNARY_OP_ELU:
             {
-                ggml_compute_forward_elu(params, src0, dst);
+                ggml_compute_forward_elu(params, dst);
             } break;
         case GGML_UNARY_OP_RELU:
             {
-                ggml_compute_forward_relu(params, src0, dst);
+                ggml_compute_forward_relu(params, dst);
             } break;
         case GGML_UNARY_OP_GELU:
             {
-                ggml_compute_forward_gelu(params, src0, dst);
+                ggml_compute_forward_gelu(params, dst);
             } break;
         case GGML_UNARY_OP_GELU_QUICK:
             {
-                ggml_compute_forward_gelu_quick(params, src0, dst);
+                ggml_compute_forward_gelu_quick(params, dst);
             } break;
         case GGML_UNARY_OP_SILU:
             {
-                ggml_compute_forward_silu(params, src0, dst);
+                ggml_compute_forward_silu(params, dst);
            } break;
         case GGML_UNARY_OP_HARDSWISH:
             {
-                ggml_compute_forward_hardswish(params, src0, dst);
+                ggml_compute_forward_hardswish(params, dst);
             } break;
         case GGML_UNARY_OP_HARDSIGMOID:
             {
-                ggml_compute_forward_hardsigmoid(params, src0, dst);
+                ggml_compute_forward_hardsigmoid(params, dst);
             } break;
         default:
             {
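GGML_OP_UNARY multiplexes all element-wise activations through one graph-level op; the concrete unary kind is stored on the node and recovered with ggml_get_unary_op(dst) before the switch above. The idea in miniature (toy types; in ggml the kind lives in the node's op_params):

    #include <stdio.h>

    // toy version of the GGML_OP_UNARY multiplexing: one graph-level op,
    // with the actual unary kind stored on the node and read at dispatch time
    enum unary_op { UNARY_ABS, UNARY_NEG, UNARY_RELU };

    struct node {
        enum unary_op uop; // in ggml this is recovered via ggml_get_unary_op
        float x, y;
    };

    static void forward_unary(struct node * dst) {
        switch (dst->uop) {
            case UNARY_ABS:  dst->y = dst->x < 0 ? -dst->x : dst->x; break;
            case UNARY_NEG:  dst->y = -dst->x;                       break;
            case UNARY_RELU: dst->y = dst->x > 0 ?  dst->x : 0;      break;
        }
    }

    int main(void) {
        struct node n = { UNARY_NEG, 3.0f, 0.0f };
        forward_unary(&n);
        printf("%f\n", n.y);
        return 0;
    }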
@@ -14335,8 +14705,10 @@ static void ggml_compute_forward_unary(
 
 static void ggml_compute_forward_get_rel_pos_f16(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14362,12 +14734,14 @@ static void ggml_compute_forward_get_rel_pos_f16(
 
 static void ggml_compute_forward_get_rel_pos(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_get_rel_pos_f16(params, src0, dst);
+                ggml_compute_forward_get_rel_pos_f16(params, dst);
             } break;
         default:
             {
@@ -14380,11 +14754,12 @@ static void ggml_compute_forward_get_rel_pos(
 
 static void ggml_compute_forward_add_rel_pos_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        const struct ggml_tensor * src2,
         struct ggml_tensor * dst) {
 
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+    const struct ggml_tensor * src2 = dst->src[2];
+
     const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
     if (!inplace && params->type == GGML_TASK_INIT) {
         if (params->ith != 0) {
@@ -14448,14 +14823,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
 
 static void ggml_compute_forward_add_rel_pos(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        const struct ggml_tensor * src2,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_add_rel_pos_f32(params, src0, src1, src2, dst);
+                ggml_compute_forward_add_rel_pos_f32(params, dst);
             } break;
         default:
             {
@@ -14468,9 +14843,11 @@ static void ggml_compute_forward_add_rel_pos(
 
 static void ggml_compute_forward_map_unary_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst,
         const ggml_unary_op_f32_t fun) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14492,13 +14869,15 @@ static void ggml_compute_forward_map_unary_f32(
 
 static void ggml_compute_forward_map_unary(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
         struct ggml_tensor * dst,
         const ggml_unary_op_f32_t fun) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_map_unary_f32(params, src0, dst, fun);
+                ggml_compute_forward_map_unary_f32(params, dst, fun);
             } break;
         default:
             {
@@ -14511,10 +14890,12 @@ static void ggml_compute_forward_map_unary(
 
 static void ggml_compute_forward_map_binary_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst,
         const ggml_binary_op_f32_t fun) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
@@ -14539,14 +14920,15 @@ static void ggml_compute_forward_map_binary_f32(
 
 static void ggml_compute_forward_map_binary(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst,
         const ggml_binary_op_f32_t fun) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_map_binary_f32(params, src0, src1, dst, fun);
+                ggml_compute_forward_map_binary_f32(params, dst, fun);
             } break;
         default:
             {
@@ -14559,9 +14941,11 @@ static void ggml_compute_forward_map_binary(
 
 static void ggml_compute_forward_map_custom1_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
         struct ggml_tensor * dst,
         const ggml_custom1_op_f32_t fun) {
+
+    const struct ggml_tensor * a = dst->src[0];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14575,10 +14959,12 @@ static void ggml_compute_forward_map_custom1_f32(
 
 static void ggml_compute_forward_map_custom2_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
         struct ggml_tensor * dst,
         const ggml_custom2_op_f32_t fun) {
+
+    const struct ggml_tensor * a = dst->src[0];
+    const struct ggml_tensor * b = dst->src[1];
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14592,11 +14978,13 @@ static void ggml_compute_forward_map_custom2_f32(
 
 static void ggml_compute_forward_map_custom3_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
-        const struct ggml_tensor * c,
         struct ggml_tensor * dst,
         const ggml_custom3_op_f32_t fun) {
+
+    const struct ggml_tensor * a = dst->src[0];
+    const struct ggml_tensor * b = dst->src[1];
+    const struct ggml_tensor * c = dst->src[2]; // third operand, not a repeat of src[1]
+
     assert(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14610,8 +14998,10 @@ static void ggml_compute_forward_map_custom3_f32(
 
 static void ggml_compute_forward_map_custom1(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * a = dst->src[0];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14625,9 +15015,11 @@ static void ggml_compute_forward_map_custom1(
 
 static void ggml_compute_forward_map_custom2(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * a = dst->src[0];
+    const struct ggml_tensor * b = dst->src[1];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14641,10 +15033,12 @@ static void ggml_compute_forward_map_custom2(
 
 static void ggml_compute_forward_map_custom3(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
-        const struct ggml_tensor * c,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * a = dst->src[0];
+    const struct ggml_tensor * b = dst->src[1];
+    const struct ggml_tensor * c = dst->src[2];
+
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14658,9 +15052,11 @@ static void ggml_compute_forward_map_custom3(
 
 static void ggml_compute_forward_cross_entropy_loss_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
     GGML_ASSERT(ggml_is_contiguous(src0));
     GGML_ASSERT(ggml_is_contiguous(src1));
     GGML_ASSERT(ggml_is_scalar(dst));
@@ -14764,13 +15160,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
 
 static void ggml_compute_forward_cross_entropy_loss(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_cross_entropy_loss_f32(params, src0, src1, dst);
+                ggml_compute_forward_cross_entropy_loss_f32(params, dst);
             } break;
         default:
             {
@@ -14783,10 +15180,12 @@ static void ggml_compute_forward_cross_entropy_loss(
 
 static void ggml_compute_forward_cross_entropy_loss_back_f32(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+    const struct ggml_tensor * opt0 = dst->src[2];
+
     GGML_ASSERT(ggml_is_contiguous(dst));
     GGML_ASSERT(ggml_is_contiguous(src0));
     GGML_ASSERT(ggml_is_contiguous(src1));
@@ -14873,14 +15272,14 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
 
 static void ggml_compute_forward_cross_entropy_loss_back(
         const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_cross_entropy_loss_back_f32(params, src0, src1, opt0, dst);
+                ggml_compute_forward_cross_entropy_loss_back_f32(params, dst);
             } break;
         default:
             {
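The map_* ops above keep their extra function-pointer argument; the dispatcher below recovers it by memcpy-ing it out of the node's op_params byte buffer, since a function pointer cannot portably round-trip through a void * cast in standard C. A standalone sketch of that store/load round trip, with toy names:

    #include <stdio.h>
    #include <string.h>

    typedef float (*unary_op_f32_t)(float);

    // stand-in for ggml_tensor::op_params: an opaque, suitably large byte buffer
    struct node {
        char op_params[32];
    };

    static float my_neg(float x) { return -x; }

    int main(void) {
        struct node n;

        // store: copy the function pointer's bytes into the params buffer
        unary_op_f32_t in = my_neg;
        memcpy(n.op_params, &in, sizeof(in));

        // load: copy the bytes back out into a properly typed pointer
        unary_op_f32_t out;
        memcpy(&out, n.op_params, sizeof(out));

        printf("out(2.0f) = %f\n", out(2.0f));
        return 0;
    }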
@@ -14928,312 +15327,312 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
 
     switch (tensor->op) {
         case GGML_OP_DUP:
             {
-                ggml_compute_forward_dup(params, tensor
+                ggml_compute_forward_dup(params, tensor);
             } break;
         case GGML_OP_ADD:
             {
-                ggml_compute_forward_add(params, tensor
+                ggml_compute_forward_add(params, tensor);
             } break;
         case GGML_OP_ADD1:
             {
-                ggml_compute_forward_add1(params, tensor
+                ggml_compute_forward_add1(params, tensor);
             } break;
         case GGML_OP_ACC:
             {
-                ggml_compute_forward_acc(params, tensor
+                ggml_compute_forward_acc(params, tensor);
             } break;
         case GGML_OP_SUB:
             {
-                ggml_compute_forward_sub(params, tensor
+                ggml_compute_forward_sub(params, tensor);
             } break;
         case GGML_OP_MUL:
             {
-                ggml_compute_forward_mul(params, tensor
+                ggml_compute_forward_mul(params, tensor);
             } break;
         case GGML_OP_DIV:
             {
-                ggml_compute_forward_div(params, tensor
+                ggml_compute_forward_div(params, tensor);
             } break;
         case GGML_OP_SQR:
             {
-                ggml_compute_forward_sqr(params, tensor
+                ggml_compute_forward_sqr(params, tensor);
             } break;
         case GGML_OP_SQRT:
             {
-                ggml_compute_forward_sqrt(params, tensor
+                ggml_compute_forward_sqrt(params, tensor);
             } break;
         case GGML_OP_LOG:
             {
-                ggml_compute_forward_log(params, tensor
+                ggml_compute_forward_log(params, tensor);
             } break;
         case GGML_OP_SUM:
             {
-                ggml_compute_forward_sum(params, tensor
+                ggml_compute_forward_sum(params, tensor);
             } break;
         case GGML_OP_SUM_ROWS:
             {
-                ggml_compute_forward_sum_rows(params, tensor
+                ggml_compute_forward_sum_rows(params, tensor);
             } break;
         case GGML_OP_MEAN:
             {
-                ggml_compute_forward_mean(params, tensor
+                ggml_compute_forward_mean(params, tensor);
             } break;
         case GGML_OP_ARGMAX:
             {
-                ggml_compute_forward_argmax(params, tensor
+                ggml_compute_forward_argmax(params, tensor);
             } break;
         case GGML_OP_REPEAT:
             {
-                ggml_compute_forward_repeat(params, tensor
+                ggml_compute_forward_repeat(params, tensor);
             } break;
         case GGML_OP_REPEAT_BACK:
             {
-                ggml_compute_forward_repeat_back(params, tensor
+                ggml_compute_forward_repeat_back(params, tensor);
             } break;
         case GGML_OP_CONCAT:
             {
-                ggml_compute_forward_concat(params, tensor
+                ggml_compute_forward_concat(params, tensor);
             } break;
         case GGML_OP_SILU_BACK:
             {
-                ggml_compute_forward_silu_back(params, tensor
+                ggml_compute_forward_silu_back(params, tensor);
             } break;
         case GGML_OP_NORM:
             {
-                ggml_compute_forward_norm(params, tensor
+                ggml_compute_forward_norm(params, tensor);
             } break;
         case GGML_OP_RMS_NORM:
             {
-                ggml_compute_forward_rms_norm(params, tensor
+                ggml_compute_forward_rms_norm(params, tensor);
             } break;
         case GGML_OP_RMS_NORM_BACK:
             {
-                ggml_compute_forward_rms_norm_back(params, tensor
+                ggml_compute_forward_rms_norm_back(params, tensor);
             } break;
         case GGML_OP_GROUP_NORM:
             {
-                ggml_compute_forward_group_norm(params, tensor
+                ggml_compute_forward_group_norm(params, tensor);
             } break;
         case GGML_OP_MUL_MAT:
             {
-                ggml_compute_forward_mul_mat(params, tensor
+                ggml_compute_forward_mul_mat(params, tensor);
             } break;
         case GGML_OP_MUL_MAT_ID:
             {
-                ggml_compute_forward_mul_mat_id(params, tensor
+                ggml_compute_forward_mul_mat_id(params, tensor);
             } break;
         case GGML_OP_OUT_PROD:
             {
-                ggml_compute_forward_out_prod(params, tensor
+                ggml_compute_forward_out_prod(params, tensor);
             } break;
         case GGML_OP_SCALE:
             {
-                ggml_compute_forward_scale(params, tensor
+                ggml_compute_forward_scale(params, tensor);
             } break;
         case GGML_OP_SET:
             {
-                ggml_compute_forward_set(params, tensor
+                ggml_compute_forward_set(params, tensor);
             } break;
         case GGML_OP_CPY:
             {
-                ggml_compute_forward_cpy(params, tensor
+                ggml_compute_forward_cpy(params, tensor);
             } break;
         case GGML_OP_CONT:
             {
-                ggml_compute_forward_cont(params, tensor
+                ggml_compute_forward_cont(params, tensor);
             } break;
         case GGML_OP_RESHAPE:
             {
-                ggml_compute_forward_reshape(params, tensor
+                ggml_compute_forward_reshape(params, tensor);
             } break;
         case GGML_OP_VIEW:
             {
-                ggml_compute_forward_view(params, tensor
+                ggml_compute_forward_view(params, tensor);
             } break;
         case GGML_OP_PERMUTE:
             {
-                ggml_compute_forward_permute(params, tensor
+                ggml_compute_forward_permute(params, tensor);
             } break;
         case GGML_OP_TRANSPOSE:
             {
-                ggml_compute_forward_transpose(params, tensor
+                ggml_compute_forward_transpose(params, tensor);
             } break;
         case GGML_OP_GET_ROWS:
             {
-                ggml_compute_forward_get_rows(params, tensor
+                ggml_compute_forward_get_rows(params, tensor);
             } break;
         case GGML_OP_GET_ROWS_BACK:
             {
-                ggml_compute_forward_get_rows_back(params, tensor
+                ggml_compute_forward_get_rows_back(params, tensor);
             } break;
         case GGML_OP_DIAG:
             {
-                ggml_compute_forward_diag(params, tensor
+                ggml_compute_forward_diag(params, tensor);
             } break;
         case GGML_OP_DIAG_MASK_INF:
             {
-                ggml_compute_forward_diag_mask_inf(params, tensor
+                ggml_compute_forward_diag_mask_inf(params, tensor);
             } break;
         case GGML_OP_DIAG_MASK_ZERO:
             {
-                ggml_compute_forward_diag_mask_zero(params, tensor
+                ggml_compute_forward_diag_mask_zero(params, tensor);
             } break;
         case GGML_OP_SOFT_MAX:
             {
-                ggml_compute_forward_soft_max(params, tensor
+                ggml_compute_forward_soft_max(params, tensor);
             } break;
         case GGML_OP_SOFT_MAX_BACK:
             {
-                ggml_compute_forward_soft_max_back(params, tensor
+                ggml_compute_forward_soft_max_back(params, tensor);
             } break;
         case GGML_OP_ROPE:
             {
-                ggml_compute_forward_rope(params, tensor
+                ggml_compute_forward_rope(params, tensor);
             } break;
         case GGML_OP_ROPE_BACK:
             {
-                ggml_compute_forward_rope_back(params, tensor
+                ggml_compute_forward_rope_back(params, tensor);
             } break;
         case GGML_OP_ALIBI:
             {
-                ggml_compute_forward_alibi(params, tensor
+                ggml_compute_forward_alibi(params, tensor);
             } break;
         case GGML_OP_CLAMP:
             {
-                ggml_compute_forward_clamp(params, tensor
+                ggml_compute_forward_clamp(params, tensor);
             } break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             {
-                ggml_compute_forward_conv_transpose_1d(params, tensor
+                ggml_compute_forward_conv_transpose_1d(params, tensor);
             } break;
         case GGML_OP_IM2COL:
             {
-                ggml_compute_forward_im2col(params, tensor
+                ggml_compute_forward_im2col(params, tensor);
             } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
-                ggml_compute_forward_conv_transpose_2d(params, tensor
+                ggml_compute_forward_conv_transpose_2d(params, tensor);
             } break;
         case GGML_OP_POOL_1D:
             {
-                ggml_compute_forward_pool_1d(params, tensor
+                ggml_compute_forward_pool_1d(params, tensor);
             } break;
         case GGML_OP_POOL_2D:
             {
-                ggml_compute_forward_pool_2d(params, tensor
+                ggml_compute_forward_pool_2d(params, tensor);
             } break;
         case GGML_OP_UPSCALE:
             {
-                ggml_compute_forward_upscale(params, tensor
+                ggml_compute_forward_upscale(params, tensor);
             } break;
         case GGML_OP_PAD:
             {
-                ggml_compute_forward_pad(params, tensor
+                ggml_compute_forward_pad(params, tensor);
             } break;
         case GGML_OP_ARGSORT:
             {
-                ggml_compute_forward_argsort(params, tensor
+                ggml_compute_forward_argsort(params, tensor);
             } break;
         case GGML_OP_LEAKY_RELU:
             {
-                ggml_compute_forward_leaky_relu(params, tensor
+                ggml_compute_forward_leaky_relu(params, tensor);
             } break;
         case GGML_OP_FLASH_ATTN:
             {
                 const int32_t t = ggml_get_op_params_i32(tensor, 0);
                 GGML_ASSERT(t == 0 || t == 1);
                 const bool masked = t != 0;
-                ggml_compute_forward_flash_attn(params,
+                ggml_compute_forward_flash_attn(params, masked, tensor);
             } break;
         case GGML_OP_FLASH_FF:
             {
-                ggml_compute_forward_flash_ff(params, tensor
+                ggml_compute_forward_flash_ff(params, tensor);
             } break;
         case GGML_OP_FLASH_ATTN_BACK:
             {
                 int32_t t = ggml_get_op_params_i32(tensor, 0);
                 GGML_ASSERT(t == 0 || t == 1);
                 bool masked = t != 0;
-                ggml_compute_forward_flash_attn_back(params,
+                ggml_compute_forward_flash_attn_back(params, masked, tensor);
             } break;
         case GGML_OP_WIN_PART:
             {
-                ggml_compute_forward_win_part(params, tensor
+                ggml_compute_forward_win_part(params, tensor);
             } break;
         case GGML_OP_WIN_UNPART:
             {
-                ggml_compute_forward_win_unpart(params, tensor
+                ggml_compute_forward_win_unpart(params, tensor);
             } break;
         case GGML_OP_UNARY:
             {
-                ggml_compute_forward_unary(params, tensor
+                ggml_compute_forward_unary(params, tensor);
             } break;
         case GGML_OP_GET_REL_POS:
             {
-                ggml_compute_forward_get_rel_pos(params, tensor
+                ggml_compute_forward_get_rel_pos(params, tensor);
             } break;
         case GGML_OP_ADD_REL_POS:
             {
-                ggml_compute_forward_add_rel_pos(params, tensor
+                ggml_compute_forward_add_rel_pos(params, tensor);
             } break;
         case GGML_OP_MAP_UNARY:
             {
                 ggml_unary_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_unary(params, tensor
+                ggml_compute_forward_map_unary(params, tensor, fun);
             }
             break;
         case GGML_OP_MAP_BINARY:
             {
                 ggml_binary_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_binary(params, tensor
+                ggml_compute_forward_map_binary(params, tensor, fun);
             }
             break;
         case GGML_OP_MAP_CUSTOM1_F32:
             {
                 ggml_custom1_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom1_f32(params, tensor
+                ggml_compute_forward_map_custom1_f32(params, tensor, fun);
             }
             break;
         case GGML_OP_MAP_CUSTOM2_F32:
             {
                 ggml_custom2_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom2_f32(params, tensor
+                ggml_compute_forward_map_custom2_f32(params, tensor, fun);
             }
             break;
         case GGML_OP_MAP_CUSTOM3_F32:
             {
                 ggml_custom3_op_f32_t fun;
                 memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom3_f32(params, tensor
+                ggml_compute_forward_map_custom3_f32(params, tensor, fun);
             }
             break;
         case GGML_OP_MAP_CUSTOM1:
             {
-                ggml_compute_forward_map_custom1(params, tensor
+                ggml_compute_forward_map_custom1(params, tensor);
             }
             break;
         case GGML_OP_MAP_CUSTOM2:
             {
-                ggml_compute_forward_map_custom2(params, tensor
+                ggml_compute_forward_map_custom2(params, tensor);
             }
             break;
         case GGML_OP_MAP_CUSTOM3:
             {
-                ggml_compute_forward_map_custom3(params, tensor
+                ggml_compute_forward_map_custom3(params, tensor);
             }
             break;
         case GGML_OP_CROSS_ENTROPY_LOSS:
             {
-                ggml_compute_forward_cross_entropy_loss(params, tensor
+                ggml_compute_forward_cross_entropy_loss(params, tensor);
             }
             break;
         case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
             {
-                ggml_compute_forward_cross_entropy_loss_back(params, tensor
+                ggml_compute_forward_cross_entropy_loss_back(params, tensor);
             }
             break;
         case GGML_OP_NONE:
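In the dispatcher above, small per-op scalars such as the flash-attention masked flag ride along in the node's op_params and are read back with ggml_get_op_params_i32(tensor, 0). A toy version of that accessor pattern (the struct and accessor names here are invented for the sketch):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define OP_PARAMS_MAX 8

    // toy node: small scalar op parameters live in an int32 array on the node
    struct node {
        int32_t op_params[OP_PARAMS_MAX];
    };

    static int32_t get_op_params_i32(const struct node * n, int i) {
        assert(i >= 0 && i < OP_PARAMS_MAX);
        return n->op_params[i];
    }

    int main(void) {
        struct node flash_attn = { .op_params = { 1 } }; // slot 0: the masked flag

        const int32_t t = get_op_params_i32(&flash_attn, 0);
        assert(t == 0 || t == 1);
        const bool masked = t != 0;

        printf("masked = %d\n", masked);
        return 0;
    }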
@@ -16637,27 +17036,47 @@ typedef pthread_t ggml_thread_t;
 #endif
 
 // Android's libc implementation "bionic" does not support setting affinity
-#if defined(__linux__)
-static void set_numa_thread_affinity(int thread_n, int n_threads) {
+#if defined(__gnu_linux__)
+static void set_numa_thread_affinity(int thread_n) {
     if (!ggml_is_numa()) {
         return;
     }
 
-    // run thread on node_num thread_n / (threads per node)
-    const int node_num = thread_n / ((n_threads + g_state.numa.n_nodes - 1)/g_state.numa.n_nodes);
-    struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
+    int node_num;
+    int rv;
     size_t setsize = CPU_ALLOC_SIZE(g_state.numa.total_cpus);
 
+    switch(g_state.numa.numa_strategy) {
+        case GGML_NUMA_STRATEGY_DISTRIBUTE:
+            // run thread on node_num thread_n / (threads per node)
+            node_num = thread_n % g_state.numa.n_nodes;
+            break;
+        case GGML_NUMA_STRATEGY_ISOLATE:
+            // run thread on current_node
+            node_num = g_state.numa.current_node;
+            break;
+        case GGML_NUMA_STRATEGY_NUMACTL:
+            // use the cpuset that numactl gave us
+            rv = pthread_setaffinity_np(pthread_self(), setsize, &g_state.numa.cpuset);
+            if (rv) {
+                fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
+            }
+            return;
+        default:
+            return;
+    }
+
+    struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
+
     cpu_set_t * cpus = CPU_ALLOC(g_state.numa.total_cpus);
     CPU_ZERO_S(setsize, cpus);
     for (size_t i = 0; i < node->n_cpus; ++i) {
         CPU_SET_S(node->cpus[i], setsize, cpus);
     }
 
-    int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
+    rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
     if (rv) {
-        fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
-                strerror(rv));
+        fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
     }
 
     CPU_FREE(cpus);
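The rewritten set_numa_thread_affinity picks a node according to the configured strategy and then pins the calling thread using a dynamically sized cpu_set_t. A standalone Linux/glibc sketch of that CPU_ALLOC / CPU_SET_S / pthread_setaffinity_np sequence (it simply pins the calling thread to CPU 0; error handling mirrors the hunk above; build with -pthread):

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void) {
        long total_cpus = sysconf(_SC_NPROCESSORS_ONLN);

        // dynamically sized set, as used above for g_state.numa.total_cpus
        size_t setsize = CPU_ALLOC_SIZE(total_cpus);
        cpu_set_t * cpus = CPU_ALLOC(total_cpus);

        CPU_ZERO_S(setsize, cpus);
        CPU_SET_S(0, setsize, cpus); // pin the calling thread to CPU 0

        int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
        if (rv) {
            fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
        }

        CPU_FREE(cpus);
        return 0;
    }

The dynamic CPU_*_S macros matter on machines with more CPUs than a static cpu_set_t can describe, which is exactly the large-NUMA-box case this code targets.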
@@ -16678,8 +17097,7 @@ static void clear_numa_thread_affinity(void) {
 
     int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
     if (rv) {
-        fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
-            strerror(rv));
+        fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
     }
 
     CPU_FREE(cpus);
@@ -16687,7 +17105,7 @@ static void clear_numa_thread_affinity(void) {
 #else
 // TODO: Windows etc.
 // (the linux implementation may also work on BSD, someone should test)
-static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
+static void set_numa_thread_affinity(int thread_n) { UNUSED(thread_n); }
 static void clear_numa_thread_affinity(void) {}
 #endif
 
@@ -16987,7 +17405,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
     const int n_threads = state->shared->n_threads;
 
-    set_numa_thread_affinity(state->ith, n_threads);
+    set_numa_thread_affinity(state->ith);
 
     int node_n = -1;
     int task_phase = GGML_TASK_FINALIZE;
@@ -17793,7 +18211,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
 
             ptr += ggml_nbytes(tensor);
 
-            fprintf(stderr, "%s: loaded leaf %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
+            fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
         }
     }
 
@@ -17896,7 +18314,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
 
             result->nodes[i] = tensor;
 
-            fprintf(stderr, "%s: loaded node %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
+            fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
         }
     }
 }
@@ -18521,7 +18939,9 @@ static enum ggml_opt_result linesearch_backtracking(
         (*step) *= width;
     }
 
-
+    GGML_ASSERT(false && "line search failed");
+
+    return GGML_LINESEARCH_FAIL;
 }
 
 static enum ggml_opt_result ggml_opt_lbfgs(
@@ -18789,7 +19209,9 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         step[0] = 1.0;
     }
 
-
+    GGML_ASSERT(false && "lbfgs failed");
+
+    return GGML_OPT_DID_NOT_CONVERGE;
 }
 
 struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
@@ -19037,8 +19459,9 @@ void ggml_quantize_init(enum ggml_type type) {
     ggml_critical_section_start();
 
     switch (type) {
-        case GGML_TYPE_IQ2_XXS: iq2xs_init_impl(256); break;
-        case GGML_TYPE_IQ2_XS:  iq2xs_init_impl(512); break;
+        case GGML_TYPE_IQ2_XXS:
+        case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ1_S:   iq2xs_init_impl(type); break;
         case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
         default: // nothing
             break;
@@ -19050,8 +19473,10 @@ void ggml_quantize_init(enum ggml_type type) {
 void ggml_quantize_free(void) {
     ggml_critical_section_start();
 
-    iq2xs_free_impl(256);
-    iq2xs_free_impl(512);
+    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
+    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
+    iq2xs_free_impl(GGML_TYPE_IQ1_S);
+    iq3xs_free_impl(256);
 
     ggml_critical_section_end();
 }
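The new GGML_TYPE_IQ1_S cases route the 1.5-bit type through the same shared table setup and teardown as the IQ2 types (iq2xs_init_impl now keys on the ggml type rather than a grid size). A minimal usage sketch, assuming nothing beyond the two public entry points whose bodies appear in these hunks:

    #include "ggml.h"

    int main(void) {
        // build the shared lookup tables before any IQ1_S (de)quantization;
        // the call guards itself with ggml's critical section, as shown above
        ggml_quantize_init(GGML_TYPE_IQ1_S);

        // ... quantize / dequantize IQ1_S tensors here ...

        // tear all quantization tables down again at shutdown
        ggml_quantize_free();
        return 0;
    }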
@@ -19186,7 +19611,8 @@ size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t *
 
 bool ggml_quantize_requires_imatrix(enum ggml_type type) {
     return
         type == GGML_TYPE_IQ2_XXS ||
-        type == GGML_TYPE_IQ2_XS;
+        type == GGML_TYPE_IQ2_XS ||
+        type == GGML_TYPE_IQ1_S;
 }
 
 size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start,
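With IQ1_S added to ggml_quantize_requires_imatrix, callers can guard quantization as sketched below. The tail of ggml_quantize_chunk's parameter list (nrows, n_per_row, hist, imatrix) is inferred from the case bodies in the next hunk, so treat this as a sketch and check ggml.h:

    #include <stdio.h>
    #include "ggml.h"

    // sketch: refuse to quantize imatrix-dependent types without importance data
    size_t quantize_rows(enum ggml_type type, const float * src, void * dst,
                         int nrows, int n_per_row, int64_t * hist, const float * imatrix) {
        if (ggml_quantize_requires_imatrix(type) && imatrix == NULL) {
            fprintf(stderr, "type %d needs an importance matrix\n", (int) type);
            return 0;
        }
        ggml_quantize_init(type); // no-op for types without lookup tables
        return ggml_quantize_chunk(type, src, dst, /*start=*/0, nrows, n_per_row, hist, imatrix);
    }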
@@ -19311,6 +19737,24 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
 
                 result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+        case GGML_TYPE_IQ1_S:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq1_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
+        case GGML_TYPE_IQ4_NL:
+            {
+                GGML_ASSERT(start % QK4_NL == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
         case GGML_TYPE_F16:
             {
                 size_t elemsize = sizeof(ggml_fp16_t);