llama_cpp 0.12.6 → 0.12.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/llama_cpp/llama_cpp.cpp +21 -10
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +8 -1
- data/vendor/tmp/llama.cpp/Makefile +43 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +73 -43
- data/vendor/tmp/llama.cpp/ggml-backend.c +18 -9
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +560 -346
- data/vendor/tmp/llama.cpp/ggml-impl.h +20 -7
- data/vendor/tmp/llama.cpp/ggml-metal.m +99 -11
- data/vendor/tmp/llama.cpp/ggml-metal.metal +608 -9
- data/vendor/tmp/llama.cpp/ggml-quants.c +908 -54
- data/vendor/tmp/llama.cpp/ggml-quants.h +25 -2
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +81 -203
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +124 -52
- data/vendor/tmp/llama.cpp/ggml.c +948 -504
- data/vendor/tmp/llama.cpp/ggml.h +24 -11
- data/vendor/tmp/llama.cpp/llama.cpp +688 -163
- data/vendor/tmp/llama.cpp/llama.h +37 -1
- data/vendor/tmp/llama.cpp/scripts/get-flags.mk +1 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -23,6 +23,9 @@
|
|
23
23
|
#include <limits.h>
|
24
24
|
#include <stdarg.h>
|
25
25
|
#include <signal.h>
|
26
|
+
#if defined(__gnu_linux__)
|
27
|
+
#include <syscall.h>
|
28
|
+
#endif
|
26
29
|
|
27
30
|
#ifdef GGML_USE_METAL
|
28
31
|
#include <unistd.h>
|
@@ -270,6 +273,8 @@ inline static void * ggml_calloc(size_t num, size_t size) {
|
|
270
273
|
#include <Accelerate/Accelerate.h>
|
271
274
|
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
|
272
275
|
#include "ggml-opencl.h"
|
276
|
+
#elif defined(GGML_USE_VULKAN)
|
277
|
+
#include "ggml-vulkan.h"
|
273
278
|
#endif
|
274
279
|
#elif defined(GGML_USE_OPENBLAS)
|
275
280
|
#if defined(GGML_BLAS_USE_MKL)
|
@@ -318,7 +323,7 @@ float ggml_table_f32_f16[1 << 16];
|
|
318
323
|
// note: do not use these inside ggml.c
|
319
324
|
// these are meant to be used via the ggml.h API
|
320
325
|
float ggml_fp16_to_fp32(ggml_fp16_t x) {
|
321
|
-
return
|
326
|
+
return GGML_FP16_TO_FP32(x);
|
322
327
|
}
|
323
328
|
|
324
329
|
ggml_fp16_t ggml_fp32_to_fp16(float x) {
|
@@ -673,6 +678,30 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
673
678
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
674
679
|
.nrows = 1,
|
675
680
|
},
|
681
|
+
[GGML_TYPE_IQ1_S] = {
|
682
|
+
.type_name = "iq1_s",
|
683
|
+
.blck_size = QK_K,
|
684
|
+
.type_size = sizeof(block_iq1_s),
|
685
|
+
.is_quantized = true,
|
686
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq1_s,
|
687
|
+
.from_float = NULL,
|
688
|
+
.from_float_reference = NULL,
|
689
|
+
.vec_dot = ggml_vec_dot_iq1_s_q8_K,
|
690
|
+
.vec_dot_type = GGML_TYPE_Q8_K,
|
691
|
+
.nrows = 1,
|
692
|
+
},
|
693
|
+
[GGML_TYPE_IQ4_NL] = {
|
694
|
+
.type_name = "iq4_nl",
|
695
|
+
.blck_size = QK4_NL,
|
696
|
+
.type_size = sizeof(block_iq4_nl),
|
697
|
+
.is_quantized = true,
|
698
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
|
699
|
+
.from_float = quantize_row_iq4_nl,
|
700
|
+
.from_float_reference = (ggml_from_float_t)quantize_row_iq4_nl_reference,
|
701
|
+
.vec_dot = ggml_vec_dot_iq4_nl_q8_0,
|
702
|
+
.vec_dot_type = GGML_TYPE_Q8_0,
|
703
|
+
.nrows = 1,
|
704
|
+
},
|
676
705
|
[GGML_TYPE_Q8_K] = {
|
677
706
|
.type_name = "q8_K",
|
678
707
|
.blck_size = QK_K,
|
@@ -769,7 +798,7 @@ inline static float vaddvq_f32(float32x4_t v) {
|
|
769
798
|
#define GGML_F16x8 float16x8_t
|
770
799
|
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
|
771
800
|
#define GGML_F16x8_SET1(x) vdupq_n_f16(x)
|
772
|
-
#define GGML_F16x8_LOAD
|
801
|
+
#define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
|
773
802
|
#define GGML_F16x8_STORE vst1q_f16
|
774
803
|
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
|
775
804
|
#define GGML_F16x8_ADD vaddq_f16
|
@@ -812,7 +841,7 @@ inline static float vaddvq_f32(float32x4_t v) {
|
|
812
841
|
#define GGML_F32Cx4 float32x4_t
|
813
842
|
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
|
814
843
|
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
|
815
|
-
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16(x))
|
844
|
+
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
|
816
845
|
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
|
817
846
|
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
|
818
847
|
#define GGML_F32Cx4_ADD vaddq_f32
|
@@ -868,7 +897,7 @@ do { \
|
|
868
897
|
const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), \
|
869
898
|
_mm256_extractf128_ps(x[0], 1)); \
|
870
899
|
const __m128 t1 = _mm_hadd_ps(t0, t0); \
|
871
|
-
res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1));
|
900
|
+
res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
|
872
901
|
} while (0)
|
873
902
|
// TODO: is this optimal ?
|
874
903
|
|
@@ -1149,7 +1178,7 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) {
|
|
1149
1178
|
x[i] = _mm_add_ps(x[i], x[offset+i]); \
|
1150
1179
|
} \
|
1151
1180
|
const __m128 t0 = _mm_hadd_ps(x[0], x[0]); \
|
1152
|
-
res = _mm_cvtss_f32(_mm_hadd_ps(t0, t0));
|
1181
|
+
res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t0, t0)); \
|
1153
1182
|
}
|
1154
1183
|
// TODO: is this optimal ?
|
1155
1184
|
|
@@ -1954,9 +1983,16 @@ struct ggml_numa_node {
|
|
1954
1983
|
};
|
1955
1984
|
|
1956
1985
|
struct ggml_numa_nodes {
|
1986
|
+
enum ggml_numa_strategy numa_strategy;
|
1957
1987
|
struct ggml_numa_node nodes[GGML_NUMA_MAX_NODES];
|
1958
1988
|
uint32_t n_nodes;
|
1959
1989
|
uint32_t total_cpus; // hardware threads on system
|
1990
|
+
uint32_t current_node; // node on which main process is executing
|
1991
|
+
#if defined(__gnu_linux__)
|
1992
|
+
cpu_set_t cpuset; // cpuset from numactl
|
1993
|
+
#else
|
1994
|
+
uint32_t cpuset; // no NUMA support outside of Linux at this time. Use a portable datatype
|
1995
|
+
#endif
|
1960
1996
|
};
|
1961
1997
|
|
1962
1998
|
//
|
@@ -1990,18 +2026,40 @@ inline static void ggml_critical_section_end(void) {
|
|
1990
2026
|
atomic_fetch_sub(&g_state_barrier, 1);
|
1991
2027
|
}
|
1992
2028
|
|
1993
|
-
|
2029
|
+
#if defined(__gnu_linux__)
|
2030
|
+
static cpu_set_t ggml_get_numa_affinity(void) {
|
2031
|
+
cpu_set_t cpuset;
|
2032
|
+
pthread_t thread;
|
2033
|
+
thread = pthread_self();
|
2034
|
+
CPU_ZERO(&cpuset);
|
2035
|
+
pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
|
2036
|
+
return cpuset;
|
2037
|
+
}
|
2038
|
+
#else
|
2039
|
+
static uint32_t ggml_get_numa_affinity(void) {
|
2040
|
+
return 0; // no NUMA support
|
2041
|
+
}
|
2042
|
+
#endif
|
2043
|
+
|
2044
|
+
void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
|
1994
2045
|
if (g_state.numa.n_nodes > 0) {
|
1995
2046
|
fprintf(stderr, "ggml_numa_init: NUMA already initialized\n");
|
1996
2047
|
|
1997
2048
|
return;
|
1998
2049
|
}
|
1999
2050
|
|
2000
|
-
#
|
2051
|
+
#if defined(__gnu_linux__)
|
2001
2052
|
struct stat st;
|
2002
2053
|
char path[256];
|
2003
2054
|
int rv;
|
2004
2055
|
|
2056
|
+
// set numa scheme
|
2057
|
+
g_state.numa.numa_strategy = numa_flag;
|
2058
|
+
|
2059
|
+
GGML_PRINT_DEBUG("numa strategy %u\n",g_state.numa.numa_strategy);
|
2060
|
+
|
2061
|
+
g_state.numa.cpuset = ggml_get_numa_affinity();
|
2062
|
+
|
2005
2063
|
// enumerate nodes
|
2006
2064
|
while (g_state.numa.n_nodes < GGML_NUMA_MAX_NODES) {
|
2007
2065
|
rv = snprintf(path, sizeof(path), "/sys/devices/system/node/node%u", g_state.numa.n_nodes);
|
@@ -2020,11 +2078,23 @@ void ggml_numa_init(void) {
|
|
2020
2078
|
|
2021
2079
|
GGML_PRINT_DEBUG("found %u numa nodes, %u CPUs\n", g_state.numa.n_nodes, g_state.numa.total_cpus);
|
2022
2080
|
|
2023
|
-
|
2081
|
+
// figure out which node we're on
|
2082
|
+
uint current_cpu;
|
2083
|
+
int getcpu_ret = 0;
|
2084
|
+
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 28)
|
2085
|
+
getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
|
2086
|
+
#else
|
2087
|
+
// old glibc doesn't have a wrapper for this call. Fall back on direct syscall
|
2088
|
+
getcpu_ret = syscall(SYS_getcpu,&current_cpu,&g_state.numa.current_node);
|
2089
|
+
#endif
|
2090
|
+
|
2091
|
+
if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
|
2024
2092
|
g_state.numa.n_nodes = 0;
|
2025
2093
|
return;
|
2026
2094
|
}
|
2027
2095
|
|
2096
|
+
GGML_PRINT_DEBUG("found our process on numa node %u, CPU %u\n", g_state.numa.current_node, current_cpu);
|
2097
|
+
|
2028
2098
|
for (uint32_t n = 0; n < g_state.numa.n_nodes; ++n) {
|
2029
2099
|
struct ggml_numa_node * node = &g_state.numa.nodes[n];
|
2030
2100
|
GGML_PRINT_DEBUG("CPUs on node %u:", n);
|
@@ -2051,6 +2121,7 @@ void ggml_numa_init(void) {
|
|
2051
2121
|
}
|
2052
2122
|
}
|
2053
2123
|
#else
|
2124
|
+
GGML_UNUSED(numa_flag);
|
2054
2125
|
// TODO
|
2055
2126
|
#endif
|
2056
2127
|
}
|
@@ -2231,6 +2302,8 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|
2231
2302
|
case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
|
2232
2303
|
case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
|
2233
2304
|
case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
|
2305
|
+
case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
|
2306
|
+
case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
|
2234
2307
|
case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
|
2235
2308
|
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
|
2236
2309
|
}
|
@@ -3184,7 +3257,7 @@ const char * ggml_get_name(const struct ggml_tensor * tensor) {
|
|
3184
3257
|
}
|
3185
3258
|
|
3186
3259
|
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
|
3187
|
-
strncpy(tensor->name, name, sizeof(tensor->name));
|
3260
|
+
strncpy(tensor->name, name, sizeof(tensor->name) - 1);
|
3188
3261
|
tensor->name[sizeof(tensor->name) - 1] = '\0';
|
3189
3262
|
return tensor;
|
3190
3263
|
}
|
@@ -5060,16 +5133,28 @@ static struct ggml_tensor * ggml_soft_max_impl(
|
|
5060
5133
|
struct ggml_context * ctx,
|
5061
5134
|
struct ggml_tensor * a,
|
5062
5135
|
struct ggml_tensor * mask,
|
5136
|
+
struct ggml_tensor * pos,
|
5063
5137
|
float scale,
|
5138
|
+
float max_bias,
|
5064
5139
|
bool inplace) {
|
5065
5140
|
GGML_ASSERT(ggml_is_contiguous(a));
|
5141
|
+
|
5066
5142
|
if (mask) {
|
5067
5143
|
GGML_ASSERT(ggml_is_contiguous(mask));
|
5068
|
-
GGML_ASSERT(mask
|
5069
|
-
GGML_ASSERT(mask->ne[3] == 1);
|
5144
|
+
GGML_ASSERT(ggml_is_matrix(mask));
|
5070
5145
|
GGML_ASSERT(ggml_can_repeat_rows(mask, a));
|
5071
5146
|
}
|
5072
5147
|
|
5148
|
+
if (pos) {
|
5149
|
+
GGML_ASSERT(ggml_is_vector(pos));
|
5150
|
+
GGML_ASSERT(pos->type == GGML_TYPE_F32);
|
5151
|
+
GGML_ASSERT(pos->ne[0] == a->ne[0]);
|
5152
|
+
}
|
5153
|
+
|
5154
|
+
if (max_bias > 0.0f) {
|
5155
|
+
GGML_ASSERT(pos);
|
5156
|
+
}
|
5157
|
+
|
5073
5158
|
bool is_node = false;
|
5074
5159
|
|
5075
5160
|
if (a->grad) {
|
@@ -5078,13 +5163,14 @@ static struct ggml_tensor * ggml_soft_max_impl(
|
|
5078
5163
|
|
5079
5164
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5080
5165
|
|
5081
|
-
float params[] = { scale };
|
5166
|
+
float params[] = { scale, max_bias };
|
5082
5167
|
ggml_set_op_params(result, params, sizeof(params));
|
5083
5168
|
|
5084
5169
|
result->op = GGML_OP_SOFT_MAX;
|
5085
5170
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5086
5171
|
result->src[0] = a;
|
5087
5172
|
result->src[1] = mask;
|
5173
|
+
result->src[2] = pos;
|
5088
5174
|
|
5089
5175
|
return result;
|
5090
5176
|
}
|
@@ -5092,21 +5178,23 @@ static struct ggml_tensor * ggml_soft_max_impl(
|
|
5092
5178
|
struct ggml_tensor * ggml_soft_max(
|
5093
5179
|
struct ggml_context * ctx,
|
5094
5180
|
struct ggml_tensor * a) {
|
5095
|
-
return ggml_soft_max_impl(ctx, a, NULL, 1.0f, false);
|
5181
|
+
return ggml_soft_max_impl(ctx, a, NULL, NULL, 1.0f, 0.0f, false);
|
5096
5182
|
}
|
5097
5183
|
|
5098
5184
|
struct ggml_tensor * ggml_soft_max_inplace(
|
5099
5185
|
struct ggml_context * ctx,
|
5100
5186
|
struct ggml_tensor * a) {
|
5101
|
-
return ggml_soft_max_impl(ctx, a, NULL, 1.0f, true);
|
5187
|
+
return ggml_soft_max_impl(ctx, a, NULL, NULL, 1.0f, 0.0f, true);
|
5102
5188
|
}
|
5103
5189
|
|
5104
5190
|
struct ggml_tensor * ggml_soft_max_ext(
|
5105
5191
|
struct ggml_context * ctx,
|
5106
5192
|
struct ggml_tensor * a,
|
5107
5193
|
struct ggml_tensor * mask,
|
5108
|
-
|
5109
|
-
|
5194
|
+
struct ggml_tensor * pos,
|
5195
|
+
float scale,
|
5196
|
+
float max_bias) {
|
5197
|
+
return ggml_soft_max_impl(ctx, a, mask, pos, scale, max_bias, false);
|
5110
5198
|
}
|
5111
5199
|
|
5112
5200
|
// ggml_soft_max_back
|
@@ -5556,7 +5644,9 @@ struct ggml_tensor * ggml_conv_2d(
|
|
5556
5644
|
ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
|
5557
5645
|
ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW]
|
5558
5646
|
|
5559
|
-
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2],
|
5647
|
+
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
|
5648
|
+
result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
|
5649
|
+
|
5560
5650
|
|
5561
5651
|
return result;
|
5562
5652
|
}
|
@@ -6562,8 +6652,10 @@ void ggml_set_param(
|
|
6562
6652
|
|
6563
6653
|
static void ggml_compute_forward_dup_same_cont(
|
6564
6654
|
const struct ggml_compute_params * params,
|
6565
|
-
const struct ggml_tensor * src0,
|
6566
6655
|
struct ggml_tensor * dst) {
|
6656
|
+
|
6657
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
6658
|
+
|
6567
6659
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
6568
6660
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
6569
6661
|
GGML_ASSERT(src0->type == dst->type);
|
@@ -6594,8 +6686,10 @@ static void ggml_compute_forward_dup_same_cont(
|
|
6594
6686
|
}
|
6595
6687
|
static void ggml_compute_forward_dup_f16(
|
6596
6688
|
const struct ggml_compute_params * params,
|
6597
|
-
const struct ggml_tensor * src0,
|
6598
6689
|
struct ggml_tensor * dst) {
|
6690
|
+
|
6691
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
6692
|
+
|
6599
6693
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
6600
6694
|
|
6601
6695
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -6608,7 +6702,7 @@ static void ggml_compute_forward_dup_f16(
|
|
6608
6702
|
const int nth = params->nth; // number of threads
|
6609
6703
|
|
6610
6704
|
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
|
6611
|
-
ggml_compute_forward_dup_same_cont(params,
|
6705
|
+
ggml_compute_forward_dup_same_cont(params, dst);
|
6612
6706
|
return;
|
6613
6707
|
}
|
6614
6708
|
|
@@ -6865,8 +6959,10 @@ static void ggml_compute_forward_dup_f16(
|
|
6865
6959
|
|
6866
6960
|
static void ggml_compute_forward_dup_f32(
|
6867
6961
|
const struct ggml_compute_params * params,
|
6868
|
-
const struct ggml_tensor * src0,
|
6869
6962
|
struct ggml_tensor * dst) {
|
6963
|
+
|
6964
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
6965
|
+
|
6870
6966
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
6871
6967
|
|
6872
6968
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -6879,7 +6975,7 @@ static void ggml_compute_forward_dup_f32(
|
|
6879
6975
|
const int nth = params->nth; // number of threads
|
6880
6976
|
|
6881
6977
|
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
|
6882
|
-
ggml_compute_forward_dup_same_cont(params,
|
6978
|
+
ggml_compute_forward_dup_same_cont(params, dst);
|
6883
6979
|
return;
|
6884
6980
|
}
|
6885
6981
|
|
@@ -7115,8 +7211,10 @@ static void ggml_compute_forward_dup_f32(
|
|
7115
7211
|
// A simplified version of ggml_compute_forward_dup that doesn't do float upcasting, and just plain old memcpy.
|
7116
7212
|
static void ggml_compute_forward_dup_bytes(
|
7117
7213
|
const struct ggml_compute_params * params,
|
7118
|
-
const struct ggml_tensor * src0,
|
7119
7214
|
struct ggml_tensor * dst) {
|
7215
|
+
|
7216
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7217
|
+
|
7120
7218
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
7121
7219
|
GGML_ASSERT(src0->type == dst->type);
|
7122
7220
|
|
@@ -7125,7 +7223,7 @@ static void ggml_compute_forward_dup_bytes(
|
|
7125
7223
|
}
|
7126
7224
|
|
7127
7225
|
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
|
7128
|
-
ggml_compute_forward_dup_same_cont(params,
|
7226
|
+
ggml_compute_forward_dup_same_cont(params, dst);
|
7129
7227
|
return;
|
7130
7228
|
}
|
7131
7229
|
|
@@ -7264,21 +7362,23 @@ static void ggml_compute_forward_dup_bytes(
|
|
7264
7362
|
|
7265
7363
|
static void ggml_compute_forward_dup(
|
7266
7364
|
const struct ggml_compute_params * params,
|
7267
|
-
const struct ggml_tensor * src0,
|
7268
7365
|
struct ggml_tensor * dst) {
|
7366
|
+
|
7367
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7368
|
+
|
7269
7369
|
if (src0->type == dst->type) {
|
7270
|
-
ggml_compute_forward_dup_bytes(params,
|
7370
|
+
ggml_compute_forward_dup_bytes(params, dst);
|
7271
7371
|
return;
|
7272
7372
|
}
|
7273
7373
|
|
7274
7374
|
switch (src0->type) {
|
7275
7375
|
case GGML_TYPE_F16:
|
7276
7376
|
{
|
7277
|
-
ggml_compute_forward_dup_f16(params,
|
7377
|
+
ggml_compute_forward_dup_f16(params, dst);
|
7278
7378
|
} break;
|
7279
7379
|
case GGML_TYPE_F32:
|
7280
7380
|
{
|
7281
|
-
ggml_compute_forward_dup_f32(params,
|
7381
|
+
ggml_compute_forward_dup_f32(params, dst);
|
7282
7382
|
} break;
|
7283
7383
|
default:
|
7284
7384
|
{
|
@@ -7291,9 +7391,11 @@ static void ggml_compute_forward_dup(
|
|
7291
7391
|
|
7292
7392
|
static void ggml_compute_forward_add_f32(
|
7293
7393
|
const struct ggml_compute_params * params,
|
7294
|
-
const struct ggml_tensor * src0,
|
7295
|
-
const struct ggml_tensor * src1,
|
7296
7394
|
struct ggml_tensor * dst) {
|
7395
|
+
|
7396
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7397
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7398
|
+
|
7297
7399
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
7298
7400
|
|
7299
7401
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -7379,9 +7481,11 @@ static void ggml_compute_forward_add_f32(
|
|
7379
7481
|
|
7380
7482
|
static void ggml_compute_forward_add_f16_f32(
|
7381
7483
|
const struct ggml_compute_params * params,
|
7382
|
-
const struct ggml_tensor * src0,
|
7383
|
-
const struct ggml_tensor * src1,
|
7384
7484
|
struct ggml_tensor * dst) {
|
7485
|
+
|
7486
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7487
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7488
|
+
|
7385
7489
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7386
7490
|
|
7387
7491
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -7456,9 +7560,11 @@ static void ggml_compute_forward_add_f16_f32(
|
|
7456
7560
|
|
7457
7561
|
static void ggml_compute_forward_add_f16_f16(
|
7458
7562
|
const struct ggml_compute_params * params,
|
7459
|
-
const struct ggml_tensor * src0,
|
7460
|
-
const struct ggml_tensor * src1,
|
7461
7563
|
struct ggml_tensor * dst) {
|
7564
|
+
|
7565
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7566
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7567
|
+
|
7462
7568
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7463
7569
|
|
7464
7570
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -7510,9 +7616,11 @@ static void ggml_compute_forward_add_f16_f16(
|
|
7510
7616
|
|
7511
7617
|
static void ggml_compute_forward_add_q_f32(
|
7512
7618
|
const struct ggml_compute_params * params,
|
7513
|
-
const struct ggml_tensor * src0,
|
7514
|
-
const struct ggml_tensor * src1,
|
7515
7619
|
struct ggml_tensor * dst) {
|
7620
|
+
|
7621
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7622
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7623
|
+
|
7516
7624
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7517
7625
|
|
7518
7626
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -7588,14 +7696,16 @@ static void ggml_compute_forward_add_q_f32(
|
|
7588
7696
|
|
7589
7697
|
static void ggml_compute_forward_add(
|
7590
7698
|
const struct ggml_compute_params * params,
|
7591
|
-
const struct ggml_tensor * src0,
|
7592
|
-
const struct ggml_tensor * src1,
|
7593
7699
|
struct ggml_tensor * dst) {
|
7700
|
+
|
7701
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7702
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7703
|
+
|
7594
7704
|
switch (src0->type) {
|
7595
7705
|
case GGML_TYPE_F32:
|
7596
7706
|
{
|
7597
7707
|
if (src1->type == GGML_TYPE_F32) {
|
7598
|
-
ggml_compute_forward_add_f32(params,
|
7708
|
+
ggml_compute_forward_add_f32(params, dst);
|
7599
7709
|
}
|
7600
7710
|
else {
|
7601
7711
|
GGML_ASSERT(false);
|
@@ -7604,10 +7714,10 @@ static void ggml_compute_forward_add(
|
|
7604
7714
|
case GGML_TYPE_F16:
|
7605
7715
|
{
|
7606
7716
|
if (src1->type == GGML_TYPE_F16) {
|
7607
|
-
ggml_compute_forward_add_f16_f16(params,
|
7717
|
+
ggml_compute_forward_add_f16_f16(params, dst);
|
7608
7718
|
}
|
7609
7719
|
else if (src1->type == GGML_TYPE_F32) {
|
7610
|
-
ggml_compute_forward_add_f16_f32(params,
|
7720
|
+
ggml_compute_forward_add_f16_f32(params, dst);
|
7611
7721
|
}
|
7612
7722
|
else {
|
7613
7723
|
GGML_ASSERT(false);
|
@@ -7626,8 +7736,10 @@ static void ggml_compute_forward_add(
|
|
7626
7736
|
case GGML_TYPE_IQ2_XXS:
|
7627
7737
|
case GGML_TYPE_IQ2_XS:
|
7628
7738
|
case GGML_TYPE_IQ3_XXS:
|
7739
|
+
case GGML_TYPE_IQ1_S:
|
7740
|
+
case GGML_TYPE_IQ4_NL:
|
7629
7741
|
{
|
7630
|
-
ggml_compute_forward_add_q_f32(params,
|
7742
|
+
ggml_compute_forward_add_q_f32(params, dst);
|
7631
7743
|
} break;
|
7632
7744
|
default:
|
7633
7745
|
{
|
@@ -7640,9 +7752,11 @@ static void ggml_compute_forward_add(
|
|
7640
7752
|
|
7641
7753
|
static void ggml_compute_forward_add1_f32(
|
7642
7754
|
const struct ggml_compute_params * params,
|
7643
|
-
const struct ggml_tensor * src0,
|
7644
|
-
const struct ggml_tensor * src1,
|
7645
7755
|
struct ggml_tensor * dst) {
|
7756
|
+
|
7757
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7758
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7759
|
+
|
7646
7760
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7647
7761
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7648
7762
|
|
@@ -7692,9 +7806,11 @@ static void ggml_compute_forward_add1_f32(
|
|
7692
7806
|
|
7693
7807
|
static void ggml_compute_forward_add1_f16_f32(
|
7694
7808
|
const struct ggml_compute_params * params,
|
7695
|
-
const struct ggml_tensor * src0,
|
7696
|
-
const struct ggml_tensor * src1,
|
7697
7809
|
struct ggml_tensor * dst) {
|
7810
|
+
|
7811
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7812
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7813
|
+
|
7698
7814
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7699
7815
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7700
7816
|
|
@@ -7742,9 +7858,11 @@ static void ggml_compute_forward_add1_f16_f32(
|
|
7742
7858
|
|
7743
7859
|
static void ggml_compute_forward_add1_f16_f16(
|
7744
7860
|
const struct ggml_compute_params * params,
|
7745
|
-
const struct ggml_tensor * src0,
|
7746
|
-
const struct ggml_tensor * src1,
|
7747
7861
|
struct ggml_tensor * dst) {
|
7862
|
+
|
7863
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7864
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7865
|
+
|
7748
7866
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7749
7867
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7750
7868
|
|
@@ -7792,9 +7910,11 @@ static void ggml_compute_forward_add1_f16_f16(
|
|
7792
7910
|
|
7793
7911
|
static void ggml_compute_forward_add1_q_f32(
|
7794
7912
|
const struct ggml_compute_params * params,
|
7795
|
-
const struct ggml_tensor * src0,
|
7796
|
-
const struct ggml_tensor * src1,
|
7797
7913
|
struct ggml_tensor * dst) {
|
7914
|
+
|
7915
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7916
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7917
|
+
|
7798
7918
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7799
7919
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7800
7920
|
|
@@ -7859,21 +7979,23 @@ static void ggml_compute_forward_add1_q_f32(
|
|
7859
7979
|
|
7860
7980
|
static void ggml_compute_forward_add1(
|
7861
7981
|
const struct ggml_compute_params * params,
|
7862
|
-
const struct ggml_tensor * src0,
|
7863
|
-
const struct ggml_tensor * src1,
|
7864
7982
|
struct ggml_tensor * dst) {
|
7983
|
+
|
7984
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
7985
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
7986
|
+
|
7865
7987
|
switch (src0->type) {
|
7866
7988
|
case GGML_TYPE_F32:
|
7867
7989
|
{
|
7868
|
-
ggml_compute_forward_add1_f32(params,
|
7990
|
+
ggml_compute_forward_add1_f32(params, dst);
|
7869
7991
|
} break;
|
7870
7992
|
case GGML_TYPE_F16:
|
7871
7993
|
{
|
7872
7994
|
if (src1->type == GGML_TYPE_F16) {
|
7873
|
-
ggml_compute_forward_add1_f16_f16(params,
|
7995
|
+
ggml_compute_forward_add1_f16_f16(params, dst);
|
7874
7996
|
}
|
7875
7997
|
else if (src1->type == GGML_TYPE_F32) {
|
7876
|
-
ggml_compute_forward_add1_f16_f32(params,
|
7998
|
+
ggml_compute_forward_add1_f16_f32(params, dst);
|
7877
7999
|
}
|
7878
8000
|
else {
|
7879
8001
|
GGML_ASSERT(false);
|
@@ -7893,8 +8015,10 @@ static void ggml_compute_forward_add1(
|
|
7893
8015
|
case GGML_TYPE_IQ2_XXS:
|
7894
8016
|
case GGML_TYPE_IQ2_XS:
|
7895
8017
|
case GGML_TYPE_IQ3_XXS:
|
8018
|
+
case GGML_TYPE_IQ1_S:
|
8019
|
+
case GGML_TYPE_IQ4_NL:
|
7896
8020
|
{
|
7897
|
-
ggml_compute_forward_add1_q_f32(params,
|
8021
|
+
ggml_compute_forward_add1_q_f32(params, dst);
|
7898
8022
|
} break;
|
7899
8023
|
default:
|
7900
8024
|
{
|
@@ -7907,9 +8031,11 @@ static void ggml_compute_forward_add1(
|
|
7907
8031
|
|
7908
8032
|
static void ggml_compute_forward_acc_f32(
|
7909
8033
|
const struct ggml_compute_params * params,
|
7910
|
-
const struct ggml_tensor * src0,
|
7911
|
-
const struct ggml_tensor * src1,
|
7912
8034
|
struct ggml_tensor * dst) {
|
8035
|
+
|
8036
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8037
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8038
|
+
|
7913
8039
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7914
8040
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
7915
8041
|
|
@@ -7989,14 +8115,14 @@ static void ggml_compute_forward_acc_f32(
|
|
7989
8115
|
|
7990
8116
|
static void ggml_compute_forward_acc(
|
7991
8117
|
const struct ggml_compute_params * params,
|
7992
|
-
const struct ggml_tensor * src0,
|
7993
|
-
const struct ggml_tensor * src1,
|
7994
8118
|
struct ggml_tensor * dst) {
|
7995
8119
|
|
8120
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8121
|
+
|
7996
8122
|
switch (src0->type) {
|
7997
8123
|
case GGML_TYPE_F32:
|
7998
8124
|
{
|
7999
|
-
ggml_compute_forward_acc_f32(params,
|
8125
|
+
ggml_compute_forward_acc_f32(params, dst);
|
8000
8126
|
} break;
|
8001
8127
|
case GGML_TYPE_F16:
|
8002
8128
|
case GGML_TYPE_Q4_0:
|
@@ -8013,6 +8139,8 @@ static void ggml_compute_forward_acc(
|
|
8013
8139
|
case GGML_TYPE_IQ2_XXS:
|
8014
8140
|
case GGML_TYPE_IQ2_XS:
|
8015
8141
|
case GGML_TYPE_IQ3_XXS:
|
8142
|
+
case GGML_TYPE_IQ1_S:
|
8143
|
+
case GGML_TYPE_IQ4_NL:
|
8016
8144
|
default:
|
8017
8145
|
{
|
8018
8146
|
GGML_ASSERT(false);
|
@@ -8024,9 +8152,11 @@ static void ggml_compute_forward_acc(
|
|
8024
8152
|
|
8025
8153
|
static void ggml_compute_forward_sub_f32(
|
8026
8154
|
const struct ggml_compute_params * params,
|
8027
|
-
const struct ggml_tensor * src0,
|
8028
|
-
const struct ggml_tensor * src1,
|
8029
8155
|
struct ggml_tensor * dst) {
|
8156
|
+
|
8157
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8158
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8159
|
+
|
8030
8160
|
assert(params->ith == 0);
|
8031
8161
|
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
8032
8162
|
|
@@ -8084,13 +8214,14 @@ static void ggml_compute_forward_sub_f32(
|
|
8084
8214
|
|
8085
8215
|
static void ggml_compute_forward_sub(
|
8086
8216
|
const struct ggml_compute_params * params,
|
8087
|
-
const struct ggml_tensor * src0,
|
8088
|
-
const struct ggml_tensor * src1,
|
8089
8217
|
struct ggml_tensor * dst) {
|
8218
|
+
|
8219
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8220
|
+
|
8090
8221
|
switch (src0->type) {
|
8091
8222
|
case GGML_TYPE_F32:
|
8092
8223
|
{
|
8093
|
-
ggml_compute_forward_sub_f32(params,
|
8224
|
+
ggml_compute_forward_sub_f32(params, dst);
|
8094
8225
|
} break;
|
8095
8226
|
default:
|
8096
8227
|
{
|
@@ -8103,9 +8234,11 @@ static void ggml_compute_forward_sub(
|
|
8103
8234
|
|
8104
8235
|
static void ggml_compute_forward_mul_f32(
|
8105
8236
|
const struct ggml_compute_params * params,
|
8106
|
-
const struct ggml_tensor * src0,
|
8107
|
-
const struct ggml_tensor * src1,
|
8108
8237
|
struct ggml_tensor * dst) {
|
8238
|
+
|
8239
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8240
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8241
|
+
|
8109
8242
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
8110
8243
|
|
8111
8244
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8186,15 +8319,17 @@ static void ggml_compute_forward_mul_f32(
|
|
8186
8319
|
|
8187
8320
|
static void ggml_compute_forward_mul(
|
8188
8321
|
const struct ggml_compute_params * params,
|
8189
|
-
const struct ggml_tensor * src0,
|
8190
|
-
const struct ggml_tensor * src1,
|
8191
8322
|
struct ggml_tensor * dst) {
|
8323
|
+
|
8324
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8325
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8326
|
+
|
8192
8327
|
GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now");
|
8193
8328
|
|
8194
8329
|
switch (src0->type) {
|
8195
8330
|
case GGML_TYPE_F32:
|
8196
8331
|
{
|
8197
|
-
ggml_compute_forward_mul_f32(params,
|
8332
|
+
ggml_compute_forward_mul_f32(params, dst);
|
8198
8333
|
} break;
|
8199
8334
|
default:
|
8200
8335
|
{
|
@@ -8207,9 +8342,11 @@ static void ggml_compute_forward_mul(
|
|
8207
8342
|
|
8208
8343
|
static void ggml_compute_forward_div_f32(
|
8209
8344
|
const struct ggml_compute_params * params,
|
8210
|
-
const struct ggml_tensor * src0,
|
8211
|
-
const struct ggml_tensor * src1,
|
8212
8345
|
struct ggml_tensor * dst) {
|
8346
|
+
|
8347
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8348
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
8349
|
+
|
8213
8350
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
8214
8351
|
|
8215
8352
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8280,13 +8417,14 @@ static void ggml_compute_forward_div_f32(
|
|
8280
8417
|
|
8281
8418
|
static void ggml_compute_forward_div(
|
8282
8419
|
const struct ggml_compute_params * params,
|
8283
|
-
const struct ggml_tensor * src0,
|
8284
|
-
const struct ggml_tensor * src1,
|
8285
8420
|
struct ggml_tensor * dst) {
|
8421
|
+
|
8422
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8423
|
+
|
8286
8424
|
switch (src0->type) {
|
8287
8425
|
case GGML_TYPE_F32:
|
8288
8426
|
{
|
8289
|
-
ggml_compute_forward_div_f32(params,
|
8427
|
+
ggml_compute_forward_div_f32(params, dst);
|
8290
8428
|
} break;
|
8291
8429
|
default:
|
8292
8430
|
{
|
@@ -8299,8 +8437,10 @@ static void ggml_compute_forward_div(
|
|
8299
8437
|
|
8300
8438
|
static void ggml_compute_forward_sqr_f32(
|
8301
8439
|
const struct ggml_compute_params * params,
|
8302
|
-
const struct ggml_tensor * src0,
|
8303
8440
|
struct ggml_tensor * dst) {
|
8441
|
+
|
8442
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8443
|
+
|
8304
8444
|
assert(params->ith == 0);
|
8305
8445
|
assert(ggml_are_same_shape(src0, dst));
|
8306
8446
|
|
@@ -8323,12 +8463,14 @@ static void ggml_compute_forward_sqr_f32(
|
|
8323
8463
|
|
8324
8464
|
static void ggml_compute_forward_sqr(
|
8325
8465
|
const struct ggml_compute_params * params,
|
8326
|
-
const struct ggml_tensor * src0,
|
8327
8466
|
struct ggml_tensor * dst) {
|
8467
|
+
|
8468
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8469
|
+
|
8328
8470
|
switch (src0->type) {
|
8329
8471
|
case GGML_TYPE_F32:
|
8330
8472
|
{
|
8331
|
-
ggml_compute_forward_sqr_f32(params,
|
8473
|
+
ggml_compute_forward_sqr_f32(params, dst);
|
8332
8474
|
} break;
|
8333
8475
|
default:
|
8334
8476
|
{
|
@@ -8341,8 +8483,10 @@ static void ggml_compute_forward_sqr(
|
|
8341
8483
|
|
8342
8484
|
static void ggml_compute_forward_sqrt_f32(
|
8343
8485
|
const struct ggml_compute_params * params,
|
8344
|
-
const struct ggml_tensor * src0,
|
8345
8486
|
struct ggml_tensor * dst) {
|
8487
|
+
|
8488
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8489
|
+
|
8346
8490
|
assert(params->ith == 0);
|
8347
8491
|
assert(ggml_are_same_shape(src0, dst));
|
8348
8492
|
|
@@ -8365,12 +8509,14 @@ static void ggml_compute_forward_sqrt_f32(
|
|
8365
8509
|
|
8366
8510
|
static void ggml_compute_forward_sqrt(
|
8367
8511
|
const struct ggml_compute_params * params,
|
8368
|
-
const struct ggml_tensor * src0,
|
8369
8512
|
struct ggml_tensor * dst) {
|
8513
|
+
|
8514
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8515
|
+
|
8370
8516
|
switch (src0->type) {
|
8371
8517
|
case GGML_TYPE_F32:
|
8372
8518
|
{
|
8373
|
-
ggml_compute_forward_sqrt_f32(params,
|
8519
|
+
ggml_compute_forward_sqrt_f32(params, dst);
|
8374
8520
|
} break;
|
8375
8521
|
default:
|
8376
8522
|
{
|
@@ -8383,8 +8529,10 @@ static void ggml_compute_forward_sqrt(
|
|
8383
8529
|
|
8384
8530
|
static void ggml_compute_forward_log_f32(
|
8385
8531
|
const struct ggml_compute_params * params,
|
8386
|
-
const struct ggml_tensor * src0,
|
8387
8532
|
struct ggml_tensor * dst) {
|
8533
|
+
|
8534
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8535
|
+
|
8388
8536
|
GGML_ASSERT(params->ith == 0);
|
8389
8537
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
8390
8538
|
|
@@ -8407,12 +8555,14 @@ static void ggml_compute_forward_log_f32(
|
|
8407
8555
|
|
8408
8556
|
static void ggml_compute_forward_log(
|
8409
8557
|
const struct ggml_compute_params * params,
|
8410
|
-
const struct ggml_tensor * src0,
|
8411
8558
|
struct ggml_tensor * dst) {
|
8559
|
+
|
8560
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8561
|
+
|
8412
8562
|
switch (src0->type) {
|
8413
8563
|
case GGML_TYPE_F32:
|
8414
8564
|
{
|
8415
|
-
ggml_compute_forward_log_f32(params,
|
8565
|
+
ggml_compute_forward_log_f32(params, dst);
|
8416
8566
|
} break;
|
8417
8567
|
default:
|
8418
8568
|
{
|
@@ -8425,8 +8575,10 @@ static void ggml_compute_forward_log(
|
|
8425
8575
|
|
8426
8576
|
static void ggml_compute_forward_sum_f32(
|
8427
8577
|
const struct ggml_compute_params * params,
|
8428
|
-
const struct ggml_tensor * src0,
|
8429
8578
|
struct ggml_tensor * dst) {
|
8579
|
+
|
8580
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8581
|
+
|
8430
8582
|
assert(params->ith == 0);
|
8431
8583
|
assert(ggml_is_scalar(dst));
|
8432
8584
|
|
@@ -8458,8 +8610,10 @@ static void ggml_compute_forward_sum_f32(
|
|
8458
8610
|
|
8459
8611
|
static void ggml_compute_forward_sum_f16(
|
8460
8612
|
const struct ggml_compute_params * params,
|
8461
|
-
const struct ggml_tensor * src0,
|
8462
8613
|
struct ggml_tensor * dst) {
|
8614
|
+
|
8615
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8616
|
+
|
8463
8617
|
assert(params->ith == 0);
|
8464
8618
|
assert(ggml_is_scalar(dst));
|
8465
8619
|
|
@@ -8490,16 +8644,18 @@ static void ggml_compute_forward_sum_f16(
|
|
8490
8644
|
|
8491
8645
|
static void ggml_compute_forward_sum(
|
8492
8646
|
const struct ggml_compute_params * params,
|
8493
|
-
const struct ggml_tensor * src0,
|
8494
8647
|
struct ggml_tensor * dst) {
|
8648
|
+
|
8649
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8650
|
+
|
8495
8651
|
switch (src0->type) {
|
8496
8652
|
case GGML_TYPE_F32:
|
8497
8653
|
{
|
8498
|
-
ggml_compute_forward_sum_f32(params,
|
8654
|
+
ggml_compute_forward_sum_f32(params, dst);
|
8499
8655
|
} break;
|
8500
8656
|
case GGML_TYPE_F16:
|
8501
8657
|
{
|
8502
|
-
ggml_compute_forward_sum_f16(params,
|
8658
|
+
ggml_compute_forward_sum_f16(params, dst);
|
8503
8659
|
} break;
|
8504
8660
|
default:
|
8505
8661
|
{
|
@@ -8512,8 +8668,10 @@ static void ggml_compute_forward_sum(
|
|
8512
8668
|
|
8513
8669
|
static void ggml_compute_forward_sum_rows_f32(
|
8514
8670
|
const struct ggml_compute_params * params,
|
8515
|
-
const struct ggml_tensor * src0,
|
8516
8671
|
struct ggml_tensor * dst) {
|
8672
|
+
|
8673
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8674
|
+
|
8517
8675
|
GGML_ASSERT(params->ith == 0);
|
8518
8676
|
|
8519
8677
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8545,12 +8703,14 @@ static void ggml_compute_forward_sum_rows_f32(
|
|
8545
8703
|
|
8546
8704
|
static void ggml_compute_forward_sum_rows(
|
8547
8705
|
const struct ggml_compute_params * params,
|
8548
|
-
const struct ggml_tensor * src0,
|
8549
8706
|
struct ggml_tensor * dst) {
|
8707
|
+
|
8708
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8709
|
+
|
8550
8710
|
switch (src0->type) {
|
8551
8711
|
case GGML_TYPE_F32:
|
8552
8712
|
{
|
8553
|
-
ggml_compute_forward_sum_rows_f32(params,
|
8713
|
+
ggml_compute_forward_sum_rows_f32(params, dst);
|
8554
8714
|
} break;
|
8555
8715
|
default:
|
8556
8716
|
{
|
@@ -8563,8 +8723,10 @@ static void ggml_compute_forward_sum_rows(
|
|
8563
8723
|
|
8564
8724
|
static void ggml_compute_forward_mean_f32(
|
8565
8725
|
const struct ggml_compute_params * params,
|
8566
|
-
const struct ggml_tensor * src0,
|
8567
8726
|
struct ggml_tensor * dst) {
|
8727
|
+
|
8728
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8729
|
+
|
8568
8730
|
assert(params->ith == 0);
|
8569
8731
|
|
8570
8732
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8600,12 +8762,14 @@ static void ggml_compute_forward_mean_f32(
|
|
8600
8762
|
|
8601
8763
|
static void ggml_compute_forward_mean(
|
8602
8764
|
const struct ggml_compute_params * params,
|
8603
|
-
const struct ggml_tensor * src0,
|
8604
8765
|
struct ggml_tensor * dst) {
|
8766
|
+
|
8767
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8768
|
+
|
8605
8769
|
switch (src0->type) {
|
8606
8770
|
case GGML_TYPE_F32:
|
8607
8771
|
{
|
8608
|
-
ggml_compute_forward_mean_f32(params,
|
8772
|
+
ggml_compute_forward_mean_f32(params, dst);
|
8609
8773
|
} break;
|
8610
8774
|
default:
|
8611
8775
|
{
|
@@ -8618,8 +8782,10 @@ static void ggml_compute_forward_mean(
|
|
8618
8782
|
|
8619
8783
|
static void ggml_compute_forward_argmax_f32(
|
8620
8784
|
const struct ggml_compute_params * params,
|
8621
|
-
const struct ggml_tensor * src0,
|
8622
8785
|
struct ggml_tensor * dst) {
|
8786
|
+
|
8787
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8788
|
+
|
8623
8789
|
assert(params->ith == 0);
|
8624
8790
|
|
8625
8791
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -8646,12 +8812,14 @@ static void ggml_compute_forward_argmax_f32(
|
|
8646
8812
|
|
8647
8813
|
static void ggml_compute_forward_argmax(
|
8648
8814
|
const struct ggml_compute_params * params,
|
8649
|
-
const struct ggml_tensor * src0,
|
8650
8815
|
struct ggml_tensor * dst) {
|
8816
|
+
|
8817
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8818
|
+
|
8651
8819
|
switch (src0->type) {
|
8652
8820
|
case GGML_TYPE_F32:
|
8653
8821
|
{
|
8654
|
-
ggml_compute_forward_argmax_f32(params,
|
8822
|
+
ggml_compute_forward_argmax_f32(params, dst);
|
8655
8823
|
} break;
|
8656
8824
|
default:
|
8657
8825
|
{
|
@@ -8664,8 +8832,10 @@ static void ggml_compute_forward_argmax(
|
|
8664
8832
|
|
8665
8833
|
static void ggml_compute_forward_repeat_f32(
|
8666
8834
|
const struct ggml_compute_params * params,
|
8667
|
-
const struct ggml_tensor * src0,
|
8668
8835
|
struct ggml_tensor * dst) {
|
8836
|
+
|
8837
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8838
|
+
|
8669
8839
|
GGML_ASSERT(params->ith == 0);
|
8670
8840
|
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
8671
8841
|
|
@@ -8707,8 +8877,10 @@ static void ggml_compute_forward_repeat_f32(
|
|
8707
8877
|
|
8708
8878
|
static void ggml_compute_forward_repeat_f16(
|
8709
8879
|
const struct ggml_compute_params * params,
|
8710
|
-
const struct ggml_tensor * src0,
|
8711
8880
|
struct ggml_tensor * dst) {
|
8881
|
+
|
8882
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8883
|
+
|
8712
8884
|
GGML_ASSERT(params->ith == 0);
|
8713
8885
|
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
8714
8886
|
|
@@ -8753,18 +8925,20 @@ static void ggml_compute_forward_repeat_f16(
|
|
8753
8925
|
|
8754
8926
|
static void ggml_compute_forward_repeat(
|
8755
8927
|
const struct ggml_compute_params * params,
|
8756
|
-
const struct ggml_tensor * src0,
|
8757
8928
|
struct ggml_tensor * dst) {
|
8929
|
+
|
8930
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8931
|
+
|
8758
8932
|
switch (src0->type) {
|
8759
8933
|
case GGML_TYPE_F16:
|
8760
8934
|
case GGML_TYPE_I16:
|
8761
8935
|
{
|
8762
|
-
ggml_compute_forward_repeat_f16(params,
|
8936
|
+
ggml_compute_forward_repeat_f16(params, dst);
|
8763
8937
|
} break;
|
8764
8938
|
case GGML_TYPE_F32:
|
8765
8939
|
case GGML_TYPE_I32:
|
8766
8940
|
{
|
8767
|
-
ggml_compute_forward_repeat_f32(params,
|
8941
|
+
ggml_compute_forward_repeat_f32(params, dst);
|
8768
8942
|
} break;
|
8769
8943
|
default:
|
8770
8944
|
{
|
@@ -8777,8 +8951,10 @@ static void ggml_compute_forward_repeat(
|
|
8777
8951
|
|
8778
8952
|
static void ggml_compute_forward_repeat_back_f32(
|
8779
8953
|
const struct ggml_compute_params * params,
|
8780
|
-
const struct ggml_tensor * src0,
|
8781
8954
|
struct ggml_tensor * dst) {
|
8955
|
+
|
8956
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
8957
|
+
|
8782
8958
|
GGML_ASSERT(params->ith == 0);
|
8783
8959
|
GGML_ASSERT(ggml_can_repeat(dst, src0));
|
8784
8960
|
|
@@ -8834,12 +9010,14 @@ static void ggml_compute_forward_repeat_back_f32(
|
|
8834
9010
|
|
8835
9011
|
static void ggml_compute_forward_repeat_back(
|
8836
9012
|
const struct ggml_compute_params * params,
|
8837
|
-
const struct ggml_tensor * src0,
|
8838
9013
|
struct ggml_tensor * dst) {
|
9014
|
+
|
9015
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9016
|
+
|
8839
9017
|
switch (src0->type) {
|
8840
9018
|
case GGML_TYPE_F32:
|
8841
9019
|
{
|
8842
|
-
ggml_compute_forward_repeat_back_f32(params,
|
9020
|
+
ggml_compute_forward_repeat_back_f32(params, dst);
|
8843
9021
|
} break;
|
8844
9022
|
default:
|
8845
9023
|
{
|
@@ -8852,10 +9030,11 @@ static void ggml_compute_forward_repeat_back(
|
|
8852
9030
|
|
8853
9031
|
static void ggml_compute_forward_concat_f32(
|
8854
9032
|
const struct ggml_compute_params * params,
|
8855
|
-
const struct ggml_tensor * src0,
|
8856
|
-
const struct ggml_tensor * src1,
|
8857
9033
|
struct ggml_tensor * dst) {
|
8858
9034
|
|
9035
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9036
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
9037
|
+
|
8859
9038
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
8860
9039
|
return;
|
8861
9040
|
}
|
@@ -8900,14 +9079,15 @@ static void ggml_compute_forward_concat_f32(
|
|
8900
9079
|
|
8901
9080
|
static void ggml_compute_forward_concat(
|
8902
9081
|
const struct ggml_compute_params* params,
|
8903
|
-
const struct ggml_tensor* src0,
|
8904
|
-
const struct ggml_tensor* src1,
|
8905
9082
|
struct ggml_tensor* dst) {
|
9083
|
+
|
9084
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9085
|
+
|
8906
9086
|
switch (src0->type) {
|
8907
9087
|
case GGML_TYPE_F32:
|
8908
9088
|
case GGML_TYPE_I32:
|
8909
9089
|
{
|
8910
|
-
ggml_compute_forward_concat_f32(params,
|
9090
|
+
ggml_compute_forward_concat_f32(params, dst);
|
8911
9091
|
} break;
|
8912
9092
|
default:
|
8913
9093
|
{
|
@@ -8920,8 +9100,10 @@ static void ggml_compute_forward_concat(
|
|
8920
9100
|
|
8921
9101
|
static void ggml_compute_forward_abs_f32(
|
8922
9102
|
const struct ggml_compute_params * params,
|
8923
|
-
const struct ggml_tensor * src0,
|
8924
9103
|
struct ggml_tensor * dst) {
|
9104
|
+
|
9105
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9106
|
+
|
8925
9107
|
assert(params->ith == 0);
|
8926
9108
|
assert(ggml_are_same_shape(src0, dst));
|
8927
9109
|
|
@@ -8944,12 +9126,14 @@ static void ggml_compute_forward_abs_f32(
|
|
8944
9126
|
|
8945
9127
|
static void ggml_compute_forward_abs(
|
8946
9128
|
const struct ggml_compute_params * params,
|
8947
|
-
const struct ggml_tensor * src0,
|
8948
9129
|
struct ggml_tensor * dst) {
|
9130
|
+
|
9131
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9132
|
+
|
8949
9133
|
switch (src0->type) {
|
8950
9134
|
case GGML_TYPE_F32:
|
8951
9135
|
{
|
8952
|
-
ggml_compute_forward_abs_f32(params,
|
9136
|
+
ggml_compute_forward_abs_f32(params, dst);
|
8953
9137
|
} break;
|
8954
9138
|
default:
|
8955
9139
|
{
|
@@ -8962,8 +9146,10 @@ static void ggml_compute_forward_abs(
|
|
8962
9146
|
|
8963
9147
|
static void ggml_compute_forward_sgn_f32(
|
8964
9148
|
const struct ggml_compute_params * params,
|
8965
|
-
const struct ggml_tensor * src0,
|
8966
9149
|
struct ggml_tensor * dst) {
|
9150
|
+
|
9151
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9152
|
+
|
8967
9153
|
assert(params->ith == 0);
|
8968
9154
|
assert(ggml_are_same_shape(src0, dst));
|
8969
9155
|
|
@@ -8986,12 +9172,14 @@ static void ggml_compute_forward_sgn_f32(
|
|
8986
9172
|
|
8987
9173
|
static void ggml_compute_forward_sgn(
|
8988
9174
|
const struct ggml_compute_params * params,
|
8989
|
-
const struct ggml_tensor * src0,
|
8990
9175
|
struct ggml_tensor * dst) {
|
9176
|
+
|
9177
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9178
|
+
|
8991
9179
|
switch (src0->type) {
|
8992
9180
|
case GGML_TYPE_F32:
|
8993
9181
|
{
|
8994
|
-
ggml_compute_forward_sgn_f32(params,
|
9182
|
+
ggml_compute_forward_sgn_f32(params, dst);
|
8995
9183
|
} break;
|
8996
9184
|
default:
|
8997
9185
|
{
|
@@ -9004,8 +9192,10 @@ static void ggml_compute_forward_sgn(
|
|
9004
9192
|
|
9005
9193
|
static void ggml_compute_forward_neg_f32(
|
9006
9194
|
const struct ggml_compute_params * params,
|
9007
|
-
const struct ggml_tensor * src0,
|
9008
9195
|
struct ggml_tensor * dst) {
|
9196
|
+
|
9197
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9198
|
+
|
9009
9199
|
assert(params->ith == 0);
|
9010
9200
|
assert(ggml_are_same_shape(src0, dst));
|
9011
9201
|
|
@@ -9028,12 +9218,14 @@ static void ggml_compute_forward_neg_f32(
|
|
9028
9218
|
|
9029
9219
|
static void ggml_compute_forward_neg(
|
9030
9220
|
const struct ggml_compute_params * params,
|
9031
|
-
const struct ggml_tensor * src0,
|
9032
9221
|
struct ggml_tensor * dst) {
|
9222
|
+
|
9223
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9224
|
+
|
9033
9225
|
switch (src0->type) {
|
9034
9226
|
case GGML_TYPE_F32:
|
9035
9227
|
{
|
9036
|
-
ggml_compute_forward_neg_f32(params,
|
9228
|
+
ggml_compute_forward_neg_f32(params, dst);
|
9037
9229
|
} break;
|
9038
9230
|
default:
|
9039
9231
|
{
|
@@ -9046,8 +9238,10 @@ static void ggml_compute_forward_neg(
|
|
9046
9238
|
|
9047
9239
|
static void ggml_compute_forward_step_f32(
|
9048
9240
|
const struct ggml_compute_params * params,
|
9049
|
-
const struct ggml_tensor * src0,
|
9050
9241
|
struct ggml_tensor * dst) {
|
9242
|
+
|
9243
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9244
|
+
|
9051
9245
|
assert(params->ith == 0);
|
9052
9246
|
assert(ggml_are_same_shape(src0, dst));
|
9053
9247
|
|
@@ -9070,12 +9264,14 @@ static void ggml_compute_forward_step_f32(
|
|
9070
9264
|
|
9071
9265
|
static void ggml_compute_forward_step(
|
9072
9266
|
const struct ggml_compute_params * params,
|
9073
|
-
const struct ggml_tensor * src0,
|
9074
9267
|
struct ggml_tensor * dst) {
|
9268
|
+
|
9269
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9270
|
+
|
9075
9271
|
switch (src0->type) {
|
9076
9272
|
case GGML_TYPE_F32:
|
9077
9273
|
{
|
9078
|
-
ggml_compute_forward_step_f32(params,
|
9274
|
+
ggml_compute_forward_step_f32(params, dst);
|
9079
9275
|
} break;
|
9080
9276
|
default:
|
9081
9277
|
{
|
@@ -9088,8 +9284,10 @@ static void ggml_compute_forward_step(
|
|
9088
9284
|
|
9089
9285
|
static void ggml_compute_forward_tanh_f32(
|
9090
9286
|
const struct ggml_compute_params * params,
|
9091
|
-
const struct ggml_tensor * src0,
|
9092
9287
|
struct ggml_tensor * dst) {
|
9288
|
+
|
9289
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9290
|
+
|
9093
9291
|
assert(params->ith == 0);
|
9094
9292
|
assert(ggml_are_same_shape(src0, dst));
|
9095
9293
|
|
@@ -9112,12 +9310,14 @@ static void ggml_compute_forward_tanh_f32(
|
|
9112
9310
|
|
9113
9311
|
static void ggml_compute_forward_tanh(
|
9114
9312
|
const struct ggml_compute_params * params,
|
9115
|
-
const struct ggml_tensor * src0,
|
9116
9313
|
struct ggml_tensor * dst) {
|
9314
|
+
|
9315
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9316
|
+
|
9117
9317
|
switch (src0->type) {
|
9118
9318
|
case GGML_TYPE_F32:
|
9119
9319
|
{
|
9120
|
-
ggml_compute_forward_tanh_f32(params,
|
9320
|
+
ggml_compute_forward_tanh_f32(params, dst);
|
9121
9321
|
} break;
|
9122
9322
|
default:
|
9123
9323
|
{
|
@@ -9130,8 +9330,10 @@ static void ggml_compute_forward_tanh(
|
|
9130
9330
|
|
9131
9331
|
static void ggml_compute_forward_elu_f32(
|
9132
9332
|
const struct ggml_compute_params * params,
|
9133
|
-
const struct ggml_tensor * src0,
|
9134
9333
|
struct ggml_tensor * dst) {
|
9334
|
+
|
9335
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9336
|
+
|
9135
9337
|
assert(params->ith == 0);
|
9136
9338
|
assert(ggml_are_same_shape(src0, dst));
|
9137
9339
|
|
@@ -9154,12 +9356,14 @@ static void ggml_compute_forward_elu_f32(
|
|
9154
9356
|
|
9155
9357
|
static void ggml_compute_forward_elu(
|
9156
9358
|
const struct ggml_compute_params * params,
|
9157
|
-
const struct ggml_tensor * src0,
|
9158
9359
|
struct ggml_tensor * dst) {
|
9360
|
+
|
9361
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9362
|
+
|
9159
9363
|
switch (src0->type) {
|
9160
9364
|
case GGML_TYPE_F32:
|
9161
9365
|
{
|
9162
|
-
ggml_compute_forward_elu_f32(params,
|
9366
|
+
ggml_compute_forward_elu_f32(params, dst);
|
9163
9367
|
} break;
|
9164
9368
|
default:
|
9165
9369
|
{
|
@@ -9172,8 +9376,10 @@ static void ggml_compute_forward_elu(
|
|
9172
9376
|
|
9173
9377
|
static void ggml_compute_forward_relu_f32(
|
9174
9378
|
const struct ggml_compute_params * params,
|
9175
|
-
const struct ggml_tensor * src0,
|
9176
9379
|
struct ggml_tensor * dst) {
|
9380
|
+
|
9381
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9382
|
+
|
9177
9383
|
assert(params->ith == 0);
|
9178
9384
|
assert(ggml_are_same_shape(src0, dst));
|
9179
9385
|
|
@@ -9196,12 +9402,14 @@ static void ggml_compute_forward_relu_f32(
|
|
9196
9402
|
|
9197
9403
|
static void ggml_compute_forward_relu(
|
9198
9404
|
const struct ggml_compute_params * params,
|
9199
|
-
const struct ggml_tensor * src0,
|
9200
9405
|
struct ggml_tensor * dst) {
|
9406
|
+
|
9407
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9408
|
+
|
9201
9409
|
switch (src0->type) {
|
9202
9410
|
case GGML_TYPE_F32:
|
9203
9411
|
{
|
9204
|
-
ggml_compute_forward_relu_f32(params,
|
9412
|
+
ggml_compute_forward_relu_f32(params, dst);
|
9205
9413
|
} break;
|
9206
9414
|
default:
|
9207
9415
|
{
|
@@ -9214,8 +9422,10 @@ static void ggml_compute_forward_relu(
|
|
9214
9422
|
|
9215
9423
|
static void ggml_compute_forward_gelu_f32(
|
9216
9424
|
const struct ggml_compute_params * params,
|
9217
|
-
const struct ggml_tensor * src0,
|
9218
9425
|
struct ggml_tensor * dst) {
|
9426
|
+
|
9427
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9428
|
+
|
9219
9429
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9220
9430
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
9221
9431
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
@@ -9255,12 +9465,14 @@ static void ggml_compute_forward_gelu_f32(
|
|
9255
9465
|
|
9256
9466
|
static void ggml_compute_forward_gelu(
|
9257
9467
|
const struct ggml_compute_params * params,
|
9258
|
-
const struct ggml_tensor * src0,
|
9259
9468
|
struct ggml_tensor * dst) {
|
9469
|
+
|
9470
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9471
|
+
|
9260
9472
|
switch (src0->type) {
|
9261
9473
|
case GGML_TYPE_F32:
|
9262
9474
|
{
|
9263
|
-
ggml_compute_forward_gelu_f32(params,
|
9475
|
+
ggml_compute_forward_gelu_f32(params, dst);
|
9264
9476
|
} break;
|
9265
9477
|
default:
|
9266
9478
|
{
|
@@ -9273,8 +9485,10 @@ static void ggml_compute_forward_gelu(
|
|
9273
9485
|
|
9274
9486
|
static void ggml_compute_forward_gelu_quick_f32(
|
9275
9487
|
const struct ggml_compute_params * params,
|
9276
|
-
const struct ggml_tensor * src0,
|
9277
9488
|
struct ggml_tensor * dst) {
|
9489
|
+
|
9490
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9491
|
+
|
9278
9492
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9279
9493
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
9280
9494
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
@@ -9314,12 +9528,14 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
9314
9528
|
|
9315
9529
|
static void ggml_compute_forward_gelu_quick(
|
9316
9530
|
const struct ggml_compute_params * params,
|
9317
|
-
const struct ggml_tensor * src0,
|
9318
9531
|
struct ggml_tensor * dst) {
|
9532
|
+
|
9533
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9534
|
+
|
9319
9535
|
switch (src0->type) {
|
9320
9536
|
case GGML_TYPE_F32:
|
9321
9537
|
{
|
9322
|
-
ggml_compute_forward_gelu_quick_f32(params,
|
9538
|
+
ggml_compute_forward_gelu_quick_f32(params, dst);
|
9323
9539
|
} break;
|
9324
9540
|
default:
|
9325
9541
|
{
|
@@ -9332,8 +9548,10 @@ static void ggml_compute_forward_gelu_quick(
|
|
9332
9548
|
|
9333
9549
|
static void ggml_compute_forward_silu_f32(
|
9334
9550
|
const struct ggml_compute_params * params,
|
9335
|
-
const struct ggml_tensor * src0,
|
9336
9551
|
struct ggml_tensor * dst) {
|
9552
|
+
|
9553
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9554
|
+
|
9337
9555
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9338
9556
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
9339
9557
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
@@ -9373,12 +9591,14 @@ static void ggml_compute_forward_silu_f32(
|
|
9373
9591
|
|
9374
9592
|
static void ggml_compute_forward_silu(
|
9375
9593
|
const struct ggml_compute_params * params,
|
9376
|
-
const struct ggml_tensor * src0,
|
9377
9594
|
struct ggml_tensor * dst) {
|
9595
|
+
|
9596
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9597
|
+
|
9378
9598
|
switch (src0->type) {
|
9379
9599
|
case GGML_TYPE_F32:
|
9380
9600
|
{
|
9381
|
-
ggml_compute_forward_silu_f32(params,
|
9601
|
+
ggml_compute_forward_silu_f32(params, dst);
|
9382
9602
|
} break;
|
9383
9603
|
default:
|
9384
9604
|
{
|
@@ -9390,8 +9610,10 @@ static void ggml_compute_forward_silu(
|
|
9390
9610
|
|
9391
9611
|
static void ggml_compute_forward_leaky_relu_f32(
|
9392
9612
|
const struct ggml_compute_params * params,
|
9393
|
-
const struct ggml_tensor * src0,
|
9394
9613
|
struct ggml_tensor * dst) {
|
9614
|
+
|
9615
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9616
|
+
|
9395
9617
|
assert(params->ith == 0);
|
9396
9618
|
assert(ggml_are_same_shape(src0, dst));
|
9397
9619
|
|
@@ -9417,12 +9639,14 @@ static void ggml_compute_forward_leaky_relu_f32(
|
|
9417
9639
|
|
9418
9640
|
static void ggml_compute_forward_leaky_relu(
|
9419
9641
|
const struct ggml_compute_params * params,
|
9420
|
-
const struct ggml_tensor * src0,
|
9421
9642
|
struct ggml_tensor * dst) {
|
9643
|
+
|
9644
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9645
|
+
|
9422
9646
|
switch (src0->type) {
|
9423
9647
|
case GGML_TYPE_F32:
|
9424
9648
|
{
|
9425
|
-
ggml_compute_forward_leaky_relu_f32(params,
|
9649
|
+
ggml_compute_forward_leaky_relu_f32(params, dst);
|
9426
9650
|
} break;
|
9427
9651
|
default:
|
9428
9652
|
{
|
@@ -9435,9 +9659,11 @@ static void ggml_compute_forward_leaky_relu(
|
|
9435
9659
|
|
9436
9660
|
static void ggml_compute_forward_silu_back_f32(
|
9437
9661
|
const struct ggml_compute_params * params,
|
9438
|
-
const struct ggml_tensor * src0,
|
9439
|
-
const struct ggml_tensor * grad,
|
9440
9662
|
struct ggml_tensor * dst) {
|
9663
|
+
|
9664
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9665
|
+
const struct ggml_tensor * grad = dst->src[1];
|
9666
|
+
|
9441
9667
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
|
9442
9668
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9443
9669
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
@@ -9480,13 +9706,14 @@ static void ggml_compute_forward_silu_back_f32(
|
|
9480
9706
|
|
9481
9707
|
static void ggml_compute_forward_silu_back(
|
9482
9708
|
const struct ggml_compute_params * params,
|
9483
|
-
const struct ggml_tensor * src0,
|
9484
|
-
const struct ggml_tensor * grad,
|
9485
9709
|
struct ggml_tensor * dst) {
|
9710
|
+
|
9711
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9712
|
+
|
9486
9713
|
switch (src0->type) {
|
9487
9714
|
case GGML_TYPE_F32:
|
9488
9715
|
{
|
9489
|
-
ggml_compute_forward_silu_back_f32(params,
|
9716
|
+
ggml_compute_forward_silu_back_f32(params, dst);
|
9490
9717
|
} break;
|
9491
9718
|
default:
|
9492
9719
|
{
|
@@ -9498,8 +9725,10 @@ static void ggml_compute_forward_silu_back(
|
|
9498
9725
|
|
9499
9726
|
static void ggml_compute_forward_hardswish_f32(
|
9500
9727
|
const struct ggml_compute_params * params,
|
9501
|
-
const struct ggml_tensor * src0,
|
9502
9728
|
struct ggml_tensor * dst) {
|
9729
|
+
|
9730
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9731
|
+
|
9503
9732
|
assert(params->ith == 0);
|
9504
9733
|
assert(ggml_are_same_shape(src0, dst));
|
9505
9734
|
|
@@ -9521,12 +9750,14 @@ static void ggml_compute_forward_hardswish_f32(
|
|
9521
9750
|
}
|
9522
9751
|
static void ggml_compute_forward_hardswish(
|
9523
9752
|
const struct ggml_compute_params * params,
|
9524
|
-
const struct ggml_tensor * src0,
|
9525
9753
|
struct ggml_tensor * dst) {
|
9754
|
+
|
9755
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9756
|
+
|
9526
9757
|
switch (src0->type) {
|
9527
9758
|
case GGML_TYPE_F32:
|
9528
9759
|
{
|
9529
|
-
ggml_compute_forward_hardswish_f32(params,
|
9760
|
+
ggml_compute_forward_hardswish_f32(params, dst);
|
9530
9761
|
} break;
|
9531
9762
|
default:
|
9532
9763
|
{
|
@@ -9537,8 +9768,10 @@ static void ggml_compute_forward_hardswish(
|
|
9537
9768
|
|
9538
9769
|
static void ggml_compute_forward_hardsigmoid_f32(
|
9539
9770
|
const struct ggml_compute_params * params,
|
9540
|
-
const struct ggml_tensor * src0,
|
9541
9771
|
struct ggml_tensor * dst) {
|
9772
|
+
|
9773
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9774
|
+
|
9542
9775
|
assert(params->ith == 0);
|
9543
9776
|
assert(ggml_are_same_shape(src0, dst));
|
9544
9777
|
|
@@ -9561,12 +9794,14 @@ static void ggml_compute_forward_hardsigmoid_f32(
|
|
9561
9794
|
|
9562
9795
|
static void ggml_compute_forward_hardsigmoid(
|
9563
9796
|
const struct ggml_compute_params * params,
|
9564
|
-
const struct ggml_tensor * src0,
|
9565
9797
|
struct ggml_tensor * dst) {
|
9798
|
+
|
9799
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9800
|
+
|
9566
9801
|
switch (src0->type) {
|
9567
9802
|
case GGML_TYPE_F32:
|
9568
9803
|
{
|
9569
|
-
ggml_compute_forward_hardsigmoid_f32(params,
|
9804
|
+
ggml_compute_forward_hardsigmoid_f32(params, dst);
|
9570
9805
|
} break;
|
9571
9806
|
default:
|
9572
9807
|
{
|
@@ -9580,8 +9815,10 @@ static void ggml_compute_forward_hardsigmoid(
|
|
9580
9815
|
|
9581
9816
|
static void ggml_compute_forward_norm_f32(
|
9582
9817
|
const struct ggml_compute_params * params,
|
9583
|
-
const struct ggml_tensor * src0,
|
9584
9818
|
struct ggml_tensor * dst) {
|
9819
|
+
|
9820
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9821
|
+
|
9585
9822
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9586
9823
|
|
9587
9824
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -9633,12 +9870,14 @@ static void ggml_compute_forward_norm_f32(
|
|
9633
9870
|
|
9634
9871
|
static void ggml_compute_forward_norm(
|
9635
9872
|
const struct ggml_compute_params * params,
|
9636
|
-
const struct ggml_tensor * src0,
|
9637
9873
|
struct ggml_tensor * dst) {
|
9874
|
+
|
9875
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9876
|
+
|
9638
9877
|
switch (src0->type) {
|
9639
9878
|
case GGML_TYPE_F32:
|
9640
9879
|
{
|
9641
|
-
ggml_compute_forward_norm_f32(params,
|
9880
|
+
ggml_compute_forward_norm_f32(params, dst);
|
9642
9881
|
} break;
|
9643
9882
|
default:
|
9644
9883
|
{
|
@@ -9651,8 +9890,10 @@ static void ggml_compute_forward_norm(
|
|
9651
9890
|
|
9652
9891
|
static void ggml_compute_forward_rms_norm_f32(
|
9653
9892
|
const struct ggml_compute_params * params,
|
9654
|
-
const struct ggml_tensor * src0,
|
9655
9893
|
struct ggml_tensor * dst) {
|
9894
|
+
|
9895
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9896
|
+
|
9656
9897
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9657
9898
|
|
9658
9899
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -9701,12 +9942,14 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
9701
9942
|
|
9702
9943
|
static void ggml_compute_forward_rms_norm(
|
9703
9944
|
const struct ggml_compute_params * params,
|
9704
|
-
const struct ggml_tensor * src0,
|
9705
9945
|
struct ggml_tensor * dst) {
|
9946
|
+
|
9947
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9948
|
+
|
9706
9949
|
switch (src0->type) {
|
9707
9950
|
case GGML_TYPE_F32:
|
9708
9951
|
{
|
9709
|
-
ggml_compute_forward_rms_norm_f32(params,
|
9952
|
+
ggml_compute_forward_rms_norm_f32(params, dst);
|
9710
9953
|
} break;
|
9711
9954
|
default:
|
9712
9955
|
{
|
@@ -9717,9 +9960,11 @@ static void ggml_compute_forward_rms_norm(
|
|
9717
9960
|
|
9718
9961
|
static void ggml_compute_forward_rms_norm_back_f32(
|
9719
9962
|
const struct ggml_compute_params * params,
|
9720
|
-
const struct ggml_tensor * src0,
|
9721
|
-
const struct ggml_tensor * src1,
|
9722
9963
|
struct ggml_tensor * dst) {
|
9964
|
+
|
9965
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
9966
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
9967
|
+
|
9723
9968
|
GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
|
9724
9969
|
|
9725
9970
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -9874,13 +10119,14 @@ static void ggml_compute_forward_rms_norm_back_f32(
|
|
9874
10119
|
|
9875
10120
|
static void ggml_compute_forward_rms_norm_back(
|
9876
10121
|
const struct ggml_compute_params * params,
|
9877
|
-
const struct ggml_tensor * src0,
|
9878
|
-
const struct ggml_tensor * src1,
|
9879
10122
|
struct ggml_tensor * dst) {
|
10123
|
+
|
10124
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
10125
|
+
|
9880
10126
|
switch (src0->type) {
|
9881
10127
|
case GGML_TYPE_F32:
|
9882
10128
|
{
|
9883
|
-
ggml_compute_forward_rms_norm_back_f32(params,
|
10129
|
+
ggml_compute_forward_rms_norm_back_f32(params, dst);
|
9884
10130
|
} break;
|
9885
10131
|
default:
|
9886
10132
|
{
|
@@ -9893,8 +10139,10 @@ static void ggml_compute_forward_rms_norm_back(
|
|
9893
10139
|
|
9894
10140
|
static void ggml_compute_forward_group_norm_f32(
|
9895
10141
|
const struct ggml_compute_params * params,
|
9896
|
-
const struct ggml_tensor * src0,
|
9897
10142
|
struct ggml_tensor * dst) {
|
10143
|
+
|
10144
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
10145
|
+
|
9898
10146
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9899
10147
|
|
9900
10148
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -9965,12 +10213,14 @@ static void ggml_compute_forward_group_norm_f32(
|
|
9965
10213
|
|
9966
10214
|
static void ggml_compute_forward_group_norm(
|
9967
10215
|
const struct ggml_compute_params * params,
|
9968
|
-
const struct ggml_tensor * src0,
|
9969
10216
|
struct ggml_tensor * dst) {
|
10217
|
+
|
10218
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
10219
|
+
|
9970
10220
|
switch (src0->type) {
|
9971
10221
|
case GGML_TYPE_F32:
|
9972
10222
|
{
|
9973
|
-
ggml_compute_forward_group_norm_f32(params,
|
10223
|
+
ggml_compute_forward_group_norm_f32(params, dst);
|
9974
10224
|
} break;
|
9975
10225
|
default:
|
9976
10226
|
{
|
@@ -10016,9 +10266,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(struct ggml_tensor * dst) {
|
|
10016
10266
|
|
10017
10267
|
static void ggml_compute_forward_mul_mat(
|
10018
10268
|
const struct ggml_compute_params * params,
|
10019
|
-
const struct ggml_tensor * src0,
|
10020
|
-
const struct ggml_tensor * src1,
|
10021
10269
|
struct ggml_tensor * dst) {
|
10270
|
+
|
10271
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
10272
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
10273
|
+
|
10022
10274
|
int64_t t0 = ggml_perf_time_us();
|
10023
10275
|
UNUSED(t0);
|
10024
10276
|
|
@@ -10263,10 +10515,11 @@ static void ggml_compute_forward_mul_mat(
|
|
10263
10515
|
|
10264
10516
|
static void ggml_compute_forward_mul_mat_id(
|
10265
10517
|
const struct ggml_compute_params * params,
|
10266
|
-
const struct ggml_tensor * ids,
|
10267
|
-
const struct ggml_tensor * src1,
|
10268
10518
|
struct ggml_tensor * dst) {
|
10269
10519
|
|
10520
|
+
const struct ggml_tensor * ids = dst->src[0];
|
10521
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
10522
|
+
|
10270
10523
|
const struct ggml_tensor * src0 = dst->src[2]; // only for GGML_TENSOR_BINARY_OP_LOCALS
|
10271
10524
|
|
10272
10525
|
GGML_TENSOR_BINARY_OP_LOCALS
|
@@ -10457,9 +10710,11 @@ static void ggml_compute_forward_mul_mat_id(
|
|
10457
10710
|
|
10458
10711
|
static void ggml_compute_forward_out_prod_f32(
|
10459
10712
|
const struct ggml_compute_params * params,
|
10460
|
-
const struct ggml_tensor * src0,
|
10461
|
-
const struct ggml_tensor * src1,
|
10462
10713
|
struct ggml_tensor * dst) {
|
10714
|
+
|
10715
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
10716
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
10717
|
+
|
10463
10718
|
// int64_t t0 = ggml_perf_time_us();
|
10464
10719
|
// UNUSED(t0);
|
10465
10720
|
|
@@ -10649,9 +10904,11 @@ static void ggml_compute_forward_out_prod_f32(
|
|
10649
10904
|
|
10650
10905
|
static void ggml_compute_forward_out_prod_q_f32(
|
10651
10906
|
const struct ggml_compute_params * params,
|
10652
|
-
const struct ggml_tensor * src0,
|
10653
|
-
const struct ggml_tensor * src1,
|
10654
10907
|
struct ggml_tensor * dst) {
|
10908
|
+
|
10909
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
10910
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
10911
|
+
|
10655
10912
|
// int64_t t0 = ggml_perf_time_us();
|
10656
10913
|
// UNUSED(t0);
|
10657
10914
|
|
@@ -10762,9 +11019,10 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|
10762
11019
|
|
10763
11020
|
static void ggml_compute_forward_out_prod(
|
10764
11021
|
const struct ggml_compute_params * params,
|
10765
|
-
const struct ggml_tensor * src0,
|
10766
|
-
const struct ggml_tensor * src1,
|
10767
11022
|
struct ggml_tensor * dst) {
|
11023
|
+
|
11024
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11025
|
+
|
10768
11026
|
switch (src0->type) {
|
10769
11027
|
case GGML_TYPE_Q4_0:
|
10770
11028
|
case GGML_TYPE_Q4_1:
|
@@ -10779,17 +11037,19 @@ static void ggml_compute_forward_out_prod(
|
|
10779
11037
|
case GGML_TYPE_IQ2_XXS:
|
10780
11038
|
case GGML_TYPE_IQ2_XS:
|
10781
11039
|
case GGML_TYPE_IQ3_XXS:
|
11040
|
+
case GGML_TYPE_IQ1_S:
|
11041
|
+
case GGML_TYPE_IQ4_NL:
|
10782
11042
|
{
|
10783
|
-
ggml_compute_forward_out_prod_q_f32(params,
|
11043
|
+
ggml_compute_forward_out_prod_q_f32(params, dst);
|
10784
11044
|
} break;
|
10785
11045
|
case GGML_TYPE_F16:
|
10786
11046
|
{
|
10787
11047
|
GGML_ASSERT(false); // todo
|
10788
|
-
// ggml_compute_forward_out_prod_f16_f32(params,
|
11048
|
+
// ggml_compute_forward_out_prod_f16_f32(params, dst);
|
10789
11049
|
} break;
|
10790
11050
|
case GGML_TYPE_F32:
|
10791
11051
|
{
|
10792
|
-
ggml_compute_forward_out_prod_f32(params,
|
11052
|
+
ggml_compute_forward_out_prod_f32(params, dst);
|
10793
11053
|
} break;
|
10794
11054
|
default:
|
10795
11055
|
{
|
@@ -10802,8 +11062,10 @@ static void ggml_compute_forward_out_prod(
|
|
10802
11062
|
|
10803
11063
|
static void ggml_compute_forward_scale_f32(
|
10804
11064
|
const struct ggml_compute_params * params,
|
10805
|
-
const struct ggml_tensor * src0,
|
10806
11065
|
struct ggml_tensor * dst) {
|
11066
|
+
|
11067
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11068
|
+
|
10807
11069
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
10808
11070
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
10809
11071
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
@@ -10844,12 +11106,14 @@ static void ggml_compute_forward_scale_f32(
|
|
10844
11106
|
|
10845
11107
|
static void ggml_compute_forward_scale(
|
10846
11108
|
const struct ggml_compute_params * params,
|
10847
|
-
const struct ggml_tensor * src0,
|
10848
11109
|
struct ggml_tensor * dst) {
|
11110
|
+
|
11111
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11112
|
+
|
10849
11113
|
switch (src0->type) {
|
10850
11114
|
case GGML_TYPE_F32:
|
10851
11115
|
{
|
10852
|
-
ggml_compute_forward_scale_f32(params,
|
11116
|
+
ggml_compute_forward_scale_f32(params, dst);
|
10853
11117
|
} break;
|
10854
11118
|
default:
|
10855
11119
|
{
|
@@ -10862,9 +11126,11 @@ static void ggml_compute_forward_scale(
|
|
10862
11126
|
|
10863
11127
|
static void ggml_compute_forward_set_f32(
|
10864
11128
|
const struct ggml_compute_params * params,
|
10865
|
-
const struct ggml_tensor * src0,
|
10866
|
-
const struct ggml_tensor * src1,
|
10867
11129
|
struct ggml_tensor * dst) {
|
11130
|
+
|
11131
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11132
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
11133
|
+
|
10868
11134
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10869
11135
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
10870
11136
|
|
@@ -10935,14 +11201,14 @@ static void ggml_compute_forward_set_f32(
|
|
10935
11201
|
|
10936
11202
|
static void ggml_compute_forward_set(
|
10937
11203
|
const struct ggml_compute_params * params,
|
10938
|
-
const struct ggml_tensor * src0,
|
10939
|
-
const struct ggml_tensor * src1,
|
10940
11204
|
struct ggml_tensor * dst) {
|
10941
11205
|
|
11206
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11207
|
+
|
10942
11208
|
switch (src0->type) {
|
10943
11209
|
case GGML_TYPE_F32:
|
10944
11210
|
{
|
10945
|
-
ggml_compute_forward_set_f32(params,
|
11211
|
+
ggml_compute_forward_set_f32(params, dst);
|
10946
11212
|
} break;
|
10947
11213
|
case GGML_TYPE_F16:
|
10948
11214
|
case GGML_TYPE_Q4_0:
|
@@ -10959,6 +11225,8 @@ static void ggml_compute_forward_set(
|
|
10959
11225
|
case GGML_TYPE_IQ2_XXS:
|
10960
11226
|
case GGML_TYPE_IQ2_XS:
|
10961
11227
|
case GGML_TYPE_IQ3_XXS:
|
11228
|
+
case GGML_TYPE_IQ1_S:
|
11229
|
+
case GGML_TYPE_IQ4_NL:
|
10962
11230
|
default:
|
10963
11231
|
{
|
10964
11232
|
GGML_ASSERT(false);
|
@@ -10970,29 +11238,25 @@ static void ggml_compute_forward_set(
|
|
10970
11238
|
|
10971
11239
|
static void ggml_compute_forward_cpy(
|
10972
11240
|
const struct ggml_compute_params * params,
|
10973
|
-
const struct ggml_tensor * src0,
|
10974
11241
|
struct ggml_tensor * dst) {
|
10975
|
-
ggml_compute_forward_dup(params,
|
11242
|
+
ggml_compute_forward_dup(params, dst);
|
10976
11243
|
}
|
10977
11244
|
|
10978
11245
|
// ggml_compute_forward_cont
|
10979
11246
|
|
10980
11247
|
static void ggml_compute_forward_cont(
|
10981
11248
|
const struct ggml_compute_params * params,
|
10982
|
-
const struct ggml_tensor * src0,
|
10983
11249
|
struct ggml_tensor * dst) {
|
10984
|
-
ggml_compute_forward_dup(params,
|
11250
|
+
ggml_compute_forward_dup(params, dst);
|
10985
11251
|
}
|
10986
11252
|
|
10987
11253
|
// ggml_compute_forward_reshape
|
10988
11254
|
|
10989
11255
|
static void ggml_compute_forward_reshape(
|
10990
11256
|
const struct ggml_compute_params * params,
|
10991
|
-
const struct ggml_tensor * src0,
|
10992
11257
|
struct ggml_tensor * dst) {
|
10993
11258
|
// NOP
|
10994
11259
|
UNUSED(params);
|
10995
|
-
UNUSED(src0);
|
10996
11260
|
UNUSED(dst);
|
10997
11261
|
}
|
10998
11262
|
|
@@ -11000,39 +11264,41 @@ static void ggml_compute_forward_reshape(
|
|
11000
11264
|
|
11001
11265
|
static void ggml_compute_forward_view(
|
11002
11266
|
const struct ggml_compute_params * params,
|
11003
|
-
const struct ggml_tensor *
|
11267
|
+
const struct ggml_tensor * dst) {
|
11004
11268
|
// NOP
|
11005
11269
|
UNUSED(params);
|
11006
|
-
UNUSED(
|
11270
|
+
UNUSED(dst);
|
11007
11271
|
}
|
11008
11272
|
|
11009
11273
|
// ggml_compute_forward_permute
|
11010
11274
|
|
11011
11275
|
static void ggml_compute_forward_permute(
|
11012
11276
|
const struct ggml_compute_params * params,
|
11013
|
-
const struct ggml_tensor *
|
11277
|
+
const struct ggml_tensor * dst) {
|
11014
11278
|
// NOP
|
11015
11279
|
UNUSED(params);
|
11016
|
-
UNUSED(
|
11280
|
+
UNUSED(dst);
|
11017
11281
|
}
|
11018
11282
|
|
11019
11283
|
// ggml_compute_forward_transpose
|
11020
11284
|
|
11021
11285
|
static void ggml_compute_forward_transpose(
|
11022
11286
|
const struct ggml_compute_params * params,
|
11023
|
-
const struct ggml_tensor *
|
11287
|
+
const struct ggml_tensor * dst) {
|
11024
11288
|
// NOP
|
11025
11289
|
UNUSED(params);
|
11026
|
-
UNUSED(
|
11290
|
+
UNUSED(dst);
|
11027
11291
|
}
|
11028
11292
|
|
11029
11293
|
// ggml_compute_forward_get_rows
|
11030
11294
|
|
11031
11295
|
static void ggml_compute_forward_get_rows_q(
|
11032
11296
|
const struct ggml_compute_params * params,
|
11033
|
-
const struct ggml_tensor * src0,
|
11034
|
-
const struct ggml_tensor * src1,
|
11035
11297
|
struct ggml_tensor * dst) {
|
11298
|
+
|
11299
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11300
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
11301
|
+
|
11036
11302
|
assert(params->ith == 0);
|
11037
11303
|
|
11038
11304
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -11068,9 +11334,11 @@ static void ggml_compute_forward_get_rows_q(
|
|
11068
11334
|
|
11069
11335
|
static void ggml_compute_forward_get_rows_f16(
|
11070
11336
|
const struct ggml_compute_params * params,
|
11071
|
-
const struct ggml_tensor * src0,
|
11072
|
-
const struct ggml_tensor * src1,
|
11073
11337
|
struct ggml_tensor * dst) {
|
11338
|
+
|
11339
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11340
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
11341
|
+
|
11074
11342
|
assert(params->ith == 0);
|
11075
11343
|
|
11076
11344
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -11103,9 +11371,11 @@ static void ggml_compute_forward_get_rows_f16(
|
|
11103
11371
|
|
11104
11372
|
static void ggml_compute_forward_get_rows_f32(
|
11105
11373
|
const struct ggml_compute_params * params,
|
11106
|
-
const struct ggml_tensor * src0,
|
11107
|
-
const struct ggml_tensor * src1,
|
11108
11374
|
struct ggml_tensor * dst) {
|
11375
|
+
|
11376
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11377
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
11378
|
+
|
11109
11379
|
assert(params->ith == 0);
|
11110
11380
|
|
11111
11381
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -11138,9 +11408,10 @@ static void ggml_compute_forward_get_rows_f32(
|
|
11138
11408
|
|
11139
11409
|
static void ggml_compute_forward_get_rows(
|
11140
11410
|
const struct ggml_compute_params * params,
|
11141
|
-
const struct ggml_tensor * src0,
|
11142
|
-
const struct ggml_tensor * src1,
|
11143
11411
|
struct ggml_tensor * dst) {
|
11412
|
+
|
11413
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11414
|
+
|
11144
11415
|
switch (src0->type) {
|
11145
11416
|
case GGML_TYPE_Q4_0:
|
11146
11417
|
case GGML_TYPE_Q4_1:
|
@@ -11156,17 +11427,19 @@ static void ggml_compute_forward_get_rows(
|
|
11156
11427
|
case GGML_TYPE_IQ2_XXS:
|
11157
11428
|
case GGML_TYPE_IQ2_XS:
|
11158
11429
|
case GGML_TYPE_IQ3_XXS:
|
11430
|
+
case GGML_TYPE_IQ1_S:
|
11431
|
+
case GGML_TYPE_IQ4_NL:
|
11159
11432
|
{
|
11160
|
-
ggml_compute_forward_get_rows_q(params,
|
11433
|
+
ggml_compute_forward_get_rows_q(params, dst);
|
11161
11434
|
} break;
|
11162
11435
|
case GGML_TYPE_F16:
|
11163
11436
|
{
|
11164
|
-
ggml_compute_forward_get_rows_f16(params,
|
11437
|
+
ggml_compute_forward_get_rows_f16(params, dst);
|
11165
11438
|
} break;
|
11166
11439
|
case GGML_TYPE_F32:
|
11167
11440
|
case GGML_TYPE_I32:
|
11168
11441
|
{
|
11169
|
-
ggml_compute_forward_get_rows_f32(params,
|
11442
|
+
ggml_compute_forward_get_rows_f32(params, dst);
|
11170
11443
|
} break;
|
11171
11444
|
default:
|
11172
11445
|
{
|
@@ -11197,9 +11470,11 @@ static void ggml_compute_forward_get_rows(
|
|
11197
11470
|
|
11198
11471
|
static void ggml_compute_forward_get_rows_back_f32_f16(
|
11199
11472
|
const struct ggml_compute_params * params,
|
11200
|
-
const struct ggml_tensor * src0,
|
11201
|
-
const struct ggml_tensor * src1,
|
11202
11473
|
struct ggml_tensor * dst) {
|
11474
|
+
|
11475
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11476
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
11477
|
+
|
11203
11478
|
GGML_ASSERT(params->ith == 0);
|
11204
11479
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
11205
11480
|
|
@@ -11234,9 +11509,11 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
|
|
11234
11509
|
|
11235
11510
|
static void ggml_compute_forward_get_rows_back_f32(
|
11236
11511
|
const struct ggml_compute_params * params,
|
11237
|
-
const struct ggml_tensor * src0,
|
11238
|
-
const struct ggml_tensor * src1,
|
11239
11512
|
struct ggml_tensor * dst) {
|
11513
|
+
|
11514
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11515
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
11516
|
+
|
11240
11517
|
GGML_ASSERT(params->ith == 0);
|
11241
11518
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
11242
11519
|
|
@@ -11271,17 +11548,18 @@ static void ggml_compute_forward_get_rows_back_f32(
|
|
11271
11548
|
|
11272
11549
|
static void ggml_compute_forward_get_rows_back(
|
11273
11550
|
const struct ggml_compute_params * params,
|
11274
|
-
const struct ggml_tensor * src0,
|
11275
|
-
const struct ggml_tensor * src1,
|
11276
11551
|
struct ggml_tensor * dst) {
|
11552
|
+
|
11553
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11554
|
+
|
11277
11555
|
switch (src0->type) {
|
11278
11556
|
case GGML_TYPE_F16:
|
11279
11557
|
{
|
11280
|
-
ggml_compute_forward_get_rows_back_f32_f16(params,
|
11558
|
+
ggml_compute_forward_get_rows_back_f32_f16(params, dst);
|
11281
11559
|
} break;
|
11282
11560
|
case GGML_TYPE_F32:
|
11283
11561
|
{
|
11284
|
-
ggml_compute_forward_get_rows_back_f32(params,
|
11562
|
+
ggml_compute_forward_get_rows_back_f32(params, dst);
|
11285
11563
|
} break;
|
11286
11564
|
default:
|
11287
11565
|
{
|
@@ -11312,8 +11590,10 @@ static void ggml_compute_forward_get_rows_back(
|
|
11312
11590
|
|
11313
11591
|
static void ggml_compute_forward_diag_f32(
|
11314
11592
|
const struct ggml_compute_params * params,
|
11315
|
-
const struct ggml_tensor * src0,
|
11316
11593
|
struct ggml_tensor * dst) {
|
11594
|
+
|
11595
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11596
|
+
|
11317
11597
|
GGML_ASSERT(params->ith == 0);
|
11318
11598
|
|
11319
11599
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -11352,12 +11632,14 @@ static void ggml_compute_forward_diag_f32(
|
|
11352
11632
|
|
11353
11633
|
static void ggml_compute_forward_diag(
|
11354
11634
|
const struct ggml_compute_params * params,
|
11355
|
-
const struct ggml_tensor * src0,
|
11356
11635
|
struct ggml_tensor * dst) {
|
11636
|
+
|
11637
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11638
|
+
|
11357
11639
|
switch (src0->type) {
|
11358
11640
|
case GGML_TYPE_F32:
|
11359
11641
|
{
|
11360
|
-
ggml_compute_forward_diag_f32(params,
|
11642
|
+
ggml_compute_forward_diag_f32(params, dst);
|
11361
11643
|
} break;
|
11362
11644
|
default:
|
11363
11645
|
{
|
@@ -11370,10 +11652,11 @@ static void ggml_compute_forward_diag(
|
|
11370
11652
|
|
11371
11653
|
static void ggml_compute_forward_diag_mask_f32(
|
11372
11654
|
const struct ggml_compute_params * params,
|
11373
|
-
const struct ggml_tensor * src0,
|
11374
11655
|
struct ggml_tensor * dst,
|
11375
11656
|
const float value) {
|
11376
11657
|
|
11658
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11659
|
+
|
11377
11660
|
const int ith = params->ith;
|
11378
11661
|
const int nth = params->nth;
|
11379
11662
|
|
@@ -11423,12 +11706,14 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11423
11706
|
|
11424
11707
|
static void ggml_compute_forward_diag_mask_inf(
|
11425
11708
|
const struct ggml_compute_params * params,
|
11426
|
-
const struct ggml_tensor * src0,
|
11427
11709
|
struct ggml_tensor * dst) {
|
11710
|
+
|
11711
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11712
|
+
|
11428
11713
|
switch (src0->type) {
|
11429
11714
|
case GGML_TYPE_F32:
|
11430
11715
|
{
|
11431
|
-
ggml_compute_forward_diag_mask_f32(params,
|
11716
|
+
ggml_compute_forward_diag_mask_f32(params, dst, -INFINITY);
|
11432
11717
|
} break;
|
11433
11718
|
default:
|
11434
11719
|
{
|
@@ -11439,12 +11724,14 @@ static void ggml_compute_forward_diag_mask_inf(
|
|
11439
11724
|
|
11440
11725
|
static void ggml_compute_forward_diag_mask_zero(
|
11441
11726
|
const struct ggml_compute_params * params,
|
11442
|
-
const struct ggml_tensor * src0,
|
11443
11727
|
struct ggml_tensor * dst) {
|
11728
|
+
|
11729
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11730
|
+
|
11444
11731
|
switch (src0->type) {
|
11445
11732
|
case GGML_TYPE_F32:
|
11446
11733
|
{
|
11447
|
-
ggml_compute_forward_diag_mask_f32(params,
|
11734
|
+
ggml_compute_forward_diag_mask_f32(params, dst, 0);
|
11448
11735
|
} break;
|
11449
11736
|
default:
|
11450
11737
|
{
|
@@ -11457,9 +11744,12 @@ static void ggml_compute_forward_diag_mask_zero(
|
|
11457
11744
|
|
11458
11745
|
static void ggml_compute_forward_soft_max_f32(
|
11459
11746
|
const struct ggml_compute_params * params,
|
11460
|
-
const struct ggml_tensor * src0,
|
11461
|
-
const struct ggml_tensor * src1,
|
11462
11747
|
struct ggml_tensor * dst) {
|
11748
|
+
|
11749
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11750
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
11751
|
+
const struct ggml_tensor * src2 = dst->src[2];
|
11752
|
+
|
11463
11753
|
assert(ggml_is_contiguous(dst));
|
11464
11754
|
assert(ggml_are_same_shape(src0, dst));
|
11465
11755
|
|
@@ -11467,16 +11757,29 @@ static void ggml_compute_forward_soft_max_f32(
|
|
11467
11757
|
return;
|
11468
11758
|
}
|
11469
11759
|
|
11470
|
-
float scale
|
11471
|
-
|
11760
|
+
float scale = 1.0f;
|
11761
|
+
float max_bias = 0.0f;
|
11762
|
+
|
11763
|
+
memcpy(&scale, (float *) dst->op_params + 0, sizeof(float));
|
11764
|
+
memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
|
11472
11765
|
|
11473
11766
|
// TODO: handle transposed/permuted matrices
|
11474
11767
|
|
11475
11768
|
const int ith = params->ith;
|
11476
11769
|
const int nth = params->nth;
|
11477
11770
|
|
11771
|
+
GGML_TENSOR_UNARY_OP_LOCALS
|
11772
|
+
|
11478
11773
|
const int64_t ne11 = src1 ? src1->ne[1] : 1;
|
11479
11774
|
|
11775
|
+
// TODO: is this supposed to be ceil instead of floor?
|
11776
|
+
// https://huggingface.co/mosaicml/mpt-7b/blob/main/attention.py#L370
|
11777
|
+
const uint32_t n_head_kv = ne02;
|
11778
|
+
const uint32_t n_head_log2 = 1u << (uint32_t) floor(log2(n_head_kv));
|
11779
|
+
|
11780
|
+
const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
|
11781
|
+
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
|
11782
|
+
|
11480
11783
|
const int nc = src0->ne[0];
|
11481
11784
|
const int nr = ggml_nrows(src0);
|
11482
11785
|
|
@@ -11489,6 +11792,9 @@ static void ggml_compute_forward_soft_max_f32(
|
|
11489
11792
|
|
11490
11793
|
float * wp = (float *) params->wdata + (nc + CACHE_LINE_SIZE_F32) * ith;
|
11491
11794
|
|
11795
|
+
// when max_bias <= 0.0f, src2 is not used and we default it to src0 to avoid branching
|
11796
|
+
float * pos = src2 ? (float *) src2->data : src0->data;
|
11797
|
+
|
11492
11798
|
for (int i1 = ir0; i1 < ir1; i1++) {
|
11493
11799
|
float * sp = (float *)((char *) src0->data + i1*src0->nb[1]);
|
11494
11800
|
float * dp = (float *)((char *) dst->data + i1*dst->nb[1]);
|
@@ -11502,6 +11808,16 @@ static void ggml_compute_forward_soft_max_f32(
|
|
11502
11808
|
ggml_vec_acc_f32(nc, wp, mp);
|
11503
11809
|
}
|
11504
11810
|
|
11811
|
+
// ALiBi bias
|
11812
|
+
if (max_bias > 0.0f) {
|
11813
|
+
const uint32_t h = (i1/ne01)%ne02; // head
|
11814
|
+
const float slope = h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1);
|
11815
|
+
|
11816
|
+
for (int i = 0; i < nc; i++) {
|
11817
|
+
wp[i] = wp[i] + slope*pos[i];
|
11818
|
+
}
|
11819
|
+
}
|
11820
|
+
|
11505
11821
|
#ifndef NDEBUG
|
11506
11822
|
for (int i = 0; i < nc; ++i) {
|
11507
11823
|
//printf("p[%d] = %f\n", i, p[i]);
|
@@ -11544,13 +11860,14 @@ static void ggml_compute_forward_soft_max_f32(
|
|
11544
11860
|
|
11545
11861
|
static void ggml_compute_forward_soft_max(
|
11546
11862
|
const struct ggml_compute_params * params,
|
11547
|
-
const struct ggml_tensor * src0,
|
11548
|
-
const struct ggml_tensor * src1,
|
11549
11863
|
struct ggml_tensor * dst) {
|
11864
|
+
|
11865
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11866
|
+
|
11550
11867
|
switch (src0->type) {
|
11551
11868
|
case GGML_TYPE_F32:
|
11552
11869
|
{
|
11553
|
-
ggml_compute_forward_soft_max_f32(params,
|
11870
|
+
ggml_compute_forward_soft_max_f32(params, dst);
|
11554
11871
|
} break;
|
11555
11872
|
default:
|
11556
11873
|
{
|
@@ -11563,9 +11880,11 @@ static void ggml_compute_forward_soft_max(
|
|
11563
11880
|
|
11564
11881
|
static void ggml_compute_forward_soft_max_back_f32(
|
11565
11882
|
const struct ggml_compute_params * params,
|
11566
|
-
const struct ggml_tensor * src0,
|
11567
|
-
const struct ggml_tensor * src1,
|
11568
11883
|
struct ggml_tensor * dst) {
|
11884
|
+
|
11885
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11886
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
11887
|
+
|
11569
11888
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
11570
11889
|
GGML_ASSERT(ggml_is_contiguous(src1));
|
11571
11890
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
@@ -11640,13 +11959,14 @@ static void ggml_compute_forward_soft_max_back_f32(
|
|
11640
11959
|
|
11641
11960
|
static void ggml_compute_forward_soft_max_back(
|
11642
11961
|
const struct ggml_compute_params * params,
|
11643
|
-
const struct ggml_tensor * src0,
|
11644
|
-
const struct ggml_tensor * src1,
|
11645
11962
|
struct ggml_tensor * dst) {
|
11963
|
+
|
11964
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11965
|
+
|
11646
11966
|
switch (src0->type) {
|
11647
11967
|
case GGML_TYPE_F32:
|
11648
11968
|
{
|
11649
|
-
ggml_compute_forward_soft_max_back_f32(params,
|
11969
|
+
ggml_compute_forward_soft_max_back_f32(params, dst);
|
11650
11970
|
} break;
|
11651
11971
|
default:
|
11652
11972
|
{
|
@@ -11659,8 +11979,10 @@ static void ggml_compute_forward_soft_max_back(
|
|
11659
11979
|
|
11660
11980
|
static void ggml_compute_forward_alibi_f32(
|
11661
11981
|
const struct ggml_compute_params * params,
|
11662
|
-
const struct ggml_tensor * src0,
|
11663
11982
|
struct ggml_tensor * dst) {
|
11983
|
+
|
11984
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
11985
|
+
|
11664
11986
|
assert(params->ith == 0);
|
11665
11987
|
|
11666
11988
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -11694,22 +12016,20 @@ static void ggml_compute_forward_alibi_f32(
|
|
11694
12016
|
const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
|
11695
12017
|
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
|
11696
12018
|
|
11697
|
-
for (int64_t
|
11698
|
-
|
11699
|
-
|
11700
|
-
float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
|
11701
|
-
float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2);
|
11702
|
-
|
11703
|
-
// TODO: k*nb2 or k*nb3
|
11704
|
-
|
11705
|
-
float m_k;
|
12019
|
+
for (int64_t k = 0; k < ne2_ne3; k++) {
|
12020
|
+
// TODO: k*nb2 or k*nb3
|
12021
|
+
float m_k;
|
11706
12022
|
|
11707
|
-
|
11708
|
-
|
11709
|
-
|
11710
|
-
|
11711
|
-
|
12023
|
+
if (k < n_heads_log2_floor) {
|
12024
|
+
m_k = powf(m0, k + 1);
|
12025
|
+
} else {
|
12026
|
+
m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
|
12027
|
+
}
|
11712
12028
|
|
12029
|
+
for (int64_t i = 0; i < ne0; i++) {
|
12030
|
+
for (int64_t j = 0; j < ne1; j++) {
|
12031
|
+
float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
|
12032
|
+
float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2);
|
11713
12033
|
pdst[0] = i * m_k + src[0];
|
11714
12034
|
}
|
11715
12035
|
}
|
@@ -11718,8 +12038,10 @@ static void ggml_compute_forward_alibi_f32(
|
|
11718
12038
|
|
11719
12039
|
static void ggml_compute_forward_alibi_f16(
|
11720
12040
|
const struct ggml_compute_params * params,
|
11721
|
-
const struct ggml_tensor * src0,
|
11722
12041
|
struct ggml_tensor * dst) {
|
12042
|
+
|
12043
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12044
|
+
|
11723
12045
|
assert(params->ith == 0);
|
11724
12046
|
|
11725
12047
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -11754,21 +12076,20 @@ static void ggml_compute_forward_alibi_f16(
|
|
11754
12076
|
const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
|
11755
12077
|
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
|
11756
12078
|
|
11757
|
-
for (int
|
11758
|
-
|
11759
|
-
|
11760
|
-
ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
|
11761
|
-
float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2);
|
11762
|
-
|
11763
|
-
// TODO: k*nb2 or k*nb3
|
12079
|
+
for (int k = 0; k < ne2_ne3; k++) {
|
12080
|
+
// TODO: k*nb2 or k*nb3
|
12081
|
+
float m_k;
|
11764
12082
|
|
11765
|
-
|
12083
|
+
if (k < n_heads_log2_floor) {
|
12084
|
+
m_k = powf(m0, k + 1);
|
12085
|
+
} else {
|
12086
|
+
m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
|
12087
|
+
}
|
11766
12088
|
|
11767
|
-
|
11768
|
-
|
11769
|
-
|
11770
|
-
|
11771
|
-
}
|
12089
|
+
for (int i = 0; i < ne0; i++) {
|
12090
|
+
for (int j = 0; j < ne1; j++) {
|
12091
|
+
ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2);
|
12092
|
+
float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2);
|
11772
12093
|
|
11773
12094
|
// we return F32
|
11774
12095
|
pdst[0] = i * m_k + GGML_FP16_TO_FP32(src[0]);
|
@@ -11779,16 +12100,18 @@ static void ggml_compute_forward_alibi_f16(
|
|
11779
12100
|
|
11780
12101
|
static void ggml_compute_forward_alibi(
|
11781
12102
|
const struct ggml_compute_params * params,
|
11782
|
-
const struct ggml_tensor * src0,
|
11783
12103
|
struct ggml_tensor * dst) {
|
12104
|
+
|
12105
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12106
|
+
|
11784
12107
|
switch (src0->type) {
|
11785
12108
|
case GGML_TYPE_F16:
|
11786
12109
|
{
|
11787
|
-
ggml_compute_forward_alibi_f16(params,
|
12110
|
+
ggml_compute_forward_alibi_f16(params, dst);
|
11788
12111
|
} break;
|
11789
12112
|
case GGML_TYPE_F32:
|
11790
12113
|
{
|
11791
|
-
ggml_compute_forward_alibi_f32(params,
|
12114
|
+
ggml_compute_forward_alibi_f32(params, dst);
|
11792
12115
|
} break;
|
11793
12116
|
case GGML_TYPE_Q4_0:
|
11794
12117
|
case GGML_TYPE_Q4_1:
|
@@ -11804,6 +12127,8 @@ static void ggml_compute_forward_alibi(
|
|
11804
12127
|
case GGML_TYPE_IQ2_XXS:
|
11805
12128
|
case GGML_TYPE_IQ2_XS:
|
11806
12129
|
case GGML_TYPE_IQ3_XXS:
|
12130
|
+
case GGML_TYPE_IQ1_S:
|
12131
|
+
case GGML_TYPE_IQ4_NL:
|
11807
12132
|
case GGML_TYPE_Q8_K:
|
11808
12133
|
case GGML_TYPE_I8:
|
11809
12134
|
case GGML_TYPE_I16:
|
@@ -11819,8 +12144,10 @@ static void ggml_compute_forward_alibi(
|
|
11819
12144
|
|
11820
12145
|
static void ggml_compute_forward_clamp_f32(
|
11821
12146
|
const struct ggml_compute_params * params,
|
11822
|
-
const struct ggml_tensor * src0,
|
11823
12147
|
struct ggml_tensor * dst) {
|
12148
|
+
|
12149
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12150
|
+
|
11824
12151
|
assert(params->ith == 0);
|
11825
12152
|
|
11826
12153
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -11859,12 +12186,14 @@ static void ggml_compute_forward_clamp_f32(
|
|
11859
12186
|
|
11860
12187
|
static void ggml_compute_forward_clamp(
|
11861
12188
|
const struct ggml_compute_params * params,
|
11862
|
-
const struct ggml_tensor * src0,
|
11863
12189
|
struct ggml_tensor * dst) {
|
12190
|
+
|
12191
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12192
|
+
|
11864
12193
|
switch (src0->type) {
|
11865
12194
|
case GGML_TYPE_F32:
|
11866
12195
|
{
|
11867
|
-
ggml_compute_forward_clamp_f32(params,
|
12196
|
+
ggml_compute_forward_clamp_f32(params, dst);
|
11868
12197
|
} break;
|
11869
12198
|
case GGML_TYPE_F16:
|
11870
12199
|
case GGML_TYPE_Q4_0:
|
@@ -11881,6 +12210,8 @@ static void ggml_compute_forward_clamp(
|
|
11881
12210
|
case GGML_TYPE_IQ2_XXS:
|
11882
12211
|
case GGML_TYPE_IQ2_XS:
|
11883
12212
|
case GGML_TYPE_IQ3_XXS:
|
12213
|
+
case GGML_TYPE_IQ1_S:
|
12214
|
+
case GGML_TYPE_IQ4_NL:
|
11884
12215
|
case GGML_TYPE_Q8_K:
|
11885
12216
|
case GGML_TYPE_I8:
|
11886
12217
|
case GGML_TYPE_I16:
|
@@ -11952,10 +12283,12 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
|
|
11952
12283
|
|
11953
12284
|
static void ggml_compute_forward_rope_f32(
|
11954
12285
|
const struct ggml_compute_params * params,
|
11955
|
-
const struct ggml_tensor * src0,
|
11956
|
-
const struct ggml_tensor * src1,
|
11957
12286
|
struct ggml_tensor * dst,
|
11958
12287
|
const bool forward) {
|
12288
|
+
|
12289
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12290
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
12291
|
+
|
11959
12292
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11960
12293
|
return;
|
11961
12294
|
}
|
@@ -12128,10 +12461,12 @@ static void ggml_compute_forward_rope_f32(
|
|
12128
12461
|
|
12129
12462
|
static void ggml_compute_forward_rope_f16(
|
12130
12463
|
const struct ggml_compute_params * params,
|
12131
|
-
const struct ggml_tensor * src0,
|
12132
|
-
const struct ggml_tensor * src1,
|
12133
12464
|
struct ggml_tensor * dst,
|
12134
12465
|
const bool forward) {
|
12466
|
+
|
12467
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12468
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
12469
|
+
|
12135
12470
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12136
12471
|
return;
|
12137
12472
|
}
|
@@ -12293,17 +12628,18 @@ static void ggml_compute_forward_rope_f16(
|
|
12293
12628
|
|
12294
12629
|
static void ggml_compute_forward_rope(
|
12295
12630
|
const struct ggml_compute_params * params,
|
12296
|
-
const struct ggml_tensor * src0,
|
12297
|
-
const struct ggml_tensor * src1,
|
12298
12631
|
struct ggml_tensor * dst) {
|
12632
|
+
|
12633
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12634
|
+
|
12299
12635
|
switch (src0->type) {
|
12300
12636
|
case GGML_TYPE_F16:
|
12301
12637
|
{
|
12302
|
-
ggml_compute_forward_rope_f16(params,
|
12638
|
+
ggml_compute_forward_rope_f16(params, dst, true);
|
12303
12639
|
} break;
|
12304
12640
|
case GGML_TYPE_F32:
|
12305
12641
|
{
|
12306
|
-
ggml_compute_forward_rope_f32(params,
|
12642
|
+
ggml_compute_forward_rope_f32(params, dst, true);
|
12307
12643
|
} break;
|
12308
12644
|
default:
|
12309
12645
|
{
|
@@ -12316,17 +12652,18 @@ static void ggml_compute_forward_rope(
|
|
12316
12652
|
|
12317
12653
|
static void ggml_compute_forward_rope_back(
|
12318
12654
|
const struct ggml_compute_params * params,
|
12319
|
-
const struct ggml_tensor * src0,
|
12320
|
-
const struct ggml_tensor * src1,
|
12321
12655
|
struct ggml_tensor * dst) {
|
12656
|
+
|
12657
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12658
|
+
|
12322
12659
|
switch (src0->type) {
|
12323
12660
|
case GGML_TYPE_F16:
|
12324
12661
|
{
|
12325
|
-
ggml_compute_forward_rope_f16(params,
|
12662
|
+
ggml_compute_forward_rope_f16(params, dst, false);
|
12326
12663
|
} break;
|
12327
12664
|
case GGML_TYPE_F32:
|
12328
12665
|
{
|
12329
|
-
ggml_compute_forward_rope_f32(params,
|
12666
|
+
ggml_compute_forward_rope_f32(params, dst, false);
|
12330
12667
|
} break;
|
12331
12668
|
default:
|
12332
12669
|
{
|
@@ -12339,9 +12676,11 @@ static void ggml_compute_forward_rope_back(
|
|
12339
12676
|
|
12340
12677
|
static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
12341
12678
|
const struct ggml_compute_params * params,
|
12342
|
-
const struct ggml_tensor * src0,
|
12343
|
-
const struct ggml_tensor * src1,
|
12344
12679
|
struct ggml_tensor * dst) {
|
12680
|
+
|
12681
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12682
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
12683
|
+
|
12345
12684
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12346
12685
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12347
12686
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
@@ -12436,9 +12775,11 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
12436
12775
|
|
12437
12776
|
static void ggml_compute_forward_conv_transpose_1d_f32(
|
12438
12777
|
const struct ggml_compute_params * params,
|
12439
|
-
const struct ggml_tensor * src0,
|
12440
|
-
const struct ggml_tensor * src1,
|
12441
12778
|
struct ggml_tensor * dst) {
|
12779
|
+
|
12780
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12781
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
12782
|
+
|
12442
12783
|
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
12443
12784
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12444
12785
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
@@ -12533,17 +12874,18 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
12533
12874
|
|
12534
12875
|
static void ggml_compute_forward_conv_transpose_1d(
|
12535
12876
|
const struct ggml_compute_params * params,
|
12536
|
-
const struct ggml_tensor * src0,
|
12537
|
-
const struct ggml_tensor * src1,
|
12538
12877
|
struct ggml_tensor * dst) {
|
12878
|
+
|
12879
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12880
|
+
|
12539
12881
|
switch (src0->type) {
|
12540
12882
|
case GGML_TYPE_F16:
|
12541
12883
|
{
|
12542
|
-
ggml_compute_forward_conv_transpose_1d_f16_f32(params,
|
12884
|
+
ggml_compute_forward_conv_transpose_1d_f16_f32(params, dst);
|
12543
12885
|
} break;
|
12544
12886
|
case GGML_TYPE_F32:
|
12545
12887
|
{
|
12546
|
-
ggml_compute_forward_conv_transpose_1d_f32(params,
|
12888
|
+
ggml_compute_forward_conv_transpose_1d_f32(params, dst);
|
12547
12889
|
} break;
|
12548
12890
|
default:
|
12549
12891
|
{
|
@@ -12557,9 +12899,11 @@ static void ggml_compute_forward_conv_transpose_1d(
|
|
12557
12899
|
// dst: result [N, OH, OW, IC*KH*KW]
|
12558
12900
|
static void ggml_compute_forward_im2col_f32(
|
12559
12901
|
const struct ggml_compute_params * params,
|
12560
|
-
const struct ggml_tensor * src0,
|
12561
|
-
const struct ggml_tensor * src1,
|
12562
12902
|
struct ggml_tensor * dst) {
|
12903
|
+
|
12904
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12905
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
12906
|
+
|
12563
12907
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12564
12908
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12565
12909
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
@@ -12643,9 +12987,11 @@ static void ggml_compute_forward_im2col_f32(
|
|
12643
12987
|
// dst: result [N, OH, OW, IC*KH*KW]
|
12644
12988
|
static void ggml_compute_forward_im2col_f16(
|
12645
12989
|
const struct ggml_compute_params * params,
|
12646
|
-
const struct ggml_tensor * src0,
|
12647
|
-
const struct ggml_tensor * src1,
|
12648
12990
|
struct ggml_tensor * dst) {
|
12991
|
+
|
12992
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
12993
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
12994
|
+
|
12649
12995
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12650
12996
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12651
12997
|
GGML_ASSERT( dst->type == GGML_TYPE_F16);
|
@@ -12725,17 +13071,15 @@ static void ggml_compute_forward_im2col_f16(
|
|
12725
13071
|
|
12726
13072
|
static void ggml_compute_forward_im2col(
|
12727
13073
|
const struct ggml_compute_params * params,
|
12728
|
-
const struct ggml_tensor * src0,
|
12729
|
-
const struct ggml_tensor * src1,
|
12730
13074
|
struct ggml_tensor * dst) {
|
12731
13075
|
switch (dst->type) {
|
12732
13076
|
case GGML_TYPE_F16:
|
12733
13077
|
{
|
12734
|
-
ggml_compute_forward_im2col_f16(params,
|
13078
|
+
ggml_compute_forward_im2col_f16(params, dst);
|
12735
13079
|
} break;
|
12736
13080
|
case GGML_TYPE_F32:
|
12737
13081
|
{
|
12738
|
-
ggml_compute_forward_im2col_f32(params,
|
13082
|
+
ggml_compute_forward_im2col_f32(params, dst);
|
12739
13083
|
} break;
|
12740
13084
|
default:
|
12741
13085
|
{
|
@@ -12749,9 +13093,11 @@ static void ggml_compute_forward_im2col(
|
|
12749
13093
|
|
12750
13094
|
static void ggml_compute_forward_conv_transpose_2d(
|
12751
13095
|
const struct ggml_compute_params * params,
|
12752
|
-
const struct ggml_tensor * src0,
|
12753
|
-
const struct ggml_tensor * src1,
|
12754
13096
|
struct ggml_tensor * dst) {
|
13097
|
+
|
13098
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13099
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
13100
|
+
|
12755
13101
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12756
13102
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12757
13103
|
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
@@ -12855,9 +13201,11 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
12855
13201
|
static void ggml_compute_forward_pool_1d_sk_p0(
|
12856
13202
|
const struct ggml_compute_params * params,
|
12857
13203
|
const enum ggml_op_pool op,
|
12858
|
-
const struct ggml_tensor * src,
|
12859
13204
|
const int k,
|
12860
13205
|
struct ggml_tensor * dst) {
|
13206
|
+
|
13207
|
+
const struct ggml_tensor * src = dst->src[0];
|
13208
|
+
|
12861
13209
|
assert(src->type == GGML_TYPE_F32);
|
12862
13210
|
assert(params->ith == 0);
|
12863
13211
|
|
@@ -12906,7 +13254,6 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
12906
13254
|
|
12907
13255
|
static void ggml_compute_forward_pool_1d(
|
12908
13256
|
const struct ggml_compute_params * params,
|
12909
|
-
const struct ggml_tensor * src0,
|
12910
13257
|
struct ggml_tensor * dst) {
|
12911
13258
|
|
12912
13259
|
const int32_t * opts = (const int32_t *)dst->op_params;
|
@@ -12917,15 +13264,17 @@ static void ggml_compute_forward_pool_1d(
|
|
12917
13264
|
GGML_ASSERT(p0 == 0); // padding not supported
|
12918
13265
|
GGML_ASSERT(k0 == s0); // only s = k supported
|
12919
13266
|
|
12920
|
-
ggml_compute_forward_pool_1d_sk_p0(params, op,
|
13267
|
+
ggml_compute_forward_pool_1d_sk_p0(params, op, k0, dst);
|
12921
13268
|
}
|
12922
13269
|
|
12923
13270
|
// ggml_compute_forward_pool_2d
|
12924
13271
|
|
12925
13272
|
static void ggml_compute_forward_pool_2d(
|
12926
13273
|
const struct ggml_compute_params * params,
|
12927
|
-
const struct ggml_tensor * src,
|
12928
13274
|
struct ggml_tensor * dst) {
|
13275
|
+
|
13276
|
+
const struct ggml_tensor * src = dst->src[0];
|
13277
|
+
|
12929
13278
|
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
12930
13279
|
GGML_ASSERT(params->ith == 0);
|
12931
13280
|
|
@@ -12998,9 +13347,10 @@ static void ggml_compute_forward_pool_2d(
|
|
12998
13347
|
|
12999
13348
|
static void ggml_compute_forward_upscale_f32(
|
13000
13349
|
const struct ggml_compute_params * params,
|
13001
|
-
const struct ggml_tensor * src0,
|
13002
13350
|
struct ggml_tensor * dst) {
|
13003
13351
|
|
13352
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13353
|
+
|
13004
13354
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
13005
13355
|
return;
|
13006
13356
|
}
|
@@ -13037,12 +13387,14 @@ static void ggml_compute_forward_upscale_f32(
|
|
13037
13387
|
|
13038
13388
|
static void ggml_compute_forward_upscale(
|
13039
13389
|
const struct ggml_compute_params * params,
|
13040
|
-
const struct ggml_tensor * src0,
|
13041
13390
|
struct ggml_tensor * dst) {
|
13391
|
+
|
13392
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13393
|
+
|
13042
13394
|
switch (src0->type) {
|
13043
13395
|
case GGML_TYPE_F32:
|
13044
13396
|
{
|
13045
|
-
ggml_compute_forward_upscale_f32(params,
|
13397
|
+
ggml_compute_forward_upscale_f32(params, dst);
|
13046
13398
|
} break;
|
13047
13399
|
default:
|
13048
13400
|
{
|
@@ -13055,9 +13407,10 @@ static void ggml_compute_forward_upscale(
|
|
13055
13407
|
|
13056
13408
|
static void ggml_compute_forward_pad_f32(
|
13057
13409
|
const struct ggml_compute_params * params,
|
13058
|
-
const struct ggml_tensor * src0,
|
13059
13410
|
struct ggml_tensor * dst) {
|
13060
13411
|
|
13412
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13413
|
+
|
13061
13414
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
13062
13415
|
return;
|
13063
13416
|
}
|
@@ -13095,12 +13448,14 @@ static void ggml_compute_forward_pad_f32(
|
|
13095
13448
|
|
13096
13449
|
static void ggml_compute_forward_pad(
|
13097
13450
|
const struct ggml_compute_params * params,
|
13098
|
-
const struct ggml_tensor * src0,
|
13099
13451
|
struct ggml_tensor * dst) {
|
13452
|
+
|
13453
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13454
|
+
|
13100
13455
|
switch (src0->type) {
|
13101
13456
|
case GGML_TYPE_F32:
|
13102
13457
|
{
|
13103
|
-
ggml_compute_forward_pad_f32(params,
|
13458
|
+
ggml_compute_forward_pad_f32(params, dst);
|
13104
13459
|
} break;
|
13105
13460
|
default:
|
13106
13461
|
{
|
@@ -13113,9 +13468,10 @@ static void ggml_compute_forward_pad(
|
|
13113
13468
|
|
13114
13469
|
static void ggml_compute_forward_argsort_f32(
|
13115
13470
|
const struct ggml_compute_params * params,
|
13116
|
-
const struct ggml_tensor * src0,
|
13117
13471
|
struct ggml_tensor * dst) {
|
13118
13472
|
|
13473
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13474
|
+
|
13119
13475
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
13120
13476
|
return;
|
13121
13477
|
}
|
@@ -13155,13 +13511,14 @@ static void ggml_compute_forward_argsort_f32(
|
|
13155
13511
|
|
13156
13512
|
static void ggml_compute_forward_argsort(
|
13157
13513
|
const struct ggml_compute_params * params,
|
13158
|
-
const struct ggml_tensor * src0,
|
13159
13514
|
struct ggml_tensor * dst) {
|
13160
13515
|
|
13516
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13517
|
+
|
13161
13518
|
switch (src0->type) {
|
13162
13519
|
case GGML_TYPE_F32:
|
13163
13520
|
{
|
13164
|
-
ggml_compute_forward_argsort_f32(params,
|
13521
|
+
ggml_compute_forward_argsort_f32(params, dst);
|
13165
13522
|
} break;
|
13166
13523
|
default:
|
13167
13524
|
{
|
@@ -13174,11 +13531,13 @@ static void ggml_compute_forward_argsort(
|
|
13174
13531
|
|
13175
13532
|
static void ggml_compute_forward_flash_attn_f32(
|
13176
13533
|
const struct ggml_compute_params * params,
|
13177
|
-
const struct ggml_tensor * q,
|
13178
|
-
const struct ggml_tensor * k,
|
13179
|
-
const struct ggml_tensor * v,
|
13180
13534
|
const bool masked,
|
13181
13535
|
struct ggml_tensor * dst) {
|
13536
|
+
|
13537
|
+
const struct ggml_tensor * q = dst->src[0];
|
13538
|
+
const struct ggml_tensor * k = dst->src[1];
|
13539
|
+
const struct ggml_tensor * v = dst->src[2];
|
13540
|
+
|
13182
13541
|
int64_t t0 = ggml_perf_time_us();
|
13183
13542
|
UNUSED(t0);
|
13184
13543
|
|
@@ -13364,11 +13723,13 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13364
13723
|
|
13365
13724
|
static void ggml_compute_forward_flash_attn_f16(
|
13366
13725
|
const struct ggml_compute_params * params,
|
13367
|
-
const struct ggml_tensor * q,
|
13368
|
-
const struct ggml_tensor * k,
|
13369
|
-
const struct ggml_tensor * v,
|
13370
13726
|
const bool masked,
|
13371
13727
|
struct ggml_tensor * dst) {
|
13728
|
+
|
13729
|
+
const struct ggml_tensor * q = dst->src[0];
|
13730
|
+
const struct ggml_tensor * k = dst->src[1];
|
13731
|
+
const struct ggml_tensor * v = dst->src[2];
|
13732
|
+
|
13372
13733
|
int64_t t0 = ggml_perf_time_us();
|
13373
13734
|
UNUSED(t0);
|
13374
13735
|
|
@@ -13590,19 +13951,19 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13590
13951
|
|
13591
13952
|
static void ggml_compute_forward_flash_attn(
|
13592
13953
|
const struct ggml_compute_params * params,
|
13593
|
-
const struct ggml_tensor * q,
|
13594
|
-
const struct ggml_tensor * k,
|
13595
|
-
const struct ggml_tensor * v,
|
13596
13954
|
const bool masked,
|
13597
13955
|
struct ggml_tensor * dst) {
|
13956
|
+
|
13957
|
+
const struct ggml_tensor * q = dst->src[0];
|
13958
|
+
|
13598
13959
|
switch (q->type) {
|
13599
13960
|
case GGML_TYPE_F16:
|
13600
13961
|
{
|
13601
|
-
ggml_compute_forward_flash_attn_f16(params,
|
13962
|
+
ggml_compute_forward_flash_attn_f16(params, masked, dst);
|
13602
13963
|
} break;
|
13603
13964
|
case GGML_TYPE_F32:
|
13604
13965
|
{
|
13605
|
-
ggml_compute_forward_flash_attn_f32(params,
|
13966
|
+
ggml_compute_forward_flash_attn_f32(params, masked, dst);
|
13606
13967
|
} break;
|
13607
13968
|
default:
|
13608
13969
|
{
|
@@ -13615,12 +13976,14 @@ static void ggml_compute_forward_flash_attn(
|
|
13615
13976
|
|
13616
13977
|
static void ggml_compute_forward_flash_ff_f16(
|
13617
13978
|
const struct ggml_compute_params * params,
|
13618
|
-
const struct ggml_tensor * a, // F16
|
13619
|
-
const struct ggml_tensor * b0, // F16 fc_w
|
13620
|
-
const struct ggml_tensor * b1, // F32 fc_b
|
13621
|
-
const struct ggml_tensor * c0, // F16 proj_w
|
13622
|
-
const struct ggml_tensor * c1, // F32 proj_b
|
13623
13979
|
struct ggml_tensor * dst) {
|
13980
|
+
|
13981
|
+
const struct ggml_tensor * a = dst->src[0]; // F16
|
13982
|
+
const struct ggml_tensor * b0 = dst->src[1]; // F16 fc_w
|
13983
|
+
const struct ggml_tensor * b1 = dst->src[2]; // F32 fc_b
|
13984
|
+
const struct ggml_tensor * c0 = dst->src[3]; // F16 proj_w
|
13985
|
+
const struct ggml_tensor * c1 = dst->src[4]; // F32 proj_b
|
13986
|
+
|
13624
13987
|
int64_t t0 = ggml_perf_time_us();
|
13625
13988
|
UNUSED(t0);
|
13626
13989
|
|
@@ -13748,16 +14111,14 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
13748
14111
|
|
13749
14112
|
static void ggml_compute_forward_flash_ff(
|
13750
14113
|
const struct ggml_compute_params * params,
|
13751
|
-
const struct ggml_tensor * a,
|
13752
|
-
const struct ggml_tensor * b0,
|
13753
|
-
const struct ggml_tensor * b1,
|
13754
|
-
const struct ggml_tensor * c0,
|
13755
|
-
const struct ggml_tensor * c1,
|
13756
14114
|
struct ggml_tensor * dst) {
|
14115
|
+
|
14116
|
+
const struct ggml_tensor * b0 = dst->src[1];
|
14117
|
+
|
13757
14118
|
switch (b0->type) {
|
13758
14119
|
case GGML_TYPE_F16:
|
13759
14120
|
{
|
13760
|
-
ggml_compute_forward_flash_ff_f16(params,
|
14121
|
+
ggml_compute_forward_flash_ff_f16(params, dst);
|
13761
14122
|
} break;
|
13762
14123
|
case GGML_TYPE_F32:
|
13763
14124
|
{
|
@@ -13774,12 +14135,14 @@ static void ggml_compute_forward_flash_ff(
|
|
13774
14135
|
|
13775
14136
|
static void ggml_compute_forward_flash_attn_back_f32(
|
13776
14137
|
const struct ggml_compute_params * params,
|
13777
|
-
const struct ggml_tensor * q,
|
13778
|
-
const struct ggml_tensor * k,
|
13779
|
-
const struct ggml_tensor * v,
|
13780
|
-
const struct ggml_tensor * d,
|
13781
14138
|
const bool masked,
|
13782
14139
|
struct ggml_tensor * dst) {
|
14140
|
+
|
14141
|
+
const struct ggml_tensor * q = dst->src[0];
|
14142
|
+
const struct ggml_tensor * k = dst->src[1];
|
14143
|
+
const struct ggml_tensor * v = dst->src[2];
|
14144
|
+
const struct ggml_tensor * d = dst->src[3];
|
14145
|
+
|
13783
14146
|
int64_t t0 = ggml_perf_time_us();
|
13784
14147
|
UNUSED(t0);
|
13785
14148
|
|
@@ -14127,16 +14490,15 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
14127
14490
|
|
14128
14491
|
static void ggml_compute_forward_flash_attn_back(
|
14129
14492
|
const struct ggml_compute_params * params,
|
14130
|
-
const struct ggml_tensor * q,
|
14131
|
-
const struct ggml_tensor * k,
|
14132
|
-
const struct ggml_tensor * v,
|
14133
|
-
const struct ggml_tensor * d,
|
14134
14493
|
const bool masked,
|
14135
14494
|
struct ggml_tensor * dst) {
|
14495
|
+
|
14496
|
+
const struct ggml_tensor * q = dst->src[0];
|
14497
|
+
|
14136
14498
|
switch (q->type) {
|
14137
14499
|
case GGML_TYPE_F32:
|
14138
14500
|
{
|
14139
|
-
ggml_compute_forward_flash_attn_back_f32(params,
|
14501
|
+
ggml_compute_forward_flash_attn_back_f32(params, masked, dst);
|
14140
14502
|
} break;
|
14141
14503
|
default:
|
14142
14504
|
{
|
@@ -14149,8 +14511,10 @@ static void ggml_compute_forward_flash_attn_back(
|
|
14149
14511
|
|
14150
14512
|
static void ggml_compute_forward_win_part_f32(
|
14151
14513
|
const struct ggml_compute_params * params,
|
14152
|
-
const struct ggml_tensor * src0,
|
14153
14514
|
struct ggml_tensor * dst) {
|
14515
|
+
|
14516
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14517
|
+
|
14154
14518
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14155
14519
|
return;
|
14156
14520
|
}
|
@@ -14193,12 +14557,14 @@ static void ggml_compute_forward_win_part_f32(
|
|
14193
14557
|
|
14194
14558
|
static void ggml_compute_forward_win_part(
|
14195
14559
|
const struct ggml_compute_params * params,
|
14196
|
-
const struct ggml_tensor * src0,
|
14197
14560
|
struct ggml_tensor * dst) {
|
14561
|
+
|
14562
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14563
|
+
|
14198
14564
|
switch (src0->type) {
|
14199
14565
|
case GGML_TYPE_F32:
|
14200
14566
|
{
|
14201
|
-
ggml_compute_forward_win_part_f32(params,
|
14567
|
+
ggml_compute_forward_win_part_f32(params, dst);
|
14202
14568
|
} break;
|
14203
14569
|
default:
|
14204
14570
|
{
|
@@ -14211,8 +14577,10 @@ static void ggml_compute_forward_win_part(
|
|
14211
14577
|
|
14212
14578
|
static void ggml_compute_forward_win_unpart_f32(
|
14213
14579
|
const struct ggml_compute_params * params,
|
14214
|
-
const struct ggml_tensor * src0,
|
14215
14580
|
struct ggml_tensor * dst) {
|
14581
|
+
|
14582
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14583
|
+
|
14216
14584
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14217
14585
|
return;
|
14218
14586
|
}
|
@@ -14253,12 +14621,14 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14253
14621
|
|
14254
14622
|
static void ggml_compute_forward_win_unpart(
|
14255
14623
|
const struct ggml_compute_params * params,
|
14256
|
-
const struct ggml_tensor * src0,
|
14257
14624
|
struct ggml_tensor * dst) {
|
14625
|
+
|
14626
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14627
|
+
|
14258
14628
|
switch (src0->type) {
|
14259
14629
|
case GGML_TYPE_F32:
|
14260
14630
|
{
|
14261
|
-
ggml_compute_forward_win_unpart_f32(params,
|
14631
|
+
ggml_compute_forward_win_unpart_f32(params, dst);
|
14262
14632
|
} break;
|
14263
14633
|
default:
|
14264
14634
|
{
|
@@ -14271,58 +14641,58 @@ static void ggml_compute_forward_win_unpart(
|
|
14271
14641
|
|
14272
14642
|
static void ggml_compute_forward_unary(
|
14273
14643
|
const struct ggml_compute_params * params,
|
14274
|
-
const struct ggml_tensor * src0,
|
14275
14644
|
struct ggml_tensor * dst) {
|
14645
|
+
|
14276
14646
|
const enum ggml_unary_op op = ggml_get_unary_op(dst);
|
14277
14647
|
|
14278
14648
|
switch (op) {
|
14279
14649
|
case GGML_UNARY_OP_ABS:
|
14280
14650
|
{
|
14281
|
-
ggml_compute_forward_abs(params,
|
14651
|
+
ggml_compute_forward_abs(params, dst);
|
14282
14652
|
} break;
|
14283
14653
|
case GGML_UNARY_OP_SGN:
|
14284
14654
|
{
|
14285
|
-
ggml_compute_forward_sgn(params,
|
14655
|
+
ggml_compute_forward_sgn(params, dst);
|
14286
14656
|
} break;
|
14287
14657
|
case GGML_UNARY_OP_NEG:
|
14288
14658
|
{
|
14289
|
-
ggml_compute_forward_neg(params,
|
14659
|
+
ggml_compute_forward_neg(params, dst);
|
14290
14660
|
} break;
|
14291
14661
|
case GGML_UNARY_OP_STEP:
|
14292
14662
|
{
|
14293
|
-
ggml_compute_forward_step(params,
|
14663
|
+
ggml_compute_forward_step(params, dst);
|
14294
14664
|
} break;
|
14295
14665
|
case GGML_UNARY_OP_TANH:
|
14296
14666
|
{
|
14297
|
-
ggml_compute_forward_tanh(params,
|
14667
|
+
ggml_compute_forward_tanh(params, dst);
|
14298
14668
|
} break;
|
14299
14669
|
case GGML_UNARY_OP_ELU:
|
14300
14670
|
{
|
14301
|
-
ggml_compute_forward_elu(params,
|
14671
|
+
ggml_compute_forward_elu(params, dst);
|
14302
14672
|
} break;
|
14303
14673
|
case GGML_UNARY_OP_RELU:
|
14304
14674
|
{
|
14305
|
-
ggml_compute_forward_relu(params,
|
14675
|
+
ggml_compute_forward_relu(params, dst);
|
14306
14676
|
} break;
|
14307
14677
|
case GGML_UNARY_OP_GELU:
|
14308
14678
|
{
|
14309
|
-
ggml_compute_forward_gelu(params,
|
14679
|
+
ggml_compute_forward_gelu(params, dst);
|
14310
14680
|
} break;
|
14311
14681
|
case GGML_UNARY_OP_GELU_QUICK:
|
14312
14682
|
{
|
14313
|
-
ggml_compute_forward_gelu_quick(params,
|
14683
|
+
ggml_compute_forward_gelu_quick(params, dst);
|
14314
14684
|
} break;
|
14315
14685
|
case GGML_UNARY_OP_SILU:
|
14316
14686
|
{
|
14317
|
-
ggml_compute_forward_silu(params,
|
14687
|
+
ggml_compute_forward_silu(params, dst);
|
14318
14688
|
} break;
|
14319
14689
|
case GGML_UNARY_OP_HARDSWISH:
|
14320
14690
|
{
|
14321
|
-
ggml_compute_forward_hardswish(params,
|
14691
|
+
ggml_compute_forward_hardswish(params, dst);
|
14322
14692
|
} break;
|
14323
14693
|
case GGML_UNARY_OP_HARDSIGMOID:
|
14324
14694
|
{
|
14325
|
-
ggml_compute_forward_hardsigmoid(params,
|
14695
|
+
ggml_compute_forward_hardsigmoid(params, dst);
|
14326
14696
|
} break;
|
14327
14697
|
default:
|
14328
14698
|
{
|
@@ -14335,8 +14705,10 @@ static void ggml_compute_forward_unary(
|
|
14335
14705
|
|
14336
14706
|
static void ggml_compute_forward_get_rel_pos_f16(
|
14337
14707
|
const struct ggml_compute_params * params,
|
14338
|
-
const struct ggml_tensor * src0,
|
14339
14708
|
struct ggml_tensor * dst) {
|
14709
|
+
|
14710
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14711
|
+
|
14340
14712
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14341
14713
|
return;
|
14342
14714
|
}
|
@@ -14362,12 +14734,14 @@ static void ggml_compute_forward_get_rel_pos_f16(
|
|
14362
14734
|
|
14363
14735
|
static void ggml_compute_forward_get_rel_pos(
|
14364
14736
|
const struct ggml_compute_params * params,
|
14365
|
-
const struct ggml_tensor * src0,
|
14366
14737
|
struct ggml_tensor * dst) {
|
14738
|
+
|
14739
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14740
|
+
|
14367
14741
|
switch (src0->type) {
|
14368
14742
|
case GGML_TYPE_F16:
|
14369
14743
|
{
|
14370
|
-
ggml_compute_forward_get_rel_pos_f16(params,
|
14744
|
+
ggml_compute_forward_get_rel_pos_f16(params, dst);
|
14371
14745
|
} break;
|
14372
14746
|
default:
|
14373
14747
|
{
|
@@ -14380,11 +14754,12 @@ static void ggml_compute_forward_get_rel_pos(
|
|
14380
14754
|
|
14381
14755
|
static void ggml_compute_forward_add_rel_pos_f32(
|
14382
14756
|
const struct ggml_compute_params * params,
|
14383
|
-
const struct ggml_tensor * src0,
|
14384
|
-
const struct ggml_tensor * src1,
|
14385
|
-
const struct ggml_tensor * src2,
|
14386
14757
|
struct ggml_tensor * dst) {
|
14387
14758
|
|
14759
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14760
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
14761
|
+
const struct ggml_tensor * src2 = dst->src[2];
|
14762
|
+
|
14388
14763
|
const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
|
14389
14764
|
if (!inplace && params->type == GGML_TASK_INIT) {
|
14390
14765
|
if (params->ith != 0) {
|
@@ -14448,14 +14823,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
|
|
14448
14823
|
|
14449
14824
|
static void ggml_compute_forward_add_rel_pos(
|
14450
14825
|
const struct ggml_compute_params * params,
|
14451
|
-
const struct ggml_tensor * src0,
|
14452
|
-
const struct ggml_tensor * src1,
|
14453
|
-
const struct ggml_tensor * src2,
|
14454
14826
|
struct ggml_tensor * dst) {
|
14827
|
+
|
14828
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14829
|
+
|
14455
14830
|
switch (src0->type) {
|
14456
14831
|
case GGML_TYPE_F32:
|
14457
14832
|
{
|
14458
|
-
ggml_compute_forward_add_rel_pos_f32(params,
|
14833
|
+
ggml_compute_forward_add_rel_pos_f32(params, dst);
|
14459
14834
|
} break;
|
14460
14835
|
default:
|
14461
14836
|
{
|
@@ -14468,9 +14843,11 @@ static void ggml_compute_forward_add_rel_pos(
|
|
14468
14843
|
|
14469
14844
|
static void ggml_compute_forward_map_unary_f32(
|
14470
14845
|
const struct ggml_compute_params * params,
|
14471
|
-
const struct ggml_tensor * src0,
|
14472
14846
|
struct ggml_tensor * dst,
|
14473
14847
|
const ggml_unary_op_f32_t fun) {
|
14848
|
+
|
14849
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14850
|
+
|
14474
14851
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
14475
14852
|
|
14476
14853
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -14492,13 +14869,15 @@ static void ggml_compute_forward_map_unary_f32(
|
|
14492
14869
|
|
14493
14870
|
static void ggml_compute_forward_map_unary(
|
14494
14871
|
const struct ggml_compute_params * params,
|
14495
|
-
const struct ggml_tensor * src0,
|
14496
14872
|
struct ggml_tensor * dst,
|
14497
14873
|
const ggml_unary_op_f32_t fun) {
|
14874
|
+
|
14875
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14876
|
+
|
14498
14877
|
switch (src0->type) {
|
14499
14878
|
case GGML_TYPE_F32:
|
14500
14879
|
{
|
14501
|
-
ggml_compute_forward_map_unary_f32(params,
|
14880
|
+
ggml_compute_forward_map_unary_f32(params, dst, fun);
|
14502
14881
|
} break;
|
14503
14882
|
default:
|
14504
14883
|
{
|
@@ -14511,10 +14890,12 @@ static void ggml_compute_forward_map_unary(
|
|
14511
14890
|
|
14512
14891
|
static void ggml_compute_forward_map_binary_f32(
|
14513
14892
|
const struct ggml_compute_params * params,
|
14514
|
-
const struct ggml_tensor * src0,
|
14515
|
-
const struct ggml_tensor * src1,
|
14516
14893
|
struct ggml_tensor * dst,
|
14517
14894
|
const ggml_binary_op_f32_t fun) {
|
14895
|
+
|
14896
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14897
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
14898
|
+
|
14518
14899
|
assert(params->ith == 0);
|
14519
14900
|
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
14520
14901
|
|
@@ -14539,14 +14920,15 @@ static void ggml_compute_forward_map_binary_f32(
|
|
14539
14920
|
|
14540
14921
|
static void ggml_compute_forward_map_binary(
|
14541
14922
|
const struct ggml_compute_params * params,
|
14542
|
-
const struct ggml_tensor * src0,
|
14543
|
-
const struct ggml_tensor * src1,
|
14544
14923
|
struct ggml_tensor * dst,
|
14545
14924
|
const ggml_binary_op_f32_t fun) {
|
14925
|
+
|
14926
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
14927
|
+
|
14546
14928
|
switch (src0->type) {
|
14547
14929
|
case GGML_TYPE_F32:
|
14548
14930
|
{
|
14549
|
-
ggml_compute_forward_map_binary_f32(params,
|
14931
|
+
ggml_compute_forward_map_binary_f32(params, dst, fun);
|
14550
14932
|
} break;
|
14551
14933
|
default:
|
14552
14934
|
{
|
@@ -14559,9 +14941,11 @@ static void ggml_compute_forward_map_binary(
|
|
14559
14941
|
|
14560
14942
|
static void ggml_compute_forward_map_custom1_f32(
|
14561
14943
|
const struct ggml_compute_params * params,
|
14562
|
-
const struct ggml_tensor * a,
|
14563
14944
|
struct ggml_tensor * dst,
|
14564
14945
|
const ggml_custom1_op_f32_t fun) {
|
14946
|
+
|
14947
|
+
const struct ggml_tensor * a = dst->src[0];
|
14948
|
+
|
14565
14949
|
assert(params->ith == 0);
|
14566
14950
|
|
14567
14951
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -14575,10 +14959,12 @@ static void ggml_compute_forward_map_custom1_f32(
|
|
14575
14959
|
|
14576
14960
|
static void ggml_compute_forward_map_custom2_f32(
|
14577
14961
|
const struct ggml_compute_params * params,
|
14578
|
-
const struct ggml_tensor * a,
|
14579
|
-
const struct ggml_tensor * b,
|
14580
14962
|
struct ggml_tensor * dst,
|
14581
14963
|
const ggml_custom2_op_f32_t fun) {
|
14964
|
+
|
14965
|
+
const struct ggml_tensor * a = dst->src[0];
|
14966
|
+
const struct ggml_tensor * b = dst->src[1];
|
14967
|
+
|
14582
14968
|
assert(params->ith == 0);
|
14583
14969
|
|
14584
14970
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -14592,11 +14978,13 @@ static void ggml_compute_forward_map_custom2_f32(
|
|
14592
14978
|
|
14593
14979
|
static void ggml_compute_forward_map_custom3_f32(
|
14594
14980
|
const struct ggml_compute_params * params,
|
14595
|
-
const struct ggml_tensor * a,
|
14596
|
-
const struct ggml_tensor * b,
|
14597
|
-
const struct ggml_tensor * c,
|
14598
14981
|
struct ggml_tensor * dst,
|
14599
14982
|
const ggml_custom3_op_f32_t fun) {
|
14983
|
+
|
14984
|
+
const struct ggml_tensor * a = dst->src[0];
|
14985
|
+
const struct ggml_tensor * b = dst->src[1];
|
14986
|
+
const struct ggml_tensor * c = dst->src[2];
|
14987
|
+
|
14600
14988
|
assert(params->ith == 0);
|
14601
14989
|
|
14602
14990
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -14610,8 +14998,10 @@ static void ggml_compute_forward_map_custom3_f32(
|
|
14610
14998
|
|
14611
14999
|
static void ggml_compute_forward_map_custom1(
|
14612
15000
|
const struct ggml_compute_params * params,
|
14613
|
-
const struct ggml_tensor * a,
|
14614
15001
|
struct ggml_tensor * dst) {
|
15002
|
+
|
15003
|
+
const struct ggml_tensor * a = dst->src[0];
|
15004
|
+
|
14615
15005
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14616
15006
|
return;
|
14617
15007
|
}
|
@@ -14625,9 +15015,11 @@ static void ggml_compute_forward_map_custom1(
|
|
14625
15015
|
|
14626
15016
|
static void ggml_compute_forward_map_custom2(
|
14627
15017
|
const struct ggml_compute_params * params,
|
14628
|
-
const struct ggml_tensor * a,
|
14629
|
-
const struct ggml_tensor * b,
|
14630
15018
|
struct ggml_tensor * dst) {
|
15019
|
+
|
15020
|
+
const struct ggml_tensor * a = dst->src[0];
|
15021
|
+
const struct ggml_tensor * b = dst->src[1];
|
15022
|
+
|
14631
15023
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14632
15024
|
return;
|
14633
15025
|
}
|
@@ -14641,10 +15033,12 @@ static void ggml_compute_forward_map_custom2(
|
|
14641
15033
|
|
14642
15034
|
static void ggml_compute_forward_map_custom3(
|
14643
15035
|
const struct ggml_compute_params * params,
|
14644
|
-
const struct ggml_tensor * a,
|
14645
|
-
const struct ggml_tensor * b,
|
14646
|
-
const struct ggml_tensor * c,
|
14647
15036
|
struct ggml_tensor * dst) {
|
15037
|
+
|
15038
|
+
const struct ggml_tensor * a = dst->src[0];
|
15039
|
+
const struct ggml_tensor * b = dst->src[1];
|
15040
|
+
const struct ggml_tensor * c = dst->src[2];
|
15041
|
+
|
14648
15042
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14649
15043
|
return;
|
14650
15044
|
}
|
@@ -14658,9 +15052,11 @@ static void ggml_compute_forward_map_custom3(
|
|
14658
15052
|
|
14659
15053
|
static void ggml_compute_forward_cross_entropy_loss_f32(
|
14660
15054
|
const struct ggml_compute_params * params,
|
14661
|
-
const struct ggml_tensor * src0,
|
14662
|
-
const struct ggml_tensor * src1,
|
14663
15055
|
struct ggml_tensor * dst) {
|
15056
|
+
|
15057
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
15058
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
15059
|
+
|
14664
15060
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
14665
15061
|
GGML_ASSERT(ggml_is_contiguous(src1));
|
14666
15062
|
GGML_ASSERT(ggml_is_scalar(dst));
|
@@ -14764,13 +15160,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
|
|
14764
15160
|
|
14765
15161
|
static void ggml_compute_forward_cross_entropy_loss(
|
14766
15162
|
const struct ggml_compute_params * params,
|
14767
|
-
const struct ggml_tensor * src0,
|
14768
|
-
const struct ggml_tensor * src1,
|
14769
15163
|
struct ggml_tensor * dst) {
|
15164
|
+
|
15165
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
15166
|
+
|
14770
15167
|
switch (src0->type) {
|
14771
15168
|
case GGML_TYPE_F32:
|
14772
15169
|
{
|
14773
|
-
ggml_compute_forward_cross_entropy_loss_f32(params, src0, src1, dst);
|
15170
|
+
ggml_compute_forward_cross_entropy_loss_f32(params, dst);
|
14774
15171
|
} break;
|
14775
15172
|
default:
|
14776
15173
|
{
|
@@ -14783,10 +15180,12 @@ static void ggml_compute_forward_cross_entropy_loss(
|
|
14783
15180
|
|
14784
15181
|
static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
14785
15182
|
const struct ggml_compute_params * params,
|
14786
|
-
const struct ggml_tensor * src0,
|
14787
|
-
const struct ggml_tensor * src1,
|
14788
|
-
const struct ggml_tensor * opt0,
|
14789
15183
|
struct ggml_tensor * dst) {
|
15184
|
+
|
15185
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
15186
|
+
const struct ggml_tensor * src1 = dst->src[1];
|
15187
|
+
const struct ggml_tensor * opt0 = dst->src[2];
|
15188
|
+
|
14790
15189
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
14791
15190
|
GGML_ASSERT(ggml_is_contiguous(src0));
|
14792
15191
|
GGML_ASSERT(ggml_is_contiguous(src1));
|
@@ -14873,14 +15272,14 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
|
14873
15272
|
|
14874
15273
|
static void ggml_compute_forward_cross_entropy_loss_back(
|
14875
15274
|
const struct ggml_compute_params * params,
|
14876
|
-
const struct ggml_tensor * src0,
|
14877
|
-
const struct ggml_tensor * src1,
|
14878
|
-
const struct ggml_tensor * opt0,
|
14879
15275
|
struct ggml_tensor * dst) {
|
15276
|
+
|
15277
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
15278
|
+
|
14880
15279
|
switch (src0->type) {
|
14881
15280
|
case GGML_TYPE_F32:
|
14882
15281
|
{
|
14883
|
-
ggml_compute_forward_cross_entropy_loss_back_f32(params, src0, src1, opt0, dst);
|
15282
|
+
ggml_compute_forward_cross_entropy_loss_back_f32(params, dst);
|
14884
15283
|
} break;
|
14885
15284
|
default:
|
14886
15285
|
{
|
@@ -14928,312 +15327,312 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14928
15327
|
switch (tensor->op) {
|
14929
15328
|
case GGML_OP_DUP:
|
14930
15329
|
{
|
14931
|
-
ggml_compute_forward_dup(params, tensor
|
15330
|
+
ggml_compute_forward_dup(params, tensor);
|
14932
15331
|
} break;
|
14933
15332
|
case GGML_OP_ADD:
|
14934
15333
|
{
|
14935
|
-
ggml_compute_forward_add(params, tensor
|
15334
|
+
ggml_compute_forward_add(params, tensor);
|
14936
15335
|
} break;
|
14937
15336
|
case GGML_OP_ADD1:
|
14938
15337
|
{
|
14939
|
-
ggml_compute_forward_add1(params, tensor
|
15338
|
+
ggml_compute_forward_add1(params, tensor);
|
14940
15339
|
} break;
|
14941
15340
|
case GGML_OP_ACC:
|
14942
15341
|
{
|
14943
|
-
ggml_compute_forward_acc(params, tensor
|
15342
|
+
ggml_compute_forward_acc(params, tensor);
|
14944
15343
|
} break;
|
14945
15344
|
case GGML_OP_SUB:
|
14946
15345
|
{
|
14947
|
-
ggml_compute_forward_sub(params, tensor
|
15346
|
+
ggml_compute_forward_sub(params, tensor);
|
14948
15347
|
} break;
|
14949
15348
|
case GGML_OP_MUL:
|
14950
15349
|
{
|
14951
|
-
ggml_compute_forward_mul(params, tensor
|
15350
|
+
ggml_compute_forward_mul(params, tensor);
|
14952
15351
|
} break;
|
14953
15352
|
case GGML_OP_DIV:
|
14954
15353
|
{
|
14955
|
-
ggml_compute_forward_div(params, tensor
|
15354
|
+
ggml_compute_forward_div(params, tensor);
|
14956
15355
|
} break;
|
14957
15356
|
case GGML_OP_SQR:
|
14958
15357
|
{
|
14959
|
-
ggml_compute_forward_sqr(params, tensor
|
15358
|
+
ggml_compute_forward_sqr(params, tensor);
|
14960
15359
|
} break;
|
14961
15360
|
case GGML_OP_SQRT:
|
14962
15361
|
{
|
14963
|
-
ggml_compute_forward_sqrt(params, tensor
|
15362
|
+
ggml_compute_forward_sqrt(params, tensor);
|
14964
15363
|
} break;
|
14965
15364
|
case GGML_OP_LOG:
|
14966
15365
|
{
|
14967
|
-
ggml_compute_forward_log(params, tensor
|
15366
|
+
ggml_compute_forward_log(params, tensor);
|
14968
15367
|
} break;
|
14969
15368
|
case GGML_OP_SUM:
|
14970
15369
|
{
|
14971
|
-
ggml_compute_forward_sum(params, tensor
|
15370
|
+
ggml_compute_forward_sum(params, tensor);
|
14972
15371
|
} break;
|
14973
15372
|
case GGML_OP_SUM_ROWS:
|
14974
15373
|
{
|
14975
|
-
ggml_compute_forward_sum_rows(params, tensor
|
15374
|
+
ggml_compute_forward_sum_rows(params, tensor);
|
14976
15375
|
} break;
|
14977
15376
|
case GGML_OP_MEAN:
|
14978
15377
|
{
|
14979
|
-
ggml_compute_forward_mean(params, tensor
|
15378
|
+
ggml_compute_forward_mean(params, tensor);
|
14980
15379
|
} break;
|
14981
15380
|
case GGML_OP_ARGMAX:
|
14982
15381
|
{
|
14983
|
-
ggml_compute_forward_argmax(params, tensor
|
15382
|
+
ggml_compute_forward_argmax(params, tensor);
|
14984
15383
|
} break;
|
14985
15384
|
case GGML_OP_REPEAT:
|
14986
15385
|
{
|
14987
|
-
ggml_compute_forward_repeat(params, tensor
|
15386
|
+
ggml_compute_forward_repeat(params, tensor);
|
14988
15387
|
} break;
|
14989
15388
|
case GGML_OP_REPEAT_BACK:
|
14990
15389
|
{
|
14991
|
-
ggml_compute_forward_repeat_back(params, tensor
|
15390
|
+
ggml_compute_forward_repeat_back(params, tensor);
|
14992
15391
|
} break;
|
14993
15392
|
case GGML_OP_CONCAT:
|
14994
15393
|
{
|
14995
|
-
ggml_compute_forward_concat(params, tensor
|
15394
|
+
ggml_compute_forward_concat(params, tensor);
|
14996
15395
|
} break;
|
14997
15396
|
case GGML_OP_SILU_BACK:
|
14998
15397
|
{
|
14999
|
-
ggml_compute_forward_silu_back(params, tensor
|
15398
|
+
ggml_compute_forward_silu_back(params, tensor);
|
15000
15399
|
} break;
|
15001
15400
|
case GGML_OP_NORM:
|
15002
15401
|
{
|
15003
|
-
ggml_compute_forward_norm(params, tensor
|
15402
|
+
ggml_compute_forward_norm(params, tensor);
|
15004
15403
|
} break;
|
15005
15404
|
case GGML_OP_RMS_NORM:
|
15006
15405
|
{
|
15007
|
-
ggml_compute_forward_rms_norm(params, tensor
|
15406
|
+
ggml_compute_forward_rms_norm(params, tensor);
|
15008
15407
|
} break;
|
15009
15408
|
case GGML_OP_RMS_NORM_BACK:
|
15010
15409
|
{
|
15011
|
-
ggml_compute_forward_rms_norm_back(params, tensor
|
15410
|
+
ggml_compute_forward_rms_norm_back(params, tensor);
|
15012
15411
|
} break;
|
15013
15412
|
case GGML_OP_GROUP_NORM:
|
15014
15413
|
{
|
15015
|
-
ggml_compute_forward_group_norm(params, tensor
|
15414
|
+
ggml_compute_forward_group_norm(params, tensor);
|
15016
15415
|
} break;
|
15017
15416
|
case GGML_OP_MUL_MAT:
|
15018
15417
|
{
|
15019
|
-
ggml_compute_forward_mul_mat(params, tensor
|
15418
|
+
ggml_compute_forward_mul_mat(params, tensor);
|
15020
15419
|
} break;
|
15021
15420
|
case GGML_OP_MUL_MAT_ID:
|
15022
15421
|
{
|
15023
|
-
ggml_compute_forward_mul_mat_id(params, tensor
|
15422
|
+
ggml_compute_forward_mul_mat_id(params, tensor);
|
15024
15423
|
} break;
|
15025
15424
|
case GGML_OP_OUT_PROD:
|
15026
15425
|
{
|
15027
|
-
ggml_compute_forward_out_prod(params, tensor
|
15426
|
+
ggml_compute_forward_out_prod(params, tensor);
|
15028
15427
|
} break;
|
15029
15428
|
case GGML_OP_SCALE:
|
15030
15429
|
{
|
15031
|
-
ggml_compute_forward_scale(params, tensor
|
15430
|
+
ggml_compute_forward_scale(params, tensor);
|
15032
15431
|
} break;
|
15033
15432
|
case GGML_OP_SET:
|
15034
15433
|
{
|
15035
|
-
ggml_compute_forward_set(params, tensor
|
15434
|
+
ggml_compute_forward_set(params, tensor);
|
15036
15435
|
} break;
|
15037
15436
|
case GGML_OP_CPY:
|
15038
15437
|
{
|
15039
|
-
ggml_compute_forward_cpy(params, tensor
|
15438
|
+
ggml_compute_forward_cpy(params, tensor);
|
15040
15439
|
} break;
|
15041
15440
|
case GGML_OP_CONT:
|
15042
15441
|
{
|
15043
|
-
ggml_compute_forward_cont(params, tensor
|
15442
|
+
ggml_compute_forward_cont(params, tensor);
|
15044
15443
|
} break;
|
15045
15444
|
case GGML_OP_RESHAPE:
|
15046
15445
|
{
|
15047
|
-
ggml_compute_forward_reshape(params, tensor
|
15446
|
+
ggml_compute_forward_reshape(params, tensor);
|
15048
15447
|
} break;
|
15049
15448
|
case GGML_OP_VIEW:
|
15050
15449
|
{
|
15051
|
-
ggml_compute_forward_view(params, tensor
|
15450
|
+
ggml_compute_forward_view(params, tensor);
|
15052
15451
|
} break;
|
15053
15452
|
case GGML_OP_PERMUTE:
|
15054
15453
|
{
|
15055
|
-
ggml_compute_forward_permute(params, tensor
|
15454
|
+
ggml_compute_forward_permute(params, tensor);
|
15056
15455
|
} break;
|
15057
15456
|
case GGML_OP_TRANSPOSE:
|
15058
15457
|
{
|
15059
|
-
ggml_compute_forward_transpose(params, tensor
|
15458
|
+
ggml_compute_forward_transpose(params, tensor);
|
15060
15459
|
} break;
|
15061
15460
|
case GGML_OP_GET_ROWS:
|
15062
15461
|
{
|
15063
|
-
ggml_compute_forward_get_rows(params, tensor
|
15462
|
+
ggml_compute_forward_get_rows(params, tensor);
|
15064
15463
|
} break;
|
15065
15464
|
case GGML_OP_GET_ROWS_BACK:
|
15066
15465
|
{
|
15067
|
-
ggml_compute_forward_get_rows_back(params, tensor
|
15466
|
+
ggml_compute_forward_get_rows_back(params, tensor);
|
15068
15467
|
} break;
|
15069
15468
|
case GGML_OP_DIAG:
|
15070
15469
|
{
|
15071
|
-
ggml_compute_forward_diag(params, tensor
|
15470
|
+
ggml_compute_forward_diag(params, tensor);
|
15072
15471
|
} break;
|
15073
15472
|
case GGML_OP_DIAG_MASK_INF:
|
15074
15473
|
{
|
15075
|
-
ggml_compute_forward_diag_mask_inf(params, tensor
|
15474
|
+
ggml_compute_forward_diag_mask_inf(params, tensor);
|
15076
15475
|
} break;
|
15077
15476
|
case GGML_OP_DIAG_MASK_ZERO:
|
15078
15477
|
{
|
15079
|
-
ggml_compute_forward_diag_mask_zero(params, tensor
|
15478
|
+
ggml_compute_forward_diag_mask_zero(params, tensor);
|
15080
15479
|
} break;
|
15081
15480
|
case GGML_OP_SOFT_MAX:
|
15082
15481
|
{
|
15083
|
-
ggml_compute_forward_soft_max(params, tensor
|
15482
|
+
ggml_compute_forward_soft_max(params, tensor);
|
15084
15483
|
} break;
|
15085
15484
|
case GGML_OP_SOFT_MAX_BACK:
|
15086
15485
|
{
|
15087
|
-
ggml_compute_forward_soft_max_back(params, tensor
|
15486
|
+
ggml_compute_forward_soft_max_back(params, tensor);
|
15088
15487
|
} break;
|
15089
15488
|
case GGML_OP_ROPE:
|
15090
15489
|
{
|
15091
|
-
ggml_compute_forward_rope(params, tensor
|
15490
|
+
ggml_compute_forward_rope(params, tensor);
|
15092
15491
|
} break;
|
15093
15492
|
case GGML_OP_ROPE_BACK:
|
15094
15493
|
{
|
15095
|
-
ggml_compute_forward_rope_back(params, tensor
|
15494
|
+
ggml_compute_forward_rope_back(params, tensor);
|
15096
15495
|
} break;
|
15097
15496
|
case GGML_OP_ALIBI:
|
15098
15497
|
{
|
15099
|
-
ggml_compute_forward_alibi(params, tensor
|
15498
|
+
ggml_compute_forward_alibi(params, tensor);
|
15100
15499
|
} break;
|
15101
15500
|
case GGML_OP_CLAMP:
|
15102
15501
|
{
|
15103
|
-
ggml_compute_forward_clamp(params, tensor
|
15502
|
+
ggml_compute_forward_clamp(params, tensor);
|
15104
15503
|
} break;
|
15105
15504
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
15106
15505
|
{
|
15107
|
-
ggml_compute_forward_conv_transpose_1d(params, tensor
|
15506
|
+
ggml_compute_forward_conv_transpose_1d(params, tensor);
|
15108
15507
|
} break;
|
15109
15508
|
case GGML_OP_IM2COL:
|
15110
15509
|
{
|
15111
|
-
ggml_compute_forward_im2col(params, tensor
|
15510
|
+
ggml_compute_forward_im2col(params, tensor);
|
15112
15511
|
} break;
|
15113
15512
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
15114
15513
|
{
|
15115
|
-
ggml_compute_forward_conv_transpose_2d(params, tensor
|
15514
|
+
ggml_compute_forward_conv_transpose_2d(params, tensor);
|
15116
15515
|
} break;
|
15117
15516
|
case GGML_OP_POOL_1D:
|
15118
15517
|
{
|
15119
|
-
ggml_compute_forward_pool_1d(params, tensor
|
15518
|
+
ggml_compute_forward_pool_1d(params, tensor);
|
15120
15519
|
} break;
|
15121
15520
|
case GGML_OP_POOL_2D:
|
15122
15521
|
{
|
15123
|
-
ggml_compute_forward_pool_2d(params, tensor
|
15522
|
+
ggml_compute_forward_pool_2d(params, tensor);
|
15124
15523
|
} break;
|
15125
15524
|
case GGML_OP_UPSCALE:
|
15126
15525
|
{
|
15127
|
-
ggml_compute_forward_upscale(params, tensor
|
15526
|
+
ggml_compute_forward_upscale(params, tensor);
|
15128
15527
|
} break;
|
15129
15528
|
case GGML_OP_PAD:
|
15130
15529
|
{
|
15131
|
-
ggml_compute_forward_pad(params, tensor
|
15530
|
+
ggml_compute_forward_pad(params, tensor);
|
15132
15531
|
} break;
|
15133
15532
|
case GGML_OP_ARGSORT:
|
15134
15533
|
{
|
15135
|
-
ggml_compute_forward_argsort(params, tensor
|
15534
|
+
ggml_compute_forward_argsort(params, tensor);
|
15136
15535
|
} break;
|
15137
15536
|
case GGML_OP_LEAKY_RELU:
|
15138
15537
|
{
|
15139
|
-
ggml_compute_forward_leaky_relu(params, tensor
|
15538
|
+
ggml_compute_forward_leaky_relu(params, tensor);
|
15140
15539
|
} break;
|
15141
15540
|
case GGML_OP_FLASH_ATTN:
|
15142
15541
|
{
|
15143
15542
|
const int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15144
15543
|
GGML_ASSERT(t == 0 || t == 1);
|
15145
15544
|
const bool masked = t != 0;
|
15146
|
-
ggml_compute_forward_flash_attn(params,
|
15545
|
+
ggml_compute_forward_flash_attn(params, masked, tensor);
|
15147
15546
|
} break;
|
15148
15547
|
case GGML_OP_FLASH_FF:
|
15149
15548
|
{
|
15150
|
-
ggml_compute_forward_flash_ff(params, tensor
|
15549
|
+
ggml_compute_forward_flash_ff(params, tensor);
|
15151
15550
|
} break;
|
15152
15551
|
case GGML_OP_FLASH_ATTN_BACK:
|
15153
15552
|
{
|
15154
15553
|
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15155
15554
|
GGML_ASSERT(t == 0 || t == 1);
|
15156
15555
|
bool masked = t != 0;
|
15157
|
-
ggml_compute_forward_flash_attn_back(params,
|
15556
|
+
ggml_compute_forward_flash_attn_back(params, masked, tensor);
|
15158
15557
|
} break;
|
15159
15558
|
case GGML_OP_WIN_PART:
|
15160
15559
|
{
|
15161
|
-
ggml_compute_forward_win_part(params, tensor
|
15560
|
+
ggml_compute_forward_win_part(params, tensor);
|
15162
15561
|
} break;
|
15163
15562
|
case GGML_OP_WIN_UNPART:
|
15164
15563
|
{
|
15165
|
-
ggml_compute_forward_win_unpart(params, tensor
|
15564
|
+
ggml_compute_forward_win_unpart(params, tensor);
|
15166
15565
|
} break;
|
15167
15566
|
case GGML_OP_UNARY:
|
15168
15567
|
{
|
15169
|
-
ggml_compute_forward_unary(params, tensor
|
15568
|
+
ggml_compute_forward_unary(params, tensor);
|
15170
15569
|
} break;
|
15171
15570
|
case GGML_OP_GET_REL_POS:
|
15172
15571
|
{
|
15173
|
-
ggml_compute_forward_get_rel_pos(params, tensor
|
15572
|
+
ggml_compute_forward_get_rel_pos(params, tensor);
|
15174
15573
|
} break;
|
15175
15574
|
case GGML_OP_ADD_REL_POS:
|
15176
15575
|
{
|
15177
|
-
ggml_compute_forward_add_rel_pos(params, tensor
|
15576
|
+
ggml_compute_forward_add_rel_pos(params, tensor);
|
15178
15577
|
} break;
|
15179
15578
|
case GGML_OP_MAP_UNARY:
|
15180
15579
|
{
|
15181
15580
|
ggml_unary_op_f32_t fun;
|
15182
15581
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15183
|
-
ggml_compute_forward_map_unary(params, tensor
|
15582
|
+
ggml_compute_forward_map_unary(params, tensor, fun);
|
15184
15583
|
}
|
15185
15584
|
break;
|
15186
15585
|
case GGML_OP_MAP_BINARY:
|
15187
15586
|
{
|
15188
15587
|
ggml_binary_op_f32_t fun;
|
15189
15588
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15190
|
-
ggml_compute_forward_map_binary(params, tensor
|
15589
|
+
ggml_compute_forward_map_binary(params, tensor, fun);
|
15191
15590
|
}
|
15192
15591
|
break;
|
15193
15592
|
case GGML_OP_MAP_CUSTOM1_F32:
|
15194
15593
|
{
|
15195
15594
|
ggml_custom1_op_f32_t fun;
|
15196
15595
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15197
|
-
ggml_compute_forward_map_custom1_f32(params, tensor
|
15596
|
+
ggml_compute_forward_map_custom1_f32(params, tensor, fun);
|
15198
15597
|
}
|
15199
15598
|
break;
|
15200
15599
|
case GGML_OP_MAP_CUSTOM2_F32:
|
15201
15600
|
{
|
15202
15601
|
ggml_custom2_op_f32_t fun;
|
15203
15602
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15204
|
-
ggml_compute_forward_map_custom2_f32(params, tensor
|
15603
|
+
ggml_compute_forward_map_custom2_f32(params, tensor, fun);
|
15205
15604
|
}
|
15206
15605
|
break;
|
15207
15606
|
case GGML_OP_MAP_CUSTOM3_F32:
|
15208
15607
|
{
|
15209
15608
|
ggml_custom3_op_f32_t fun;
|
15210
15609
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15211
|
-
ggml_compute_forward_map_custom3_f32(params, tensor
|
15610
|
+
ggml_compute_forward_map_custom3_f32(params, tensor, fun);
|
15212
15611
|
}
|
15213
15612
|
break;
|
15214
15613
|
case GGML_OP_MAP_CUSTOM1:
|
15215
15614
|
{
|
15216
|
-
ggml_compute_forward_map_custom1(params, tensor
|
15615
|
+
ggml_compute_forward_map_custom1(params, tensor);
|
15217
15616
|
}
|
15218
15617
|
break;
|
15219
15618
|
case GGML_OP_MAP_CUSTOM2:
|
15220
15619
|
{
|
15221
|
-
ggml_compute_forward_map_custom2(params, tensor
|
15620
|
+
ggml_compute_forward_map_custom2(params, tensor);
|
15222
15621
|
}
|
15223
15622
|
break;
|
15224
15623
|
case GGML_OP_MAP_CUSTOM3:
|
15225
15624
|
{
|
15226
|
-
ggml_compute_forward_map_custom3(params, tensor
|
15625
|
+
ggml_compute_forward_map_custom3(params, tensor);
|
15227
15626
|
}
|
15228
15627
|
break;
|
15229
15628
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
15230
15629
|
{
|
15231
|
-
ggml_compute_forward_cross_entropy_loss(params, tensor
|
15630
|
+
ggml_compute_forward_cross_entropy_loss(params, tensor);
|
15232
15631
|
}
|
15233
15632
|
break;
|
15234
15633
|
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
|
15235
15634
|
{
|
15236
|
-
ggml_compute_forward_cross_entropy_loss_back(params, tensor
|
15635
|
+
ggml_compute_forward_cross_entropy_loss_back(params, tensor);
|
15237
15636
|
}
|
15238
15637
|
break;
|
15239
15638
|
case GGML_OP_NONE:
|
@@ -16637,27 +17036,47 @@ typedef pthread_t ggml_thread_t;
|
|
16637
17036
|
#endif
|
16638
17037
|
|
16639
17038
|
// Android's libc implementation "bionic" does not support setting affinity
|
16640
|
-
#if defined(
|
16641
|
-
static void set_numa_thread_affinity(int thread_n
|
17039
|
+
#if defined(__gnu_linux__)
|
17040
|
+
static void set_numa_thread_affinity(int thread_n) {
|
16642
17041
|
if (!ggml_is_numa()) {
|
16643
17042
|
return;
|
16644
17043
|
}
|
16645
17044
|
|
16646
|
-
|
16647
|
-
|
16648
|
-
struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
|
17045
|
+
int node_num;
|
17046
|
+
int rv;
|
16649
17047
|
size_t setsize = CPU_ALLOC_SIZE(g_state.numa.total_cpus);
|
16650
17048
|
|
17049
|
+
switch(g_state.numa.numa_strategy) {
|
17050
|
+
case GGML_NUMA_STRATEGY_DISTRIBUTE:
|
17051
|
+
// run thread on node_num = thread_n % n_nodes (threads are assigned round-robin across nodes)
|
17052
|
+
node_num = thread_n % g_state.numa.n_nodes;
|
17053
|
+
break;
|
17054
|
+
case GGML_NUMA_STRATEGY_ISOLATE:
|
17055
|
+
// run thread on current_node
|
17056
|
+
node_num = g_state.numa.current_node;
|
17057
|
+
break;
|
17058
|
+
case GGML_NUMA_STRATEGY_NUMACTL:
|
17059
|
+
// use the cpuset that numactl gave us
|
17060
|
+
rv = pthread_setaffinity_np(pthread_self(), setsize, &g_state.numa.cpuset);
|
17061
|
+
if (rv) {
|
17062
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv));
|
17063
|
+
}
|
17064
|
+
return;
|
17065
|
+
default:
|
17066
|
+
return;
|
17067
|
+
}
|
17068
|
+
|
17069
|
+
struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
|
17070
|
+
|
16651
17071
|
cpu_set_t * cpus = CPU_ALLOC(g_state.numa.total_cpus);
|
16652
17072
|
CPU_ZERO_S(setsize, cpus);
|
16653
17073
|
for (size_t i = 0; i < node->n_cpus; ++i) {
|
16654
17074
|
CPU_SET_S(node->cpus[i], setsize, cpus);
|
16655
17075
|
}
|
16656
17076
|
|
16657
|
-
|
17077
|
+
rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
|
16658
17078
|
if (rv) {
|
16659
|
-
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
|
16660
|
-
strerror(rv));
|
17079
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
|
16661
17080
|
}
|
16662
17081
|
|
16663
17082
|
CPU_FREE(cpus);
|
@@ -16678,8 +17097,7 @@ static void clear_numa_thread_affinity(void) {
|
|
16678
17097
|
|
16679
17098
|
int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
|
16680
17099
|
if (rv) {
|
16681
|
-
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
|
16682
|
-
strerror(rv));
|
17100
|
+
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", strerror(rv));
|
16683
17101
|
}
|
16684
17102
|
|
16685
17103
|
CPU_FREE(cpus);
|
@@ -16687,7 +17105,7 @@ static void clear_numa_thread_affinity(void) {
|
|
16687
17105
|
#else
|
16688
17106
|
// TODO: Windows etc.
|
16689
17107
|
// (the linux implementation may also work on BSD, someone should test)
|
16690
|
-
static void set_numa_thread_affinity(int thread_n
|
17108
|
+
static void set_numa_thread_affinity(int thread_n) { UNUSED(thread_n); }
|
16691
17109
|
static void clear_numa_thread_affinity(void) {}
|
16692
17110
|
#endif
|
16693
17111
|
|
@@ -16987,7 +17405,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16987
17405
|
|
16988
17406
|
const int n_threads = state->shared->n_threads;
|
16989
17407
|
|
16990
|
-
set_numa_thread_affinity(state->ith
|
17408
|
+
set_numa_thread_affinity(state->ith);
|
16991
17409
|
|
16992
17410
|
int node_n = -1;
|
16993
17411
|
int task_phase = GGML_TASK_FINALIZE;
|
@@ -17793,7 +18211,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|
17793
18211
|
|
17794
18212
|
ptr += ggml_nbytes(tensor);
|
17795
18213
|
|
17796
|
-
fprintf(stderr, "%s: loaded leaf %
|
18214
|
+
fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
17797
18215
|
}
|
17798
18216
|
}
|
17799
18217
|
|
@@ -17896,7 +18314,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|
17896
18314
|
|
17897
18315
|
result->nodes[i] = tensor;
|
17898
18316
|
|
17899
|
-
fprintf(stderr, "%s: loaded node %
|
18317
|
+
fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
17900
18318
|
}
|
17901
18319
|
}
|
17902
18320
|
}
|
@@ -18521,7 +18939,9 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
18521
18939
|
(*step) *= width;
|
18522
18940
|
}
|
18523
18941
|
|
18524
|
-
|
18942
|
+
GGML_ASSERT(false && "line search failed");
|
18943
|
+
|
18944
|
+
return GGML_LINESEARCH_FAIL;
|
18525
18945
|
}
|
18526
18946
|
|
18527
18947
|
static enum ggml_opt_result ggml_opt_lbfgs(
|
@@ -18789,7 +19209,9 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18789
19209
|
step[0] = 1.0;
|
18790
19210
|
}
|
18791
19211
|
|
18792
|
-
|
19212
|
+
GGML_ASSERT(false && "lbfgs failed");
|
19213
|
+
|
19214
|
+
return GGML_OPT_DID_NOT_CONVERGE;
|
18793
19215
|
}
|
18794
19216
|
|
18795
19217
|
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
@@ -19037,8 +19459,9 @@ void ggml_quantize_init(enum ggml_type type) {
|
|
19037
19459
|
ggml_critical_section_start();
|
19038
19460
|
|
19039
19461
|
switch (type) {
|
19040
|
-
case GGML_TYPE_IQ2_XXS:
|
19041
|
-
case GGML_TYPE_IQ2_XS:
|
19462
|
+
case GGML_TYPE_IQ2_XXS:
|
19463
|
+
case GGML_TYPE_IQ2_XS:
|
19464
|
+
case GGML_TYPE_IQ1_S: iq2xs_init_impl(type); break;
|
19042
19465
|
case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
|
19043
19466
|
default: // nothing
|
19044
19467
|
break;
|
@@ -19050,8 +19473,10 @@ void ggml_quantize_init(enum ggml_type type) {
|
|
19050
19473
|
void ggml_quantize_free(void) {
|
19051
19474
|
ggml_critical_section_start();
|
19052
19475
|
|
19053
|
-
iq2xs_free_impl(
|
19054
|
-
iq2xs_free_impl(
|
19476
|
+
iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
|
19477
|
+
iq2xs_free_impl(GGML_TYPE_IQ2_XS);
|
19478
|
+
iq2xs_free_impl(GGML_TYPE_IQ1_S);
|
19479
|
+
iq3xs_free_impl(256);
|
19055
19480
|
|
19056
19481
|
ggml_critical_section_end();
|
19057
19482
|
}
|
@@ -19186,7 +19611,8 @@ size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t *
|
|
19186
19611
|
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
|
19187
19612
|
return
|
19188
19613
|
type == GGML_TYPE_IQ2_XXS ||
|
19189
|
-
type == GGML_TYPE_IQ2_XS
|
19614
|
+
type == GGML_TYPE_IQ2_XS ||
|
19615
|
+
type == GGML_TYPE_IQ1_S;
|
19190
19616
|
}
|
19191
19617
|
|
19192
19618
|
size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start,
|
@@ -19311,6 +19737,24 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|
19311
19737
|
result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
19312
19738
|
GGML_ASSERT(result == row_size * nrows);
|
19313
19739
|
} break;
|
19740
|
+
case GGML_TYPE_IQ1_S:
|
19741
|
+
{
|
19742
|
+
GGML_ASSERT(start % QK_K == 0);
|
19743
|
+
GGML_ASSERT(start % n_per_row == 0);
|
19744
|
+
size_t start_row = start / n_per_row;
|
19745
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
19746
|
+
result = quantize_iq1_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
19747
|
+
GGML_ASSERT(result == row_size * nrows);
|
19748
|
+
} break;
|
19749
|
+
case GGML_TYPE_IQ4_NL:
|
19750
|
+
{
|
19751
|
+
GGML_ASSERT(start % QK4_NL == 0);
|
19752
|
+
GGML_ASSERT(start % n_per_row == 0);
|
19753
|
+
size_t start_row = start / n_per_row;
|
19754
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
19755
|
+
result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
19756
|
+
GGML_ASSERT(result == row_size * nrows);
|
19757
|
+
} break;
|
19314
19758
|
case GGML_TYPE_F16:
|
19315
19759
|
{
|
19316
19760
|
size_t elemsize = sizeof(ggml_fp16_t);
|