@fugood/llama.node 0.3.13 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +1 -1
- package/src/LlamaContext.cpp +98 -76
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +60 -10
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +3 -3
- package/src/llama.cpp/common/arg.cpp +112 -11
- package/src/llama.cpp/common/chat.cpp +960 -266
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +27 -171
- package/src/llama.cpp/common/common.h +27 -67
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +45 -7
- package/src/llama.cpp/common/speculative.cpp +6 -5
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +45 -7
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -3
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
- package/src/llama.cpp/examples/main/main.cpp +73 -28
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +110 -67
- package/src/llama.cpp/examples/server/server.cpp +82 -87
- package/src/llama.cpp/examples/server/utils.hpp +94 -107
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +251 -142
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +5 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1396 -386
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1432 -151
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +220 -116
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +168 -721
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +146 -42
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +8 -3
- package/src/llama.cpp/include/llama.h +19 -5
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +21 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +182 -182
- package/src/llama.cpp/src/llama-grammar.h +12 -3
- package/src/llama.cpp/src/llama-kv-cache.h +1 -0
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-model.cpp +69 -5
- package/src/llama.cpp/src/llama-sampling.cpp +43 -10
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +147 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +166 -110
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +593 -395
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -55
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c

@@ -112,7 +112,8 @@ struct ggml_arm_arch_features_type {
     int has_i8mm;
     int has_sve;
     int sve_cnt;
-} ggml_arm_arch_features = {-1, -1, -1, -1, 0};
+    int has_sme;
+} ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1};
 #endif

@@ -236,6 +237,8 @@ typedef pthread_t ggml_thread_t;
 #else
 #if defined(__POWER9_VECTOR__)
 #define CACHE_LINE_SIZE 128
+#elif defined(__VXE__) || defined(__VXE2__)
+#define CACHE_LINE_SIZE 256
 #else
 #define CACHE_LINE_SIZE 64
 #endif

@@ -244,9 +247,9 @@ typedef pthread_t ggml_thread_t;
 static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);


-static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
-static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
-static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
+static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
+static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
+static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);

 static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32] = {

@@ -1210,6 +1213,87 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
 #define GGML_F16_VEC_MUL            GGML_F32Cx4_MUL
 #define GGML_F16_VEC_REDUCE         GGML_F32Cx4_REDUCE

+#elif defined(__VXE__) || defined(__VXE2__)
+
+#define GGML_SIMD
+
+// F32 s390x
+
+#define GGML_F32_STEP 32
+#define GGML_F32_EPR  4
+
+#define GGML_F32x4              __vector float
+#define GGML_F32x4_ZERO         vec_splats(0.0f)
+#define GGML_F32x4_SET1         vec_splats
+#define GGML_F32x4_LOAD(p)      vec_xl(0, p)
+#define GGML_F32x4_STORE(p, r)  vec_xst(r, 0, p)
+#define GGML_F32x4_FMA(a, b, c) vec_madd(b, c, a)
+#define GGML_F32x4_ADD          vec_add
+#define GGML_F32x4_MUL          vec_mul
+#define GGML_F32x4_REDUCE(res, x)                   \
+{                                                   \
+    int offset = GGML_F32_ARR >> 1;                 \
+    for (int i = 0; i < offset; ++i) {              \
+        x[i] = vec_add(x[i], x[offset + i]);        \
+    }                                               \
+    offset >>= 1;                                   \
+    for (int i = 0; i < offset; ++i) {              \
+        x[i] = vec_add(x[i], x[offset + i]);        \
+    }                                               \
+    offset >>= 1;                                   \
+    for (int i = 0; i < offset; ++i) {              \
+        x[i] = vec_add(x[i], x[offset + i]);        \
+    }                                               \
+    res = vec_extract(x[0], 0) +                    \
+          vec_extract(x[0], 1) +                    \
+          vec_extract(x[0], 2) +                    \
+          vec_extract(x[0], 3);                     \
+}
+
+#define GGML_F32_VEC        GGML_F32x4
+#define GGML_F32_VEC_ZERO   GGML_F32x4_ZERO
+#define GGML_F32_VEC_SET1   GGML_F32x4_SET1
+#define GGML_F32_VEC_LOAD   GGML_F32x4_LOAD
+#define GGML_F32_VEC_STORE  GGML_F32x4_STORE
+#define GGML_F32_VEC_FMA    GGML_F32x4_FMA
+#define GGML_F32_VEC_ADD    GGML_F32x4_ADD
+#define GGML_F32_VEC_MUL    GGML_F32x4_MUL
+#define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE
+
+// F16 s390x
+#define GGML_F16_STEP GGML_F32_STEP
+#define GGML_F16_EPR  GGML_F32_EPR
+
+static inline __vector float __lzs_f16cx4_load(const ggml_fp16_t * x) {
+    float tmp[4];
+
+    for (int i = 0; i < 4; i++) {
+        tmp[i] = GGML_FP16_TO_FP32(x[i]);
+    }
+
+    return vec_xl(0, tmp);
+}
+
+static inline void __lzs_f16cx4_store(ggml_fp16_t * x, __vector float y) {
+    float arr[4];
+
+    vec_xst(y, 0, arr);
+
+    for (int i = 0; i < 4; i++) {
+        x[i] = GGML_FP32_TO_FP16(arr[i]);
+    }
+}
+
+#define GGML_F16_VEC                GGML_F32x4
+#define GGML_F16_VEC_ZERO           GGML_F32x4_ZERO
+#define GGML_F16_VEC_SET1           GGML_F32x4_SET1
+#define GGML_F16_VEC_LOAD(p, i)     __lzs_f16cx4_load(p)
+#define GGML_F16_VEC_STORE(p, r, i) __lzs_f16cx4_store(p, r[i])
+#define GGML_F16_VEC_FMA            GGML_F32x4_FMA
+#define GGML_F16_VEC_ADD            GGML_F32x4_ADD
+#define GGML_F16_VEC_MUL            GGML_F32x4_MUL
+#define GGML_F16_VEC_REDUCE         GGML_F32x4_REDUCE
+
 #endif

 // GGML_F32_ARR / GGML_F16_ARR

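The block above only defines the generic GGML_F32_*/GGML_F16_* macro layer for s390x; the kernels that consume it are shared across architectures. A minimal sketch of the consumption pattern, paraphrased from ggml's own dot-product loops (illustrative, not part of this diff):

    // GGML_F32_ARR = GGML_F32_STEP/GGML_F32_EPR, so with STEP = 32 and
    // EPR = 4 each iteration keeps 8 partial-sum vectors in flight.
    float sumf = 0.0f;
    const int np = (n & ~(GGML_F32_STEP - 1));
    GGML_F32_VEC sum[GGML_F32_ARR] = { GGML_F32_VEC_ZERO };
    for (int i = 0; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; j++) {
            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            sum[j] = GGML_F32_VEC_FMA(sum[j], ax, ay);
        }
    }
    GGML_F32_VEC_REDUCE(sumf, sum); // the tree reduction defined above
    for (int i = np; i < n; ++i) {
        sumf += x[i]*y[i];          // scalar tail
    }
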
@@ -1331,17 +1415,43 @@ inline static void ggml_vec_cpy_i32(const int n, int32_t * y, const int32_t * x)
 inline static void ggml_vec_set_f16(const int n, ggml_fp16_t * x, const int32_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
 inline static void ggml_vec_set_bf16(const int n, ggml_bf16_t * x, const ggml_bf16_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
 inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i] + y[i]; }
+inline static void ggml_vec_add_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
+    for (int i = 0; i < n; ++i) {
+        z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) + GGML_FP16_TO_FP32(y[i]));
+    }
+}
 inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float   v) { for (int i = 0; i < n; ++i) z[i]  = x[i] + v;    }
 inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i] += x[i];        }
 inline static void ggml_vec_acc1_f32(const int n, float * y, const float   v)                  { for (int i = 0; i < n; ++i) y[i] += v;           }
 inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i] - y[i]; }
+inline static void ggml_vec_sub_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
+    for (int i = 0; i < n; ++i) {
+        z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) - GGML_FP16_TO_FP32(y[i]));
+    }
+}
 inline static void ggml_vec_set_f32 (const int n, float * x, const float   v)                  { for (int i = 0; i < n; ++i) x[i]  = v;           }
 inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i]  = x[i];        }
 inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i]  = -x[i];       }
+inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(-GGML_FP16_TO_FP32(x[i]));
+    }
+}
+
 inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i]*y[i];   }
+inline static void ggml_vec_mul_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
+    for (int i = 0; i < n; ++i) {
+        z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) * GGML_FP16_TO_FP32(y[i]));
+    }
+}
 inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i]/y[i];   }
+inline static void ggml_vec_div_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
+    for (int i = 0; i < n; ++i) {
+        z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) / GGML_FP16_TO_FP32(y[i]));
+    }
+}

-static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) {
+static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc) {
     assert(nrc == 1);
     UNUSED(nrc);
     UNUSED(bx);

@@ -1384,7 +1494,7 @@ static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float *
     *s = sumf;
 }

-static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc) {
+static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc) {
     assert(nrc == 1);
     UNUSED(nrc);
     UNUSED(bx);

@@ -1452,7 +1562,7 @@ static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t
     *s = sumf;
 }

-static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc) {
+static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc) {
     assert(nrc == 1);
     UNUSED(nrc);
     UNUSED(bx);

@@ -1496,10 +1606,10 @@ static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t *

 // compute GGML_VEC_DOT_UNROLL dot products at once
 // xs - x row stride in bytes
-inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * restrict s, void * restrict xv, ggml_fp16_t * restrict y) {
+inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * GGML_RESTRICT s, void * GGML_RESTRICT xv, ggml_fp16_t * GGML_RESTRICT y) {
     ggml_float sumf[GGML_VEC_DOT_UNROLL] = { 0.0 };

-    ggml_fp16_t * restrict x[GGML_VEC_DOT_UNROLL];
+    ggml_fp16_t * GGML_RESTRICT x[GGML_VEC_DOT_UNROLL];

     for (int i = 0; i < GGML_VEC_DOT_UNROLL; ++i) {
         x[i] = (ggml_fp16_t *) ((char *) xv + i*xs);

@@ -1549,7 +1659,7 @@ inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * re
     }
 }

-inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) {
+inline static void ggml_vec_mad_f32(const int n, float * GGML_RESTRICT y, const float * GGML_RESTRICT x, const float v) {
 #if defined(GGML_SIMD)
     const int np = (n & ~(GGML_F32_STEP - 1));

@@ -1580,7 +1690,7 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
 #endif
 }

-inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, const ggml_fp16_t * restrict x, const float v) {
+inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * GGML_RESTRICT y, const ggml_fp16_t * GGML_RESTRICT x, const float v) {
 #if defined(GGML_SIMD)
     const int np = (n & ~(GGML_F16_STEP - 1));

@@ -1612,10 +1722,10 @@ inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, const
 }

 // xs and vs are byte strides of x and v
-inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int vs, float * restrict y, const float * restrict xv, const float * restrict vv) {
+inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int vs, float * GGML_RESTRICT y, const float * GGML_RESTRICT xv, const float * GGML_RESTRICT vv) {

-    const float * restrict x[GGML_VEC_MAD_UNROLL];
-    const float * restrict v[GGML_VEC_MAD_UNROLL];
+    const float * GGML_RESTRICT x[GGML_VEC_MAD_UNROLL];
+    const float * GGML_RESTRICT v[GGML_VEC_MAD_UNROLL];

     for (int i = 0; i < GGML_VEC_MAD_UNROLL; ++i) {
         x[i] = (const float *) ((const char *) xv + i*xs);

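The restrict → GGML_RESTRICT renames in the last few hunks are mechanical: the C99 `restrict` keyword is not valid in C++ or in MSVC's C mode, so ggml routes it through a macro. A simplified sketch of the shape of that definition (an assumption about ggml's headers, not a quote from this diff):

    #ifdef _MSC_VER
    #    define GGML_RESTRICT __restrict
    #else
    #    define GGML_RESTRICT restrict
    #endif
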
@@ -1726,22 +1836,107 @@ inline static void ggml_vec_scale_f16(const int n, ggml_fp16_t * y, const float

 inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, 0, x, 0, x, 0, 1); *s = sqrtf(*s);   }
 inline static void ggml_vec_sqr_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i];   }
+inline static void ggml_vec_sqr_f16  (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        float v = GGML_FP16_TO_FP32(x[i]);
+        y[i] = GGML_FP32_TO_FP16(v*v);
+    }
+}
 inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
+inline static void ggml_vec_sqrt_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(sqrtf(GGML_FP16_TO_FP32(x[i])));
+    }
+}
 inline static void ggml_vec_log_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); }
+inline static void ggml_vec_log_f16  (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(logf(GGML_FP16_TO_FP32(x[i])));
+    }
+}
 inline static void ggml_vec_sin_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sinf(x[i]); }
+inline static void ggml_vec_sin_f16  (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(sinf(GGML_FP16_TO_FP32(x[i])));
+    }
+}
 inline static void ggml_vec_cos_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = cosf(x[i]); }
+inline static void ggml_vec_cos_f16  (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(cosf(GGML_FP16_TO_FP32(x[i])));
+    }
+}
 inline static void ggml_vec_abs_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
+inline static void ggml_vec_abs_f16  (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(fabsf(GGML_FP16_TO_FP32(x[i])));
+    }
+}
 inline static void ggml_vec_sgn_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
+inline static void ggml_vec_sgn_f16  (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        float v = GGML_FP16_TO_FP32(x[i]);
+        y[i] = GGML_FP32_TO_FP16((v > 0.f) ? 1.f : ((v < 0.f) ? -1.f : 0.f));
+    }
+}
 inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
+inline static void ggml_vec_step_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16((GGML_FP16_TO_FP32(x[i]) > 0.f) ? 1.f : 0.f);
+    }
+}
 inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
+inline static void ggml_vec_tanh_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(tanhf(GGML_FP16_TO_FP32(x[i])));
+    }
+}
 inline static void ggml_vec_elu_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
+inline static void ggml_vec_elu_f16  (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(expm1f(GGML_FP16_TO_FP32(x[i])));
+    }
+}
 inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
+inline static void ggml_vec_relu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        float v = GGML_FP16_TO_FP32(x[i]);
+        y[i] = GGML_FP32_TO_FP16((v > 0.f) ? v : 0.f);
+    }
+}
 inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
+inline static void ggml_vec_leaky_relu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const float ns) {
+    for (int i = 0; i < n; ++i) {
+        float v = GGML_FP16_TO_FP32(x[i]);
+        y[i] = GGML_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f));
+    }
+}
 inline static void ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); }
+inline static void ggml_vec_sigmoid_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(1.f / (1.f + expf(-GGML_FP16_TO_FP32(x[i]))));
+    }
+}
 // TODO: optimize performance
 inline static void ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
+inline static void ggml_vec_hardswish_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        float v = GGML_FP16_TO_FP32(x[i]);
+        y[i] = GGML_FP32_TO_FP16(v * fminf(1.0f, fmaxf(0.0f, (v + 3.0f) / 6.0f)));
+    }
+}
 inline static void ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
+inline static void ggml_vec_hardsigmoid_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(fminf(1.0f, fmaxf(0.0f, (GGML_FP16_TO_FP32(x[i]) + 3.0f) / 6.0f)));
+    }
+}
 inline static void ggml_vec_exp_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = expf(x[i]); }
+inline static void ggml_vec_exp_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = GGML_FP32_TO_FP16(expf(GGML_FP16_TO_FP32(x[i])));
+    }
+}

 static const float GELU_COEF_A     = 0.044715f;
 static const float GELU_QUICK_COEF = -1.702f;

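Every new *_f16 helper above follows the same widen/compute/narrow pattern, so each element is rounded to half precision exactly once. The pattern in isolation (an illustrative helper, not from the diff):

    // widen to f32, apply the f32 math function, round back once
    static inline ggml_fp16_t f16_apply1(float (*f)(float), ggml_fp16_t h) {
        return GGML_FP32_TO_FP16(f(GGML_FP16_TO_FP32(h)));
    }
    // f16_apply1(tanhf, x[i]) matches ggml_vec_tanh_f16's per-element result
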
@@ -1809,10 +2004,21 @@ inline static void ggml_vec_gelu_quick_f32(const int n, float * y, const float *
 }
 #endif

+inline static void ggml_vec_gelu_quick_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        float v = GGML_FP16_TO_FP32(x[i]);
+        y[i] = GGML_FP32_TO_FP16(v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v))));
+    }
+}
+
 // Sigmoid Linear Unit (SiLU) function
 inline static float ggml_silu_f32(float x) {
     return x/(1.0f + expf(-x));
 }
+inline static ggml_fp16_t ggml_silu_f16(ggml_fp16_t x) {
+    float v = GGML_FP16_TO_FP32(x);
+    return GGML_FP32_TO_FP16(v/(1.0f + expf(-v)));
+}

 #if __FINITE_MATH_ONLY__
 #error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"

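For reference, ggml_vec_gelu_quick_f16 implements the sigmoid approximation GELU(x) ≈ x·σ(1.702·x): with GELU_QUICK_COEF = -1.702, the expression v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v))) is exactly x·σ(1.702x) computed in float and rounded once to half precision.
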
@@ -2036,6 +2242,12 @@ static void ggml_vec_silu_f32(const int n, float * y, const float * x) {
     }
 }

+inline static void ggml_vec_silu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
+    for (int i = 0; i < n; ++i) {
+        y[i] = ggml_silu_f16(x[i]);
+    }
+}
+
 static ggml_float ggml_vec_soft_max_f32(const int n, float * y, const float * x, float max) {
     int i = 0;
     ggml_float sum = 0;

@@ -2107,12 +2319,24 @@ inline static float ggml_silu_backward_f32(float x, float dy) {
     return dy*s*(1.0f + x*(1.0f - s));
 }

+inline static ggml_fp16_t ggml_silu_backward_f16(ggml_fp16_t x, ggml_fp16_t dy) {
+    const float v = GGML_FP16_TO_FP32(x);
+    const float s = 1.0f/(1.0f + expf(-v));
+    return GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(dy)*s*(1.0f + v*(1.0f - s)));
+}
+
 inline static void ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) {
     for (int i = 0; i < n; ++i) {
         dx[i] = ggml_silu_backward_f32(x[i], dy[i]);
     }
 }

+inline static void ggml_vec_silu_backward_f16(const int n, ggml_fp16_t * dx, const ggml_fp16_t * x, const ggml_fp16_t * dy) {
+    for (int i = 0; i < n; ++i) {
+        dx[i] = ggml_silu_backward_f16(x[i], dy[i]);
+    }
+}
+
 inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
 #ifndef GGML_USE_ACCELERATE
     ggml_float sum = 0.0;

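The backward helpers encode the SiLU derivative via the chain rule. Writing s = σ(x) = 1/(1 + e^(-x)):

    d/dx [x·σ(x)] = σ(x) + x·σ(x)·(1 − σ(x)) = s·(1 + x·(1 − s))

which is the s*(1.0f + v*(1.0f - s)) factor applied to the incoming gradient dy.
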
@@ -2381,15 +2605,20 @@ bool ggml_is_numa(void) {
 #define HWCAP2_I8MM (1 << 13)
 #endif

+#if !defined(HWCAP2_SME)
+#define HWCAP2_SME (1 << 23)
+#endif
+
 static void ggml_init_arm_arch_features(void) {
 #if defined(__linux__) && defined(__aarch64__)
     uint32_t hwcap = getauxval(AT_HWCAP);
     uint32_t hwcap2 = getauxval(AT_HWCAP2);

-    ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
+    ggml_arm_arch_features.has_neon    = !!(hwcap & HWCAP_ASIMD);
     ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
-    ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
-    ggml_arm_arch_features.has_sve  = !!(hwcap & HWCAP_SVE);
+    ggml_arm_arch_features.has_i8mm    = !!(hwcap2 & HWCAP2_I8MM);
+    ggml_arm_arch_features.has_sve     = !!(hwcap & HWCAP_SVE);
+    ggml_arm_arch_features.has_sme     = !!(hwcap2 & HWCAP2_SME);

 #if defined(__ARM_FEATURE_SVE)
     ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);

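On Linux/aarch64 the feature bits come from the kernel's auxiliary vector; defining HWCAP2_SME locally keeps the file building against libc headers that predate the constant (bit 23 is the arm64 AT_HWCAP2 bit for FEAT_SME). The detection idiom as a standalone sketch (assumes Linux on arm64):

    #include <sys/auxv.h>

    #ifndef HWCAP2_SME
    #define HWCAP2_SME (1 << 23)
    #endif

    int cpu_has_sme(void) {
        return !!(getauxval(AT_HWCAP2) & HWCAP2_SME);
    }
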
@@ -2412,6 +2641,11 @@ static void ggml_init_arm_arch_features(void) {
     }
     ggml_arm_arch_features.has_i8mm = oldp;

+    if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) != 0) {
+        oldp = 0;
+    }
+    ggml_arm_arch_features.has_sme = oldp;
+
     ggml_arm_arch_features.has_sve = 0;
     ggml_arm_arch_features.sve_cnt = 0;
 #else

@@ -2435,6 +2669,12 @@ static void ggml_init_arm_arch_features(void) {
     ggml_arm_arch_features.has_sve = 0;
     ggml_arm_arch_features.sve_cnt = 0;
 #endif
+
+#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2)
+    ggml_arm_arch_features.has_sme = 1;
+#else
+    ggml_arm_arch_features.has_sme = 0;
+#endif
 #endif
 }
 #endif

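ggml-cpu.h also gains three lines in this diff (see the file list above); upstream that corresponds to exposing the new flag next to the existing feature queries. A hedged usage sketch, assuming a ggml_cpu_has_sme() declaration analogous to the existing ones:

    #include <stdio.h>
    #include "ggml-cpu.h"

    int main(void) {
        printf("NEON: %d\n", ggml_cpu_has_neon());
        printf("SVE:  %d\n", ggml_cpu_has_sve());
        printf("SME:  %d\n", ggml_cpu_has_sme()); // assumed new accessor
        return 0;
    }
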
@@ -4279,7 +4519,7 @@ static void ggml_compute_forward_add_f16_f16(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];

-    GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
+    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

     const int ith = params->ith;
     const int nth = params->nth;

@@ -4304,17 +4544,22 @@ static void ggml_compute_forward_add_f16_f16(

     if (nb10 == sizeof(ggml_fp16_t)) {
         for (int ir = ir0; ir < ir1; ++ir) {
-            // src0, src1 and dst are same shape => same indices
-            const int i3 = ir/(ne2*ne1);
-            const int i2 = (ir - i3*ne2*ne1)/ne1;
-            const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
+            // src1 is broadcastable across src0 and dst in i1, i2, i3
+            const int64_t i03 = ir/(ne02*ne01);
+            const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+            const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+            const int64_t i13 = i03 % ne13;
+            const int64_t i12 = i02 % ne12;
+            const int64_t i11 = i01 % ne11;
+            const int64_t nr0 = ne00 / ne10;

-            ggml_fp16_t * dst_ptr  = (ggml_fp16_t *) ((char *) dst->data  + i3*nb3  + i2*nb2  + i1*nb1);
-            ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
-            ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11);
+            ggml_fp16_t * dst_ptr  = (ggml_fp16_t *) ((char *) dst->data  + i03*nb3  + i02*nb2  + i01*nb1 );
+            ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+            ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);

-            for (int i = 0; i < ne0; i++) {
-                dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + GGML_FP16_TO_FP32(src1_ptr[i]));
+            for (int64_t r = 0; r < nr0; ++r) {
+                ggml_vec_add_f16(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
             }
         }
     }

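The index arithmetic above first decomposes the flat row index ir into (i01, i02, i03) over src0's shape, then wraps each coordinate modulo src1's extent so that a smaller src1 repeats, and finally repeats src1's row nr0 = ne00/ne10 times along dim 0. A worked example with hypothetical shapes (not from the diff):

    // src0->ne = {8, 4, 2, 1}, src1->ne = {8, 1, 2, 1}; take ir = 5:
    //   i03 = 5/(2*4)        = 0
    //   i02 = (5 - 0*8)/4    = 1
    //   i01 =  5 - 0*8 - 1*4 = 1
    //   i12 = i02 % ne12 = 1 % 2 = 1
    //   i11 = i01 % ne11 = 1 % 1 = 0 // src1's single row is reused
    //   nr0 = ne00 / ne10 = 8/8 = 1  // no repetition along dim 0
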
@@ -5102,6 +5347,62 @@ static void ggml_compute_forward_sub_f32(
     }
 }

+static void ggml_compute_forward_sub_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    assert(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nr = ggml_nrows(src0);
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F16);
+    GGML_ASSERT(src1->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->type  == GGML_TYPE_F16);
+
+    GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
+    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    if (nb10 == sizeof(ggml_fp16_t)) {
+        for (int ir = ir0; ir < ir1; ++ir) {
+            // src1 is broadcastable across src0 and dst in i1, i2, i3
+            const int64_t i03 = ir/(ne02*ne01);
+            const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+            const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+            const int64_t i13 = i03 % ne13;
+            const int64_t i12 = i02 % ne12;
+            const int64_t i11 = i01 % ne11;
+            const int64_t nr0 = ne00 / ne10;
+
+            ggml_fp16_t * dst_ptr  = (ggml_fp16_t *) ((char *) dst->data  + i03*nb3  + i02*nb2  + i01*nb1 );
+            ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+            ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);
+
+            for (int64_t r = 0; r < nr0; ++r) {
+                ggml_vec_sub_f16(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
+            }
+        }
+    } else {
+        // src1 is not contiguous
+        GGML_ABORT("unimplemented error");
+    }
+}
+
 static void ggml_compute_forward_sub(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

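The threading scheme is the same one the f32 kernels use: each of the nth threads takes one contiguous band of dr = ceil(nr/nth) rows. With illustrative numbers:

    // nr = 10 rows, nth = 4 threads:
    //   dr  = (10 + 4 - 1)/4 = 3
    //   thread 0: rows [0,3)   thread 1: rows [3,6)
    //   thread 2: rows [6,9)   thread 3: rows [9,10)
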
@@ -5113,6 +5414,10 @@ static void ggml_compute_forward_sub(
             {
                 ggml_compute_forward_sub_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_sub_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");

@@ -5193,6 +5498,55 @@ static void ggml_compute_forward_mul_f32(
     }
 }

+static void ggml_compute_forward_mul_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t nr = ggml_nrows(src0);
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F16);
+    GGML_ASSERT(src1->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->type  == GGML_TYPE_F16);
+
+    GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
+    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
+
+    if (nb10 == sizeof(ggml_fp16_t)) {
+        for (int64_t ir = ith; ir < nr; ir += nth) {
+            // src0 and dst are same shape => same indices
+            const int64_t i03 = ir/(ne02*ne01);
+            const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+            const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+            const int64_t i13 = i03 % ne13;
+            const int64_t i12 = i02 % ne12;
+            const int64_t i11 = i01 % ne11;
+            const int64_t nr0 = ne00 / ne10;
+
+            ggml_fp16_t * dst_ptr  = (ggml_fp16_t *) ((char *) dst->data  + i03*nb3  + i02*nb2  + i01*nb1 );
+            ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+            ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);
+
+            for (int64_t r = 0 ; r < nr0; ++r) {
+                ggml_vec_mul_f16(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
+            }
+        }
+    } else {
+        // src1 is not contiguous
+        GGML_ABORT("unimplemented error");
+    }
+}
+
 static void ggml_compute_forward_mul(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -5200,13 +5554,17 @@ static void ggml_compute_forward_mul(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];

-    GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now");
+    GGML_ASSERT((src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16) && "only f32/f16 src1 supported for now");

     switch (src0->type) {
         case GGML_TYPE_F32:
             {
                 ggml_compute_forward_mul_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_mul_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");

@@ -5287,18 +5645,71 @@ static void ggml_compute_forward_div_f32(
     }
 }

-static void ggml_compute_forward_div(
-        const struct ggml_compute_params * params,
-        struct ggml_tensor * dst) {
+static void ggml_compute_forward_div_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {

     const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];

-    switch (src0->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_div_f32(params, dst);
-            } break;
-        default:
+    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t nr = ggml_nrows(src0);
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F16);
+    GGML_ASSERT(src1->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->type  == GGML_TYPE_F16);
+
+    GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
+    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
+
+    if (nb10 == sizeof(ggml_fp16_t)) {
+        for (int64_t ir = ith; ir < nr; ir += nth) {
+            // src0 and dst are same shape => same indices
+            const int64_t i03 = ir/(ne02*ne01);
+            const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+            const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+            const int64_t i13 = i03 % ne13;
+            const int64_t i12 = i02 % ne12;
+            const int64_t i11 = i01 % ne11;
+            const int64_t nr0 = ne00 / ne10;
+
+            ggml_fp16_t * dst_ptr  = (ggml_fp16_t *) ((char *) dst->data  + i03*nb3  + i02*nb2  + i01*nb1 );
+            ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+            ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);
+
+            for (int64_t r = 0; r < nr0; ++r) {
+                ggml_vec_div_f16(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr);
+            }
+        }
+    } else {
+        // src1 is not contiguous
+        GGML_ABORT("unimplemented error");
+    }
+}
+
+static void ggml_compute_forward_div(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_div_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_div_f16(params, dst);
+            } break;
+        default:
         {
             GGML_ABORT("fatal error");
         }

@@ -5332,6 +5743,31 @@ static void ggml_compute_forward_sqr_f32(
     }
 }

+static void ggml_compute_forward_sqr_f16(
+    const struct ggml_compute_params * params,
+    struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    assert( dst->nb[0] == sizeof(ggml_fp16_t));
+    assert(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_sqr_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data  + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_sqr(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -5343,6 +5779,10 @@ static void ggml_compute_forward_sqr(
             {
                 ggml_compute_forward_sqr_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_sqr_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");

@@ -5377,6 +5817,31 @@ static void ggml_compute_forward_sqrt_f32(
     }
 }

+static void ggml_compute_forward_sqrt_f16(
+    const struct ggml_compute_params * params,
+    struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    assert( dst->nb[0] == sizeof(ggml_fp16_t));
+    assert(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_sqrt_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data  + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_sqrt(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -5388,6 +5853,10 @@ static void ggml_compute_forward_sqrt(
             {
                 ggml_compute_forward_sqrt_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_sqrt_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");

@@ -5422,6 +5891,31 @@ static void ggml_compute_forward_log_f32(
     }
 }

+static void ggml_compute_forward_log_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    GGML_ASSERT( dst->nb[0] == sizeof(ggml_fp16_t));
+    GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_log_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data  + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_log(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -5433,6 +5927,10 @@ static void ggml_compute_forward_log(
             {
                 ggml_compute_forward_log_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_log_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");

@@ -5467,6 +5965,31 @@ static void ggml_compute_forward_sin_f32(
     }
 }

+static void ggml_compute_forward_sin_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    GGML_ASSERT( dst->nb[0] == sizeof(ggml_fp16_t));
+    GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_sin_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data  + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_sin(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -5478,6 +6001,10 @@ static void ggml_compute_forward_sin(
             {
                 ggml_compute_forward_sin_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_sin_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");

@@ -5512,6 +6039,31 @@ static void ggml_compute_forward_cos_f32(
     }
 }

+static void ggml_compute_forward_cos_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    GGML_ASSERT( dst->nb[0] == sizeof(ggml_fp16_t));
+    GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_cos_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data  + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_cos(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -5523,6 +6075,10 @@ static void ggml_compute_forward_cos(
             {
                 ggml_compute_forward_cos_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_cos_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");

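Taken together, the sub/mul/div/sqr/sqrt/log/sin/cos hunks above let an element-wise graph stay in F16 on the CPU backend instead of aborting in the type dispatch. A hedged end-to-end sketch against the public ggml API bundled with this package (the graph helpers named here are assumptions based on the upstream headers, not something this diff touches):

    #include "ggml.h"
    #include "ggml-cpu.h"

    int main(void) {
        struct ggml_init_params ip = { /*mem_size =*/ 16*1024*1024, /*mem_buffer =*/ NULL, /*no_alloc =*/ false };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, 64);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, 64);
        struct ggml_tensor * c = ggml_sqrt(ctx, ggml_mul(ctx, a, b)); // F16 end to end

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, c);
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

        ggml_free(ctx);
        return 0;
    }
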
@@ -6092,14 +6648,14 @@ static void ggml_compute_forward_repeat_back(

 // ggml_compute_forward_concat

-static void ggml_compute_forward_concat_f32(
+static void ggml_compute_forward_concat_any(
     const struct ggml_compute_params * params,
     struct ggml_tensor * dst) {

     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];

-    GGML_ASSERT(src0->nb[0] == sizeof(float));
+    const size_t len = ggml_type_size(src0->type);

     const int ith = params->ith;
     const int nth = params->nth;

@@ -6113,7 +6669,7 @@ static void ggml_compute_forward_concat_f32(
     int64_t o[4] = {0, 0, 0, 0};
     o[dim] = src0->ne[dim];

-    const float * x;
+    const char * x;

     // TODO: smarter multi-theading
     for (int i3 = 0; i3 < ne3; i3++) {

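The o[dim] offset computed above is what splices the two inputs: destination coordinates below src0's extent along dim read src0 directly, the rest read src1 shifted back by the offset. A worked example with hypothetical sizes:

    // concat along dim == 2, src0->ne[2] == 3, src1->ne[2] == 2 (dst ne2 == 5)
    int64_t o[4] = {0, 0, 0, 0};
    o[2] = 3;                 // o[dim] = src0->ne[dim]
    // i2 = 0,1,2 -> read src0 at i2
    // i2 = 3,4   -> read src1 at i2 - o[2] = 0,1; other coords pass through
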
@@ -6121,101 +6677,268 @@ static void ggml_compute_forward_concat_f32(
             for (int i1 = 0; i1 < ne1; i1++) {
                 for (int i0 = 0; i0 < ne0; i0++) {
                     if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
-                        x = (const float *) ((const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03);
+                        x = (const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03;
                     } else {
-                        x = (const float *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
+                        x = (const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13;
                     }

-                    float * y = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+                    char * y = (char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3;

-                    *y = *x;
+                    memcpy(y, x, len);
                 }
             }
         }
     }
 }

-static void ggml_compute_forward_concat(
+static void ggml_compute_forward_concat_i8(
     const struct ggml_compute_params * params,
     struct ggml_tensor * dst) {

     const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];

-    switch (src0->type) {
-        case GGML_TYPE_F32:
-        case GGML_TYPE_I32:
-            {
-                ggml_compute_forward_concat_f32(params, dst);
-            } break;
-        default:
-            {
-                GGML_ABORT("fatal error");
-            }
-    }
-}
-
-// ggml_compute_forward_abs
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(int8_t));

-static void ggml_compute_forward_abs_f32(
-        const struct ggml_compute_params * params,
-        struct ggml_tensor * dst) {
+    const int ith = params->ith;
+    const int nth = params->nth;

-    const struct ggml_tensor * src0 = dst->src[0];
+    GGML_TENSOR_BINARY_OP_LOCALS

-    if (params->ith != 0) {
-        return;
-    }
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);

-    assert(ggml_is_contiguous_1(src0));
-    assert(ggml_is_contiguous_1(dst));
-    assert(ggml_are_same_shape(src0, dst));
+    GGML_ASSERT(dim >= 0 && dim < 4);

-    const int n  = ggml_nrows(src0);
-    const int nc = src0->ne[0];
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];

-    for (int i = 0; i < n; i++) {
-        ggml_vec_abs_f32(nc,
-                (float *) ((char *) dst->data + i*( dst->nb[1])),
-                (float *) ((char *) src0->data + i*(src0->nb[1])));
-    }
-}
+    const int8_t * x;

-static void ggml_compute_forward_abs(
-        const struct ggml_compute_params * params,
-        struct ggml_tensor * dst) {
+    // TODO: smarter multi-theading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const int8_t *) ((const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03);
+                    } else {
+                        x = (const int8_t *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
+                    }

-    const struct ggml_tensor * src0 = dst->src[0];
+                    int8_t * y = (int8_t *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);

-    switch (src0->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_abs_f32(params, dst);
-            } break;
-        default:
-            {
-                GGML_ABORT("fatal error");
+                    *y = *x;
+                }
             }
         }
     }
 }

-// ggml_compute_forward_sgn
-
-static void ggml_compute_forward_sgn_f32(
-        const struct ggml_compute_params * params,
-        struct ggml_tensor * dst) {
+static void ggml_compute_forward_concat_f16(
+    const struct ggml_compute_params * params,
+    struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(ggml_fp16_t));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(dim >= 0 && dim < 4);
+
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];
+
+    const ggml_fp16_t * x;
+
+    // TODO: smarter multi-theading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const ggml_fp16_t *) ((const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03);
+                    } else {
+                        x = (const ggml_fp16_t *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
+                    }
+
+                    ggml_fp16_t * y = (ggml_fp16_t *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+                    *y = *x;
+                }
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_concat_f32(
+    const struct ggml_compute_params * params,
+    struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(dim >= 0 && dim < 4);
+
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];
+
+    const float * x;
+
+    // TODO: smarter multi-theading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const float *) ((const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03);
+                    } else {
+                        x = (const float *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
+                    }
+
+                    float * y = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+                    *y = *x;
+                }
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_concat(
+    const struct ggml_compute_params * params,
|
|
6825
|
+
struct ggml_tensor * dst) {
|
|
6826
|
+
|
|
6827
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
|
6828
|
+
|
|
6829
|
+
switch (src0->type) {
|
|
6830
|
+
case GGML_TYPE_F16:
|
|
6831
|
+
case GGML_TYPE_BF16:
|
|
6832
|
+
case GGML_TYPE_I16:
|
|
6833
|
+
{
|
|
6834
|
+
ggml_compute_forward_concat_f16(params, dst);
|
|
6835
|
+
} break;
|
|
6836
|
+
case GGML_TYPE_I8:
|
|
6837
|
+
{
|
|
6838
|
+
ggml_compute_forward_concat_i8(params, dst);
|
|
6839
|
+
} break;
|
|
6840
|
+
case GGML_TYPE_F32:
|
|
6841
|
+
case GGML_TYPE_I32:
|
|
6842
|
+
{
|
|
6843
|
+
ggml_compute_forward_concat_f32(params, dst);
|
|
6844
|
+
} break;
|
|
6845
|
+
default:
|
|
6846
|
+
{
|
|
6847
|
+
ggml_compute_forward_concat_any(params, dst);
|
|
6848
|
+
}
|
|
6849
|
+
}
|
|
6850
|
+
}
|
|
6851
|
+
|
|
6852
|
+
// ggml_compute_forward_abs
|
|
6853
|
+
|
|
6854
|
+
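Note: the new `ggml_compute_forward_concat_*` kernels above all share one indexing scheme — the offset vector `o` is zero everywhere except at the concat dimension, where it holds `src0->ne[dim]`, so any destination index past the end of `src0` along `dim` is redirected into `src1` after subtracting that offset. A minimal standalone sketch of the same scheme for 1-D arrays (plain C; `concat_1d` is an illustrative stand-in, not part of the ggml API):

    #include <stdio.h>

    // Concatenate a (n0 elements) and b (n1 elements) into dst, using the same
    // "offset vector" trick as the ggml kernels above: indices below n0 read
    // from a, the rest read from b at (i - o).
    static void concat_1d(const float *a, int n0, const float *b, int n1, float *dst) {
        const int o = n0;                 // o[dim] = src0->ne[dim]
        for (int i = 0; i < n0 + n1; i++) {
            dst[i] = (i < n0) ? a[i] : b[i - o];
        }
    }

    int main(void) {
        const float a[] = {1, 2, 3};
        const float b[] = {4, 5};
        float dst[5];
        concat_1d(a, 3, b, 2, dst);
        for (int i = 0; i < 5; i++) {
            printf("%g ", dst[i]);        // prints: 1 2 3 4 5
        }
        printf("\n");
        return 0;
    }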
+// ggml_compute_forward_abs
+
+static void ggml_compute_forward_abs_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_abs_f32(nc,
+                (float *) ((char *) dst->data + i*( dst->nb[1])),
+                (float *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
+static void ggml_compute_forward_abs_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_abs_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
+static void ggml_compute_forward_abs(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_abs_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_abs_f16(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
+// ggml_compute_forward_sgn
+
+static void ggml_compute_forward_sgn_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];

     for (int i = 0; i < n; i++) {
         ggml_vec_sgn_f32(nc,
@@ -6224,6 +6947,30 @@ static void ggml_compute_forward_sgn_f32(
     }
 }

+static void ggml_compute_forward_sgn_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_sgn_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_sgn(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6235,6 +6982,10 @@ static void ggml_compute_forward_sgn(
             {
                 ggml_compute_forward_sgn_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_sgn_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
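Note: every unary activation in the hunks that follow gains an F16 path of identical shape — a `*_f16` kernel that walks rows and calls the matching `ggml_vec_*_f16` helper, plus a new `case GGML_TYPE_F16` in the type dispatcher. A hedged sketch of that dispatch pattern outside ggml; `elem_type`, `row_op`, and the `sgn_row_*` functions here are illustrative stand-ins, and the F16 body is stubbed out because the fp16<->fp32 conversion is omitted:

    #include <stdio.h>
    #include <stdlib.h>

    typedef enum { TYPE_F32, TYPE_F16 } elem_type;
    typedef void (*row_op)(int n, void *dst, const void *src);

    static void sgn_row_f32(int n, void *dst, const void *src) {
        float *d = dst; const float *s = src;
        for (int i = 0; i < n; i++) {
            d[i] = (s[i] > 0.f) ? 1.f : ((s[i] < 0.f) ? -1.f : 0.f);
        }
    }

    // The real F16 kernel would convert each element to float, apply the op,
    // and convert back; the conversion is omitted in this stub.
    static void sgn_row_f16(int n, void *dst, const void *src) {
        (void)n; (void)dst; (void)src;
    }

    static row_op select_sgn(elem_type t) {
        switch (t) {
            case TYPE_F32: return sgn_row_f32;
            case TYPE_F16: return sgn_row_f16;
            default: abort();   // mirrors GGML_ABORT("fatal error")
        }
    }

    int main(void) {
        float src[4] = {-2.f, 0.f, 3.5f, -0.f};
        float dst[4];
        select_sgn(TYPE_F32)(4, dst, src);
        for (int i = 0; i < 4; i++) printf("%g ", dst[i]);  // -1 0 1 0
        printf("\n");
        return 0;
    }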
@@ -6268,6 +7019,30 @@ static void ggml_compute_forward_neg_f32(
     }
 }

+static void ggml_compute_forward_neg_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_neg_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_neg(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6279,6 +7054,10 @@ static void ggml_compute_forward_neg(
             {
                 ggml_compute_forward_neg_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_neg_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6312,6 +7091,30 @@ static void ggml_compute_forward_step_f32(
     }
 }

+static void ggml_compute_forward_step_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_step_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_step(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6323,6 +7126,10 @@ static void ggml_compute_forward_step(
             {
                 ggml_compute_forward_step_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_step_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6356,6 +7163,30 @@ static void ggml_compute_forward_tanh_f32(
     }
 }

+static void ggml_compute_forward_tanh_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_tanh_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_tanh(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6367,6 +7198,10 @@ static void ggml_compute_forward_tanh(
             {
                 ggml_compute_forward_tanh_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_tanh_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6400,6 +7235,30 @@ static void ggml_compute_forward_elu_f32(
     }
 }

+static void ggml_compute_forward_elu_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_elu_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_elu(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6411,6 +7270,10 @@ static void ggml_compute_forward_elu(
             {
                 ggml_compute_forward_elu_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_elu_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6444,6 +7307,30 @@ static void ggml_compute_forward_relu_f32(
     }
 }

+static void ggml_compute_forward_relu_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_relu_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_relu(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6455,6 +7342,10 @@ static void ggml_compute_forward_relu(
             {
                 ggml_compute_forward_relu_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_relu_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6488,6 +7379,30 @@ static void ggml_compute_forward_sigmoid_f32(
     }
 }

+static void ggml_compute_forward_sigmoid_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_sigmoid_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_sigmoid(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6499,6 +7414,113 @@ static void ggml_compute_forward_sigmoid(
             {
                 ggml_compute_forward_sigmoid_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_sigmoid_f16(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
+// ggml_compute_forward_gelu
+
+static void ggml_compute_forward_gelu_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src0->ne[0];
+    const int nr = ggml_nrows(src0);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_vec_gelu_f32(nc,
+                (float *) ((char *) dst->data + i1*( dst->nb[1])),
+                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            UNUSED(x);
+            assert(!isnan(x));
+            assert(!isinf(x));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_gelu_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src0->ne[0];
+    const int nr = ggml_nrows(src0);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_vec_gelu_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            const float v = GGML_FP16_TO_FP32(x);
+            UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_gelu(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_gelu_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_gelu_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
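Note: the hunk above adds the `ggml_compute_forward_gelu_f16` wrapper, but the body of `ggml_vec_gelu_f16` itself is not part of this diff. For background only (an assumption about the helper, not read from these hunks), ggml-style GELU implementations typically use the tanh approximation

$$\operatorname{GELU}(x) \approx \tfrac{1}{2}\,x\left(1 + \tanh\!\left(\sqrt{\tfrac{2}{\pi}}\left(x + 0.044715\,x^{3}\right)\right)\right),$$

with the F16 variant converting each element to fp32, applying the formula, and converting back, as the `GGML_FP16_TO_FP32` calls in the debug checks suggest.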
@@ -6506,11 +7528,50 @@ static void ggml_compute_forward_sigmoid(
     }
 }

-// ggml_compute_forward_gelu
-
-static void ggml_compute_forward_gelu_f32(
-        const struct ggml_compute_params * params,
-        struct ggml_tensor * dst) {
+// ggml_compute_forward_gelu_quick
+
+static void ggml_compute_forward_gelu_quick_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src0->ne[0];
+    const int nr = ggml_nrows(src0);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_vec_gelu_quick_f32(nc,
+                (float *) ((char *) dst->data + i1*( dst->nb[1])),
+                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            UNUSED(x);
+            assert(!isnan(x));
+            assert(!isinf(x));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_gelu_quick_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {

     const struct ggml_tensor * src0 = dst->src[0];

@@ -6532,22 +7593,23 @@ static void ggml_compute_forward_gelu_f32(
     const int ir1 = MIN(ir0 + dr, nr);

     for (int i1 = ir0; i1 < ir1; i1++) {
-        ggml_vec_gelu_f32(nc,
-                (float *) ((char *) dst->data + i1*( dst->nb[1])),
-                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+        ggml_vec_gelu_quick_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));

 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
-            const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k];
-            UNUSED(x);
-            assert(!isnan(x));
-            assert(!isinf(x));
+            const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            const float v = GGML_FP16_TO_FP32(x);
+            UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
         }
 #endif
     }
 }

-static void ggml_compute_forward_gelu(
+static void ggml_compute_forward_gelu_quick(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6556,7 +7618,11 @@ static void ggml_compute_forward_gelu(
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_gelu_f32(params, dst);
+                ggml_compute_forward_gelu_quick_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_gelu_quick_f16(params, dst);
             } break;
         default:
             {
@@ -6565,9 +7631,9 @@ static void ggml_compute_forward_gelu(
     }
 }

-// ggml_compute_forward_gelu_quick
+// ggml_compute_forward_silu

-static void ggml_compute_forward_gelu_quick_f32(
+static void ggml_compute_forward_silu_f32(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6591,13 +7657,13 @@ static void ggml_compute_forward_gelu_quick_f32(
     const int ir1 = MIN(ir0 + dr, nr);

     for (int i1 = ir0; i1 < ir1; i1++) {
-        ggml_vec_gelu_quick_f32(nc,
+        ggml_vec_silu_f32(nc,
                 (float *) ((char *) dst->data + i1*( dst->nb[1])),
                 (float *) ((char *) src0->data + i1*(src0->nb[1])));

 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
-            const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            const float x = ((float *) ((char *) dst->data + i1*(dst->nb[1])))[k];
             UNUSED(x);
             assert(!isnan(x));
             assert(!isinf(x));
@@ -6606,29 +7672,9 @@ static void ggml_compute_forward_gelu_quick_f32(
     }
 }

-static void ggml_compute_forward_gelu_quick(
-        const struct ggml_compute_params * params,
-        struct ggml_tensor * dst) {
-
-    const struct ggml_tensor * src0 = dst->src[0];
-
-    switch (src0->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_gelu_quick_f32(params, dst);
-            } break;
-        default:
-            {
-                GGML_ABORT("fatal error");
-            }
-    }
-}
-
-// ggml_compute_forward_silu
-
-static void ggml_compute_forward_silu_f32(
-        const struct ggml_compute_params * params,
-        struct ggml_tensor * dst) {
+static void ggml_compute_forward_silu_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {

     const struct ggml_tensor * src0 = dst->src[0];

@@ -6650,16 +7696,17 @@ static void ggml_compute_forward_silu_f32(
     const int ir1 = MIN(ir0 + dr, nr);

     for (int i1 = ir0; i1 < ir1; i1++) {
-        ggml_vec_silu_f32(nc,
-                (float *) ((char *) dst->data + i1*( dst->nb[1])),
-                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+        ggml_vec_silu_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));

 #ifndef NDEBUG
         for (int k = 0; k < nc; k++) {
-            const float x = ((float *) ((char *) dst->data + i1*(dst->nb[1])))[k];
-            UNUSED(x);
-            assert(!isnan(x));
-            assert(!isinf(x));
+            const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])))[k];
+            const float v = GGML_FP16_TO_FP32(x);
+            UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
         }
 #endif
     }
@@ -6676,6 +7723,10 @@ static void ggml_compute_forward_silu(
             {
                 ggml_compute_forward_silu_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_silu_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6714,6 +7765,36 @@ static void ggml_compute_forward_leaky_relu_f32(
     }
 }

+static void ggml_compute_forward_leaky_relu_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    float negative_slope;
+    memcpy(&negative_slope, dst->op_params, sizeof(float));
+
+    assert(dst->nb[0] == sizeof(ggml_fp16_t));
+    assert(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_leaky_relu_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])), negative_slope);
+    }
+}
+
 static void ggml_compute_forward_leaky_relu(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -6725,6 +7806,10 @@ static void ggml_compute_forward_leaky_relu(
             {
                 ggml_compute_forward_leaky_relu_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_leaky_relu_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6777,6 +7862,50 @@ static void ggml_compute_forward_silu_back_f32(
     }
 }

+static void ggml_compute_forward_silu_back_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * grad = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    assert(ggml_is_contiguous_1(grad));
+    assert(ggml_is_contiguous_1(src1));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src1, dst));
+    assert(ggml_are_same_shape(src1, grad));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src1->ne[0];
+    const int nr = ggml_nrows(src1);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_vec_silu_backward_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src1->data + i1*(src1->nb[1])),
+                (ggml_fp16_t *) ((char *) grad->data + i1*(grad->nb[1])));
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const float x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            const float v = GGML_FP16_TO_FP32(x);
+            UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
+        }
+#endif
+    }
+}
+
 static void ggml_compute_forward_silu_back(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
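Note: `ggml_compute_forward_silu_back_f16` above mirrors the existing F32 backward pass, with `dst->src[0]` carrying the incoming gradient and `dst->src[1]` the forward input. As background (derived from the SiLU definition, not shown in this diff), with $\sigma$ the logistic sigmoid and $\operatorname{SiLU}(x) = x\,\sigma(x)$, the factor the backward kernel multiplies into the incoming gradient is

$$\frac{d}{dx}\,x\,\sigma(x) = \sigma(x)\bigl(1 + x\,(1 - \sigma(x))\bigr).$$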
@@ -6788,6 +7917,10 @@ static void ggml_compute_forward_silu_back(
             {
                 ggml_compute_forward_silu_back_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_silu_back_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6795,7 +7928,6 @@ static void ggml_compute_forward_silu_back(
     }
 }

-
 static void ggml_compute_forward_hardswish_f32(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
@@ -6819,6 +7951,31 @@ static void ggml_compute_forward_hardswish_f32(
                 (float *) ((char *) src0->data + i*(src0->nb[1])));
     }
 }
+
+static void ggml_compute_forward_hardswish_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_hardswish_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_hardswish(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
@@ -6830,6 +7987,10 @@ static void ggml_compute_forward_hardswish(
             {
                 ggml_compute_forward_hardswish_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_hardswish_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6861,6 +8022,30 @@ static void ggml_compute_forward_hardsigmoid_f32(
     }
 }

+static void ggml_compute_forward_hardsigmoid_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_hardsigmoid_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_hardsigmoid(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
@@ -6872,6 +8057,10 @@ static void ggml_compute_forward_hardsigmoid(
             {
                 ggml_compute_forward_hardsigmoid_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_hardsigmoid_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -6903,6 +8092,30 @@ static void ggml_compute_forward_exp_f32(
     }
 }

+static void ggml_compute_forward_exp_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_exp_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
 static void ggml_compute_forward_exp(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
@@ -6914,6 +8127,10 @@ static void ggml_compute_forward_exp(
             {
                 ggml_compute_forward_exp_f32(params, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_exp_f16(params, dst);
+            } break;
         default:
             {
                 GGML_ABORT("fatal error");
@@ -9198,6 +10415,43 @@ static void ggml_compute_forward_clamp_f32(
     }
 }

+static void ggml_compute_forward_clamp_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    float min;
+    float max;
+    memcpy(&min, (float *) dst->op_params + 0, sizeof(float));
+    memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    const size_t nb00 = src0->nb[0];
+    const size_t nb01 = src0->nb[1];
+
+    const size_t nb0 = dst->nb[0];
+    const size_t nb1 = dst->nb[1];
+
+    GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
+    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
+
+    for (int j = ith; j < n; j += nth) {
+        ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + j*nb1);
+        ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + j*nb01);
+
+        for (int i = 0; i < nc; i++) {
+            float v = GGML_FP16_TO_FP32(src0_ptr[i]);
+            dst_ptr[i] = GGML_FP32_TO_FP16(MAX(MIN(v, max), min));
+        }
+    }
+}
+
 static void ggml_compute_forward_clamp(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
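Note: `ggml_compute_forward_clamp_f16` above reads its bounds the same way the F32 kernel does — two floats stored back-to-back in the op's int32-typed parameter block, recovered with `memcpy` to sidestep aliasing and alignment concerns. A standalone sketch of that round-trip (the 16-slot `op_params` array here imitates the ggml tensor field but is local to this example):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    int main(void) {
        int32_t op_params[16] = {0};   // stand-in for ggml_tensor::op_params

        // store: write two floats into the int32-typed parameter block
        const float lo = -1.0f, hi = 1.0f;
        memcpy((float *) op_params + 0, &lo, sizeof(float));
        memcpy((float *) op_params + 1, &hi, sizeof(float));

        // load: the same pattern as ggml_compute_forward_clamp_f16
        float min, max;
        memcpy(&min, (float *) op_params + 0, sizeof(float));
        memcpy(&max, (float *) op_params + 1, sizeof(float));

        const float v = 3.5f;
        const float clamped = v > max ? max : (v < min ? min : v);
        printf("clamp(%g, [%g, %g]) = %g\n", v, min, max, clamped);  // = 1
        return 0;
    }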
@@ -9210,6 +10464,9 @@ static void ggml_compute_forward_clamp(
                 ggml_compute_forward_clamp_f32(params, dst);
             } break;
         case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_clamp_f16(params, dst);
+            } break;
         case GGML_TYPE_BF16:
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -14322,6 +15579,14 @@ int ggml_cpu_has_amx_int8(void) {
 #endif
 }

+int ggml_cpu_has_bmi2(void) {
+#if defined(__BMI2__)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_fma(void) {
 #if defined(__FMA__)
     return 1;
@@ -14402,6 +15667,14 @@ int ggml_cpu_has_vsx(void) {
 #endif
 }

+int ggml_cpu_has_vxe(void) {
+#if defined(__VXE__) || defined(__VXE2__)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_neon(void) {
 #if defined(__ARM_ARCH) && defined(__ARM_NEON)
     return ggml_arm_arch_features.has_neon;
@@ -14442,6 +15715,14 @@ int ggml_cpu_get_sve_cnt(void) {
 #endif
 }

+int ggml_cpu_has_sme(void) {
+#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SME)
+    return ggml_arm_arch_features.has_sme;
+#else
+    return 0;
+#endif
+}
+
 void ggml_cpu_init(void) {
     // needed to initialize f16 tables
     {
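Note: the three new capability probes (`ggml_cpu_has_bmi2`, `ggml_cpu_has_vxe`, `ggml_cpu_has_sme`) follow the existing convention in this file — BMI2 and VXE are resolved purely from compiler-defined target macros, while SME additionally consults the runtime-populated `ggml_arm_arch_features`. A minimal standalone sketch of the compile-time half of that pattern (`has_fma`/`has_bmi2` are illustrative local functions; `__FMA__` and `__BMI2__` are standard GCC/Clang target macros):

    #include <stdio.h>

    // Each probe returns 1 iff the compiler was targeting that ISA extension,
    // mirroring the #if defined(...) style of the ggml_cpu_has_* functions.
    static int has_fma(void) {
    #if defined(__FMA__)
        return 1;
    #else
        return 0;
    #endif
    }

    static int has_bmi2(void) {
    #if defined(__BMI2__)
        return 1;
    #else
        return 0;
    #endif
    }

    int main(void) {
        printf("FMA:  %d\n", has_fma());
        printf("BMI2: %d\n", has_bmi2());
        return 0;
    }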