@novastera-oss/llamarn 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/common/arg.cpp +7 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +1 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +118 -20
- package/cpp/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +83 -102
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +192 -67
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +211 -33
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +54 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +84 -31
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +227 -41
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +362 -182
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +240 -535
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +45 -54
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +57 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +69 -13
- package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +76 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +21 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +64 -0
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -3
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +55 -0
- package/cpp/llama.cpp/src/llama-arch.h +18 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +570 -359
- package/cpp/llama.cpp/src/llama-batch.h +98 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +11 -6
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +199 -252
- package/cpp/llama.cpp/src/llama-graph.h +44 -32
- package/cpp/llama.cpp/src/llama-hparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-hparams.h +8 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +51 -53
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +19 -24
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +110 -104
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +17 -22
- package/cpp/llama.cpp/src/llama-kv-cells.h +35 -11
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +66 -67
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +16 -21
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +69 -68
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.h +18 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1006 -472
- package/cpp/llama.cpp/src/llama-model.h +22 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +87 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +26 -3
- package/cpp/llama.cpp/src/llama-vocab.h +1 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/common.h +1 -0
- package/ios/include/llama.h +8 -3
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
|
@@ -108,7 +108,7 @@ static void ggml_compute_forward_dup_f16(
|
|
|
108
108
|
for (int i01 = ir0; i01 < ir1; i01++) {
|
|
109
109
|
const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
|
|
110
110
|
for (int i00 = 0; i00 < ne00; i00++) {
|
|
111
|
-
dst_ptr[id] =
|
|
111
|
+
dst_ptr[id] = GGML_CPU_FP16_TO_FP32(src0_ptr[i00]);
|
|
112
112
|
id++;
|
|
113
113
|
}
|
|
114
114
|
}
|
|
@@ -130,7 +130,7 @@ static void ggml_compute_forward_dup_f16(
|
|
|
130
130
|
const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
|
|
131
131
|
|
|
132
132
|
for (int i00 = 0; i00 < ne00; i00++) {
|
|
133
|
-
src0_f32[i00] =
|
|
133
|
+
src0_f32[i00] = GGML_CPU_FP16_TO_FP32(src0_ptr[i00]);
|
|
134
134
|
}
|
|
135
135
|
|
|
136
136
|
quantize_row_q(src0_f32, dst_ptr + id, ne00);
|
|
@@ -156,7 +156,7 @@ static void ggml_compute_forward_dup_f16(
|
|
|
156
156
|
for (int i00 = 0; i00 < ne00; i00++) {
|
|
157
157
|
const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
158
158
|
|
|
159
|
-
dst_ptr[id] =
|
|
159
|
+
dst_ptr[id] = GGML_CPU_FP16_TO_FP32(*src0_ptr);
|
|
160
160
|
id++;
|
|
161
161
|
}
|
|
162
162
|
}
|
|
@@ -267,7 +267,7 @@ static void ggml_compute_forward_dup_f16(
|
|
|
267
267
|
const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
268
268
|
char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
|
|
269
269
|
|
|
270
|
-
*(float *) dst_ptr =
|
|
270
|
+
*(float *) dst_ptr = GGML_CPU_FP16_TO_FP32(*(const ggml_fp16_t *) src0_ptr);
|
|
271
271
|
|
|
272
272
|
if (++i10 == ne0) {
|
|
273
273
|
i10 = 0;
|
|
@@ -372,7 +372,7 @@ static void ggml_compute_forward_dup_bf16(
|
|
|
372
372
|
for (int i01 = ir0; i01 < ir1; i01++) {
|
|
373
373
|
const ggml_bf16_t * src0_ptr = (ggml_bf16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
|
|
374
374
|
for (int i00 = 0; i00 < ne00; i00++) {
|
|
375
|
-
dst_ptr[id] =
|
|
375
|
+
dst_ptr[id] = GGML_CPU_FP32_TO_FP16(GGML_BF16_TO_FP32(src0_ptr[i00]));
|
|
376
376
|
id++;
|
|
377
377
|
}
|
|
378
378
|
}
|
|
@@ -473,7 +473,7 @@ static void ggml_compute_forward_dup_bf16(
|
|
|
473
473
|
for (int i00 = 0; i00 < ne00; i00++) {
|
|
474
474
|
const ggml_bf16_t * src0_ptr = (ggml_bf16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
475
475
|
|
|
476
|
-
dst_ptr[id] =
|
|
476
|
+
dst_ptr[id] = GGML_CPU_FP32_TO_FP16(GGML_BF16_TO_FP32(*src0_ptr));
|
|
477
477
|
id++;
|
|
478
478
|
}
|
|
479
479
|
}
|
|
@@ -566,7 +566,7 @@ static void ggml_compute_forward_dup_bf16(
|
|
|
566
566
|
const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
567
567
|
char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
|
|
568
568
|
|
|
569
|
-
*(ggml_fp16_t *) dst_ptr =
|
|
569
|
+
*(ggml_fp16_t *) dst_ptr = GGML_CPU_FP32_TO_FP16(GGML_BF16_TO_FP32(*(const ggml_bf16_t *) src0_ptr));
|
|
570
570
|
|
|
571
571
|
if (++i10 == ne0) {
|
|
572
572
|
i10 = 0;
|
|
@@ -696,24 +696,8 @@ static void ggml_compute_forward_dup_f32(
|
|
|
696
696
|
if (ggml_is_contiguous(dst)) {
|
|
697
697
|
// TODO: simplify
|
|
698
698
|
if (nb00 == sizeof(float)) {
|
|
699
|
-
if (dst->type
|
|
700
|
-
|
|
701
|
-
const size_t rs = ne00 * nb00;
|
|
702
|
-
char * dst_ptr = (char *) dst->data;
|
|
703
|
-
|
|
704
|
-
for (int i03 = 0; i03 < ne03; i03++) {
|
|
705
|
-
for (int i02 = 0; i02 < ne02; i02++) {
|
|
706
|
-
id += rs * ir0;
|
|
707
|
-
for (int i01 = ir0; i01 < ir1; i01++) {
|
|
708
|
-
const char * src0_ptr = (char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03;
|
|
709
|
-
memcpy(dst_ptr + id, src0_ptr, rs);
|
|
710
|
-
id += rs;
|
|
711
|
-
}
|
|
712
|
-
id += rs * (ne01 - ir1);
|
|
713
|
-
}
|
|
714
|
-
}
|
|
715
|
-
} else if (ggml_get_type_traits_cpu(dst->type)->from_float) {
|
|
716
|
-
ggml_from_float_t const quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float;
|
|
699
|
+
if (ggml_get_type_traits_cpu(dst->type)->from_float) {
|
|
700
|
+
ggml_from_float_t const from_float = ggml_get_type_traits_cpu(dst->type)->from_float;
|
|
717
701
|
|
|
718
702
|
size_t id = 0;
|
|
719
703
|
size_t rs = nb0 * (ne00 / ggml_blck_size(dst->type));
|
|
@@ -724,7 +708,7 @@ static void ggml_compute_forward_dup_f32(
|
|
|
724
708
|
id += rs * ir0;
|
|
725
709
|
for (int i01 = ir0; i01 < ir1; i01++) {
|
|
726
710
|
const float * src0_ptr = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
|
|
727
|
-
|
|
711
|
+
from_float(src0_ptr, dst_ptr + id, ne00);
|
|
728
712
|
id += rs;
|
|
729
713
|
}
|
|
730
714
|
id += rs * (ne01 - ir1);
|
|
@@ -765,7 +749,7 @@ static void ggml_compute_forward_dup_f32(
|
|
|
765
749
|
for (int i00 = 0; i00 < ne00; i00++) {
|
|
766
750
|
const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
767
751
|
|
|
768
|
-
dst_ptr[id] =
|
|
752
|
+
dst_ptr[id] = GGML_CPU_FP32_TO_FP16(*src0_ptr);
|
|
769
753
|
id++;
|
|
770
754
|
}
|
|
771
755
|
}
|
|
@@ -878,7 +862,7 @@ static void ggml_compute_forward_dup_f32(
|
|
|
878
862
|
const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
|
|
879
863
|
char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
|
|
880
864
|
|
|
881
|
-
*(ggml_fp16_t *) dst_ptr =
|
|
865
|
+
*(ggml_fp16_t *) dst_ptr = GGML_CPU_FP32_TO_FP16(*(const float *) src0_ptr);
|
|
882
866
|
|
|
883
867
|
if (++i10 == ne0) {
|
|
884
868
|
i10 = 0;
|
|
@@ -1419,7 +1403,7 @@ static void ggml_compute_forward_add1_f16_f32(
|
|
|
1419
1403
|
ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 );
|
|
1420
1404
|
ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
|
|
1421
1405
|
for (int i = 0; i < ne0; i++) {
|
|
1422
|
-
dst_ptr[i] =
|
|
1406
|
+
dst_ptr[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(src0_ptr[i]) + v);
|
|
1423
1407
|
}
|
|
1424
1408
|
}
|
|
1425
1409
|
}
|
|
@@ -1435,7 +1419,7 @@ static void ggml_compute_forward_add1_f16_f16(
|
|
|
1435
1419
|
GGML_ASSERT(ggml_is_scalar(src1));
|
|
1436
1420
|
|
|
1437
1421
|
// scalar to add
|
|
1438
|
-
const float v =
|
|
1422
|
+
const float v = GGML_CPU_FP16_TO_FP32(*(ggml_fp16_t *) src1->data);
|
|
1439
1423
|
|
|
1440
1424
|
const int ith = params->ith;
|
|
1441
1425
|
const int nth = params->nth;
|
|
@@ -1467,7 +1451,7 @@ static void ggml_compute_forward_add1_f16_f16(
|
|
|
1467
1451
|
ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 );
|
|
1468
1452
|
ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
|
|
1469
1453
|
for (int i = 0; i < ne0; i++) {
|
|
1470
|
-
dst_ptr[i] =
|
|
1454
|
+
dst_ptr[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(src0_ptr[i]) + v);
|
|
1471
1455
|
}
|
|
1472
1456
|
}
|
|
1473
1457
|
}
|
|
@@ -1889,7 +1873,7 @@ static void ggml_compute_forward_sum_f16(
|
|
|
1889
1873
|
}
|
|
1890
1874
|
}
|
|
1891
1875
|
}
|
|
1892
|
-
((ggml_fp16_t *) dst->data)[0] =
|
|
1876
|
+
((ggml_fp16_t *) dst->data)[0] = GGML_CPU_FP32_TO_FP16(sum);
|
|
1893
1877
|
}
|
|
1894
1878
|
|
|
1895
1879
|
static void ggml_compute_forward_sum_bf16(
|
|
@@ -2300,6 +2284,12 @@ void ggml_compute_forward_repeat(
|
|
|
2300
2284
|
{
|
|
2301
2285
|
ggml_compute_forward_repeat_f32(params, dst);
|
|
2302
2286
|
} break;
|
|
2287
|
+
// TODO: templateify the implemenation and support for I64
|
|
2288
|
+
// ref https://github.com/ggml-org/llama.cpp/pull/14274#discussion_r2169492225
|
|
2289
|
+
//case GGML_TYPE_I64:
|
|
2290
|
+
// {
|
|
2291
|
+
// ggml_compute_forward_repeat_i64(params, dst);
|
|
2292
|
+
// } break;
|
|
2303
2293
|
default:
|
|
2304
2294
|
{
|
|
2305
2295
|
GGML_ABORT("fatal error");
|
|
@@ -2660,7 +2650,7 @@ static void ggml_compute_forward_gelu_f16(
|
|
|
2660
2650
|
#ifndef NDEBUG
|
|
2661
2651
|
for (int k = 0; k < nc; k++) {
|
|
2662
2652
|
const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
|
|
2663
|
-
const float v =
|
|
2653
|
+
const float v = GGML_CPU_FP16_TO_FP32(x);
|
|
2664
2654
|
GGML_UNUSED(v);
|
|
2665
2655
|
assert(!isnan(v));
|
|
2666
2656
|
assert(!isinf(v));
|
|
@@ -2763,7 +2753,7 @@ static void ggml_compute_forward_gelu_erf_f16(
|
|
|
2763
2753
|
#ifndef NDEBUG
|
|
2764
2754
|
for (int k = 0; k < nc; k++) {
|
|
2765
2755
|
const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
|
|
2766
|
-
const float v =
|
|
2756
|
+
const float v = GGML_CPU_FP16_TO_FP32(x);
|
|
2767
2757
|
GGML_UNUSED(v);
|
|
2768
2758
|
assert(!isnan(v));
|
|
2769
2759
|
assert(!isinf(v));
|
|
@@ -2866,7 +2856,7 @@ static void ggml_compute_forward_gelu_quick_f16(
|
|
|
2866
2856
|
#ifndef NDEBUG
|
|
2867
2857
|
for (int k = 0; k < nc; k++) {
|
|
2868
2858
|
const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
|
|
2869
|
-
const float v =
|
|
2859
|
+
const float v = GGML_CPU_FP16_TO_FP32(x);
|
|
2870
2860
|
GGML_UNUSED(v);
|
|
2871
2861
|
assert(!isnan(v));
|
|
2872
2862
|
assert(!isinf(v));
|
|
@@ -2969,7 +2959,7 @@ static void ggml_compute_forward_silu_f16(
|
|
|
2969
2959
|
#ifndef NDEBUG
|
|
2970
2960
|
for (int k = 0; k < nc; k++) {
|
|
2971
2961
|
const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])))[k];
|
|
2972
|
-
const float v =
|
|
2962
|
+
const float v = GGML_CPU_FP16_TO_FP32(x);
|
|
2973
2963
|
GGML_UNUSED(v);
|
|
2974
2964
|
assert(!isnan(v));
|
|
2975
2965
|
assert(!isinf(v));
|
|
@@ -3163,7 +3153,7 @@ static void ggml_compute_forward_silu_back_f16(
|
|
|
3163
3153
|
#ifndef NDEBUG
|
|
3164
3154
|
for (int k = 0; k < nc; k++) {
|
|
3165
3155
|
const float x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
|
|
3166
|
-
const float v =
|
|
3156
|
+
const float v = GGML_CPU_FP16_TO_FP32(x);
|
|
3167
3157
|
GGML_UNUSED(v);
|
|
3168
3158
|
assert(!isnan(v));
|
|
3169
3159
|
assert(!isinf(v));
|
|
@@ -4470,6 +4460,74 @@ void ggml_compute_forward_get_rows(
|
|
|
4470
4460
|
//}
|
|
4471
4461
|
}
|
|
4472
4462
|
|
|
4463
|
+
static void ggml_compute_forward_set_rows_f32(
|
|
4464
|
+
const ggml_compute_params * params,
|
|
4465
|
+
ggml_tensor * dst) {
|
|
4466
|
+
|
|
4467
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
4468
|
+
const ggml_tensor * src1 = dst->src[1];
|
|
4469
|
+
|
|
4470
|
+
GGML_TENSOR_BINARY_OP_LOCALS
|
|
4471
|
+
|
|
4472
|
+
const int64_t nc = ne00;
|
|
4473
|
+
const int64_t nr = ne01;
|
|
4474
|
+
|
|
4475
|
+
assert(ne0 == nc);
|
|
4476
|
+
assert(ne2 == ne02);
|
|
4477
|
+
assert(ne3 == ne03);
|
|
4478
|
+
assert(src0->type == GGML_TYPE_F32);
|
|
4479
|
+
assert(ne02 % ne11 == 0);
|
|
4480
|
+
assert(ne03 % ne12 == 0);
|
|
4481
|
+
|
|
4482
|
+
const int ith = params->ith;
|
|
4483
|
+
const int nth = params->nth;
|
|
4484
|
+
|
|
4485
|
+
// rows per thread
|
|
4486
|
+
const int64_t dr = (nr + nth - 1)/nth;
|
|
4487
|
+
|
|
4488
|
+
// row range for this thread
|
|
4489
|
+
const int64_t ir0 = dr*ith;
|
|
4490
|
+
const int64_t ir1 = std::min(ir0 + dr, nr);
|
|
4491
|
+
|
|
4492
|
+
ggml_from_float_t const from_float = ggml_get_type_traits_cpu(dst->type)->from_float;
|
|
4493
|
+
|
|
4494
|
+
for (int64_t i03 = 0; i03 < ne03; ++i03) {
|
|
4495
|
+
for (int64_t i02 = 0; i02 < ne02; ++i02) {
|
|
4496
|
+
for (int64_t i = ir0; i < ir1; ++i) {
|
|
4497
|
+
const int64_t i12 = i03%ne12;
|
|
4498
|
+
const int64_t i11 = i02%ne11;
|
|
4499
|
+
const int64_t i10 = i;
|
|
4500
|
+
|
|
4501
|
+
const int64_t i1 = *(int64_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
|
|
4502
|
+
|
|
4503
|
+
GGML_ASSERT(i1 >= 0 && i1 < ne1);
|
|
4504
|
+
|
|
4505
|
+
from_float(
|
|
4506
|
+
(const float *) ((char *) src0->data + i*nb01 + i02*nb02 + i03*nb03),
|
|
4507
|
+
((char *) dst->data + i1*nb1 + i02*nb2 + i03*nb3), nc);
|
|
4508
|
+
}
|
|
4509
|
+
}
|
|
4510
|
+
}
|
|
4511
|
+
}
|
|
4512
|
+
|
|
4513
|
+
void ggml_compute_forward_set_rows(
|
|
4514
|
+
const ggml_compute_params * params,
|
|
4515
|
+
ggml_tensor * dst) {
|
|
4516
|
+
|
|
4517
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
4518
|
+
|
|
4519
|
+
switch (src0->type) {
|
|
4520
|
+
case GGML_TYPE_F32:
|
|
4521
|
+
{
|
|
4522
|
+
ggml_compute_forward_set_rows_f32(params, dst);
|
|
4523
|
+
} break;
|
|
4524
|
+
default:
|
|
4525
|
+
{
|
|
4526
|
+
GGML_ABORT("src0->type = %d (%s) not supported", src0->type, ggml_type_name(src0->type));
|
|
4527
|
+
}
|
|
4528
|
+
}
|
|
4529
|
+
}
|
|
4530
|
+
|
|
4473
4531
|
// ggml_compute_forward_get_rows_back
|
|
4474
4532
|
|
|
4475
4533
|
static void ggml_compute_forward_get_rows_back_f32_f16(
|
|
@@ -4500,7 +4558,7 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
|
|
|
4500
4558
|
|
|
4501
4559
|
for (int j = 0; j < nc; ++j) {
|
|
4502
4560
|
ggml_fp16_t v = ((ggml_fp16_t *) ((char *) src0->data + i*src0->nb[1]))[j];
|
|
4503
|
-
((float *) ((char *) dst->data + r*dst->nb[1]))[j] +=
|
|
4561
|
+
((float *) ((char *) dst->data + r*dst->nb[1]))[j] += GGML_CPU_FP16_TO_FP32(v);
|
|
4504
4562
|
}
|
|
4505
4563
|
}
|
|
4506
4564
|
}
|
|
@@ -4792,7 +4850,7 @@ static void ggml_compute_forward_soft_max_f32(
|
|
|
4792
4850
|
if (mp_f32) {
|
|
4793
4851
|
if (use_f16) {
|
|
4794
4852
|
for (int i = 0; i < nc; ++i) {
|
|
4795
|
-
wp[i] += slope*
|
|
4853
|
+
wp[i] += slope*GGML_CPU_FP16_TO_FP32(mp_f16[i]);
|
|
4796
4854
|
}
|
|
4797
4855
|
} else {
|
|
4798
4856
|
for (int i = 0; i < nc; ++i) {
|
|
@@ -5018,8 +5076,8 @@ static void ggml_compute_forward_clamp_f16(
|
|
|
5018
5076
|
ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + j*nb01);
|
|
5019
5077
|
|
|
5020
5078
|
for (int i = 0; i < nc; i++) {
|
|
5021
|
-
float v =
|
|
5022
|
-
dst_ptr[i] =
|
|
5079
|
+
float v = GGML_CPU_FP16_TO_FP32(src0_ptr[i]);
|
|
5080
|
+
dst_ptr[i] = GGML_CPU_FP32_TO_FP16(MAX(MIN(v, max), min));
|
|
5023
5081
|
}
|
|
5024
5082
|
}
|
|
5025
5083
|
}
|
|
@@ -5476,11 +5534,11 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5476
5534
|
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00);
|
|
5477
5535
|
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0);
|
|
5478
5536
|
|
|
5479
|
-
const float x0 =
|
|
5480
|
-
const float x1 =
|
|
5537
|
+
const float x0 = GGML_CPU_FP16_TO_FP32(src[0]);
|
|
5538
|
+
const float x1 = GGML_CPU_FP16_TO_FP32(src[n_dims]);
|
|
5481
5539
|
|
|
5482
|
-
dst_data[0] =
|
|
5483
|
-
dst_data[n_dims] =
|
|
5540
|
+
dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
5541
|
+
dst_data[n_dims] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
5484
5542
|
}
|
|
5485
5543
|
} else {
|
|
5486
5544
|
for (int64_t i0 = 0; i0 < n_dims; i0 += 2) {
|
|
@@ -5492,11 +5550,11 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5492
5550
|
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00);
|
|
5493
5551
|
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0);
|
|
5494
5552
|
|
|
5495
|
-
const float x0 =
|
|
5496
|
-
const float x1 =
|
|
5553
|
+
const float x0 = GGML_CPU_FP16_TO_FP32(src[0]);
|
|
5554
|
+
const float x1 = GGML_CPU_FP16_TO_FP32(src[n_dims/2]);
|
|
5497
5555
|
|
|
5498
|
-
dst_data[0] =
|
|
5499
|
-
dst_data[n_dims/2] =
|
|
5556
|
+
dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
5557
|
+
dst_data[n_dims/2] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
5500
5558
|
}
|
|
5501
5559
|
}
|
|
5502
5560
|
} else {
|
|
@@ -5507,11 +5565,11 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5507
5565
|
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
|
5508
5566
|
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
5509
5567
|
|
|
5510
|
-
const float x0 =
|
|
5511
|
-
const float x1 =
|
|
5568
|
+
const float x0 = GGML_CPU_FP16_TO_FP32(src[0]);
|
|
5569
|
+
const float x1 = GGML_CPU_FP16_TO_FP32(src[1]);
|
|
5512
5570
|
|
|
5513
|
-
dst_data[0] =
|
|
5514
|
-
dst_data[1] =
|
|
5571
|
+
dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
5572
|
+
dst_data[1] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
5515
5573
|
}
|
|
5516
5574
|
}
|
|
5517
5575
|
|
|
@@ -5525,11 +5583,11 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5525
5583
|
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00);
|
|
5526
5584
|
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0);
|
|
5527
5585
|
|
|
5528
|
-
const float x0 =
|
|
5529
|
-
const float x1 =
|
|
5586
|
+
const float x0 = GGML_CPU_FP16_TO_FP32(src[0]);
|
|
5587
|
+
const float x1 = GGML_CPU_FP16_TO_FP32(src[n_dims]);
|
|
5530
5588
|
|
|
5531
|
-
dst_data[0] =
|
|
5532
|
-
dst_data[n_dims] =
|
|
5589
|
+
dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
5590
|
+
dst_data[n_dims] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
5533
5591
|
}
|
|
5534
5592
|
} else {
|
|
5535
5593
|
for (int64_t i0 = n_dims; i0 < ne0; i0 += 2) {
|
|
@@ -5640,7 +5698,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
|
5640
5698
|
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
|
5641
5699
|
const float * const src = (float *)((char *) src1->data + i11*nb11);
|
|
5642
5700
|
for (int64_t i10 = 0; i10 < ne10; i10++) {
|
|
5643
|
-
dst_data[i10*ne11 + i11] =
|
|
5701
|
+
dst_data[i10*ne11 + i11] = GGML_CPU_FP32_TO_FP16(src[i10]);
|
|
5644
5702
|
}
|
|
5645
5703
|
}
|
|
5646
5704
|
}
|
|
@@ -5933,7 +5991,7 @@ static void ggml_compute_forward_im2col_f16(
|
|
|
5933
5991
|
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
|
5934
5992
|
dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
|
|
5935
5993
|
} else {
|
|
5936
|
-
dst_data[iic*(KH*KW) + ikh*KW + ikw] =
|
|
5994
|
+
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_CPU_FP32_TO_FP16(src_data[iih*IW + iiw]);
|
|
5937
5995
|
}
|
|
5938
5996
|
}
|
|
5939
5997
|
}
|
|
@@ -6109,7 +6167,7 @@ void ggml_compute_forward_conv_transpose_2d(
|
|
|
6109
6167
|
const float * const src = (float *)((char *) src1->data + i12*nb12 + i11*nb11);
|
|
6110
6168
|
ggml_fp16_t * dst_data = wdata + i11*ne10*ne12;
|
|
6111
6169
|
for (int i10 = 0; i10 < ne10; i10++) {
|
|
6112
|
-
dst_data[i10*ne12 + i12] =
|
|
6170
|
+
dst_data[i10*ne12 + i12] = GGML_CPU_FP32_TO_FP16(src[i10]);
|
|
6113
6171
|
}
|
|
6114
6172
|
}
|
|
6115
6173
|
}
|
|
@@ -6358,7 +6416,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
|
6358
6416
|
case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
|
|
6359
6417
|
}
|
|
6360
6418
|
for (int ki = 0; ki < k; ++ki) {
|
|
6361
|
-
const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] :
|
|
6419
|
+
const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_CPU_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
|
|
6362
6420
|
switch (op) {
|
|
6363
6421
|
case GGML_OP_POOL_AVG: drow[i] += srow_j; break;
|
|
6364
6422
|
case GGML_OP_POOL_MAX: if (srow_j > drow[i]) drow[i] = srow_j; break;
|
|
@@ -6450,7 +6508,7 @@ void ggml_compute_forward_pool_2d(
|
|
|
6450
6508
|
for (int kx = 0; kx < k0; ++kx) {
|
|
6451
6509
|
int j = ix + kx;
|
|
6452
6510
|
if (j < 0 || j >= src->ne[0]) continue;
|
|
6453
|
-
const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] :
|
|
6511
|
+
const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_CPU_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
|
|
6454
6512
|
switch (op) {
|
|
6455
6513
|
case GGML_OP_POOL_AVG: *out += srow_j; break;
|
|
6456
6514
|
case GGML_OP_POOL_MAX: if (srow_j > *out) *out = srow_j; break;
|
|
@@ -6538,7 +6596,7 @@ void ggml_compute_forward_pool_2d_back(
|
|
|
6538
6596
|
}
|
|
6539
6597
|
|
|
6540
6598
|
const float val = dst->type == GGML_TYPE_F32 ?
|
|
6541
|
-
((const float *) drowf)[j] :
|
|
6599
|
+
((const float *) drowf)[j] : GGML_CPU_FP16_TO_FP32(((const ggml_fp16_t *) drowf)[j]);
|
|
6542
6600
|
if (val <= maxval) {
|
|
6543
6601
|
continue;
|
|
6544
6602
|
}
|
|
@@ -6558,7 +6616,7 @@ void ggml_compute_forward_pool_2d_back(
|
|
|
6558
6616
|
if (dst->type == GGML_TYPE_F32) {
|
|
6559
6617
|
((float *) drow)[j] += grad0;
|
|
6560
6618
|
} else {
|
|
6561
|
-
((ggml_fp16_t *) drow)[j] =
|
|
6619
|
+
((ggml_fp16_t *) drow)[j] = GGML_CPU_FP32_TO_FP16(grad0 + GGML_CPU_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j]));
|
|
6562
6620
|
}
|
|
6563
6621
|
} else if (op == GGML_OP_POOL_AVG) {
|
|
6564
6622
|
const float grad = grad0 / ka;
|
|
@@ -6577,7 +6635,7 @@ void ggml_compute_forward_pool_2d_back(
|
|
|
6577
6635
|
if (dst->type == GGML_TYPE_F32) {
|
|
6578
6636
|
((float *) drow)[j] += grad;
|
|
6579
6637
|
} else {
|
|
6580
|
-
((ggml_fp16_t *) drow)[j] +=
|
|
6638
|
+
((ggml_fp16_t *) drow)[j] += GGML_CPU_FP32_TO_FP16(grad);
|
|
6581
6639
|
}
|
|
6582
6640
|
}
|
|
6583
6641
|
}
|
|
@@ -6793,6 +6851,73 @@ void ggml_compute_forward_pad_reflect_1d(
|
|
|
6793
6851
|
}
|
|
6794
6852
|
}
|
|
6795
6853
|
|
|
6854
|
+
// ggml_compute_forward_roll
|
|
6855
|
+
|
|
6856
|
+
static int64_t ggml_wrap_index(int64_t i, int64_t ne) {
|
|
6857
|
+
if (i < 0) {
|
|
6858
|
+
return i + ne;
|
|
6859
|
+
} else if (i >= ne) {
|
|
6860
|
+
return i - ne;
|
|
6861
|
+
}
|
|
6862
|
+
return i;
|
|
6863
|
+
}
|
|
6864
|
+
|
|
6865
|
+
static void ggml_compute_forward_roll_f32(
|
|
6866
|
+
const ggml_compute_params * params,
|
|
6867
|
+
ggml_tensor * dst) {
|
|
6868
|
+
|
|
6869
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
6870
|
+
const float * src_data = (const float *) src0->data;
|
|
6871
|
+
float * dst_data = (float *) dst->data;
|
|
6872
|
+
|
|
6873
|
+
GGML_TENSOR_UNARY_OP_LOCALS
|
|
6874
|
+
|
|
6875
|
+
const int s0 = ggml_get_op_params_i32(dst, 0);
|
|
6876
|
+
const int s1 = ggml_get_op_params_i32(dst, 1);
|
|
6877
|
+
const int s2 = ggml_get_op_params_i32(dst, 2);
|
|
6878
|
+
const int s3 = ggml_get_op_params_i32(dst, 3);
|
|
6879
|
+
|
|
6880
|
+
const int64_t total = ne1 * ne2 * ne3;
|
|
6881
|
+
const int64_t per_thread = (total + params->nth) / params->nth;
|
|
6882
|
+
const int64_t start = params->ith * per_thread;
|
|
6883
|
+
const int64_t end = std::min(start + per_thread, total);
|
|
6884
|
+
|
|
6885
|
+
for (int64_t i = start; i < end; ++i) {
|
|
6886
|
+
const int64_t i1 = i % ne1;
|
|
6887
|
+
const int64_t i2 = (i / ne1) % ne2;
|
|
6888
|
+
const int64_t i3 = i / (ne2 * ne1);
|
|
6889
|
+
float * dst_row = dst_data + (i3*nb3 + i2*nb2 + i1*nb1) / sizeof(float);
|
|
6890
|
+
|
|
6891
|
+
const int64_t i01 = ggml_wrap_index(i1 - s1, ne01);
|
|
6892
|
+
const int64_t i02 = ggml_wrap_index(i2 - s2, ne02);
|
|
6893
|
+
const int64_t i03 = ggml_wrap_index(i3 - s3, ne03);
|
|
6894
|
+
const float * src_row = src_data + (i03*nb03 + i02*nb02 + i01*nb01) / sizeof(float);
|
|
6895
|
+
|
|
6896
|
+
const int64_t s = ggml_wrap_index(-s0, ne00);
|
|
6897
|
+
const int64_t n = ne00 - s;
|
|
6898
|
+
ggml_vec_cpy_f32(n, dst_row, src_row + s);
|
|
6899
|
+
ggml_vec_cpy_f32(s, dst_row + n, src_row);
|
|
6900
|
+
}
|
|
6901
|
+
}
|
|
6902
|
+
|
|
6903
|
+
void ggml_compute_forward_roll(
|
|
6904
|
+
const ggml_compute_params * params,
|
|
6905
|
+
ggml_tensor * dst) {
|
|
6906
|
+
|
|
6907
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
6908
|
+
|
|
6909
|
+
switch (src0->type) {
|
|
6910
|
+
case GGML_TYPE_F32:
|
|
6911
|
+
{
|
|
6912
|
+
ggml_compute_forward_roll_f32(params, dst);
|
|
6913
|
+
} break;
|
|
6914
|
+
default:
|
|
6915
|
+
{
|
|
6916
|
+
GGML_ABORT("fatal error");
|
|
6917
|
+
}
|
|
6918
|
+
}
|
|
6919
|
+
}
|
|
6920
|
+
|
|
6796
6921
|
// ggml_compute_forward_arange
|
|
6797
6922
|
|
|
6798
6923
|
static void ggml_compute_forward_arange_f32(
|
|
@@ -7075,7 +7200,7 @@ static void ggml_compute_forward_flash_attn_ext_f16(
|
|
|
7075
7200
|
// loop over n_kv and n_head_kv
|
|
7076
7201
|
// ref: https://arxiv.org/pdf/2112.05682.pdf
|
|
7077
7202
|
for (int64_t ic = 0; ic < nek1; ++ic) {
|
|
7078
|
-
const float mv = mp ? slope*
|
|
7203
|
+
const float mv = mp ? slope*GGML_CPU_FP16_TO_FP32(mp[ic]) : 0.0f;
|
|
7079
7204
|
if (mv == -INFINITY) {
|
|
7080
7205
|
continue;
|
|
7081
7206
|
}
|
|
@@ -7143,7 +7268,7 @@ static void ggml_compute_forward_flash_attn_ext_f16(
|
|
|
7143
7268
|
|
|
7144
7269
|
if (v->type == GGML_TYPE_F16) {
|
|
7145
7270
|
for (int64_t d = 0; d < DV; ++d) {
|
|
7146
|
-
VKQ32[d] =
|
|
7271
|
+
VKQ32[d] = GGML_CPU_FP16_TO_FP32(VKQ16[d]);
|
|
7147
7272
|
}
|
|
7148
7273
|
}
|
|
7149
7274
|
|
|
@@ -53,6 +53,7 @@ void ggml_compute_forward_permute(const struct ggml_compute_params * params, str
|
|
|
53
53
|
void ggml_compute_forward_transpose(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
54
54
|
void ggml_compute_forward_get_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
55
55
|
void ggml_compute_forward_get_rows_back(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
56
|
+
void ggml_compute_forward_set_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
56
57
|
void ggml_compute_forward_diag(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
57
58
|
void ggml_compute_forward_diag_mask_inf(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
58
59
|
void ggml_compute_forward_diag_mask_zero(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
@@ -72,6 +73,7 @@ void ggml_compute_forward_pool_2d_back(const struct ggml_compute_params * params
|
|
|
72
73
|
void ggml_compute_forward_upscale(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
73
74
|
void ggml_compute_forward_pad(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
74
75
|
void ggml_compute_forward_pad_reflect_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
76
|
+
void ggml_compute_forward_roll(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
75
77
|
void ggml_compute_forward_arange(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
76
78
|
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
|
77
79
|
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|