@fugood/llama.node 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
|
@@ -1,11 +1,31 @@
|
|
|
1
|
+
// This file defines tests for various GGML ops and backends.
|
|
2
|
+
// For the forward pass it asserts that the results of multiple backends computing the same GGML ops are consistent.
|
|
3
|
+
// For the backward pass it asserts that the gradients from backpropagation are consistent
|
|
4
|
+
// with the gradients obtained via the method of finite differences ("grad" mode, this is optional).
|
|
5
|
+
// It is also possible to check the performance ("perf" mode).
|
|
6
|
+
//
|
|
7
|
+
// This file has three sections: section 1 does general setup, section 2 defines the GGML ops to be tested,
|
|
8
|
+
// and section 3 defines which tests to run.
|
|
9
|
+
// Quick start for adding a new GGML op: Go to section 2 and create a struct that inherits from test_case,
|
|
10
|
+
// then go to section 3 and add an instantiation of your struct.
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
// ##############################
|
|
14
|
+
// ## Section 1: General Setup ##
|
|
15
|
+
// ##############################
|
|
16
|
+
|
|
17
|
+
|
|
1
18
|
#include <ggml.h>
|
|
19
|
+
#include <ggml-cpu.h>
|
|
2
20
|
#include <ggml-alloc.h>
|
|
3
21
|
#include <ggml-backend.h>
|
|
4
22
|
|
|
5
23
|
#include <algorithm>
|
|
6
24
|
#include <array>
|
|
7
25
|
#include <cfloat>
|
|
26
|
+
#include <cstdint>
|
|
8
27
|
#include <cstring>
|
|
28
|
+
#include <cinttypes>
|
|
9
29
|
#include <functional>
|
|
10
30
|
#include <memory>
|
|
11
31
|
#include <random>
|
|
@@ -13,64 +33,52 @@
|
|
|
13
33
|
#include <stdlib.h>
|
|
14
34
|
#include <string>
|
|
15
35
|
#include <thread>
|
|
36
|
+
#include <future>
|
|
16
37
|
#include <vector>
|
|
17
38
|
|
|
18
|
-
|
|
19
39
|
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
std::
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
40
|
+
size_t nels = ggml_nelements(tensor);
|
|
41
|
+
std::vector<float> data(nels);
|
|
42
|
+
{
|
|
43
|
+
// parallel initialization
|
|
44
|
+
static const size_t n_threads = std::thread::hardware_concurrency();
|
|
45
|
+
// static RNG initialization (revisit if n_threads stops being constant)
|
|
46
|
+
static std::vector<std::default_random_engine> generators = []() {
|
|
47
|
+
std::random_device rd;
|
|
48
|
+
std::vector<std::default_random_engine> vec;
|
|
49
|
+
vec.reserve(n_threads);
|
|
50
|
+
//for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed
|
|
51
|
+
for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
|
|
52
|
+
return vec;
|
|
53
|
+
}();
|
|
54
|
+
|
|
55
|
+
auto init_thread = [&](size_t ith, size_t start, size_t end) {
|
|
56
|
+
std::uniform_real_distribution<float> distribution(min, max);
|
|
57
|
+
auto & gen = generators[ith];
|
|
58
|
+
for (size_t i = start; i < end; i++) {
|
|
59
|
+
data[i] = distribution(gen);
|
|
60
|
+
}
|
|
61
|
+
};
|
|
33
62
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
for (size_t i =
|
|
37
|
-
|
|
63
|
+
std::vector<std::future<void>> tasks;
|
|
64
|
+
tasks.reserve(n_threads);
|
|
65
|
+
for (size_t i = 0; i < n_threads; i++) {
|
|
66
|
+
size_t start = i*nels/n_threads;
|
|
67
|
+
size_t end = (i+1)*nels/n_threads;
|
|
68
|
+
tasks.push_back(std::async(std::launch::async, init_thread, i, start, end));
|
|
38
69
|
}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
std::vector<std::thread> threads;
|
|
42
|
-
threads.reserve(n_threads);
|
|
43
|
-
for (size_t i = 0; i < n_threads; i++) {
|
|
44
|
-
size_t start = i*size/n_threads;
|
|
45
|
-
size_t end = (i+1)*size/n_threads;
|
|
46
|
-
threads.emplace_back(init_thread, i, start, end);
|
|
47
|
-
}
|
|
48
|
-
for (auto & t : threads) {
|
|
49
|
-
t.join();
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
#if 0
|
|
53
|
-
const char * val_str = getenv("GGML_TEST_EPS");
|
|
54
|
-
float val = 1e-9f;
|
|
55
|
-
if (val_str != nullptr) {
|
|
56
|
-
val = std::stof(val_str);
|
|
57
|
-
printf("GGML_TEST_EPS=%e\n", val);
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
// test quantization with very small values that may result in nan scales due to division by zero
|
|
61
|
-
if (ggml_is_quantized(tensor->type)) {
|
|
62
|
-
for (int i = 0; i < 256; i++) {
|
|
63
|
-
data[i] = val;
|
|
70
|
+
for (auto & t : tasks) {
|
|
71
|
+
t.get();
|
|
64
72
|
}
|
|
65
73
|
}
|
|
66
|
-
#endif
|
|
67
74
|
|
|
68
75
|
if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
|
|
69
|
-
ggml_backend_tensor_set(tensor, data.data(), 0,
|
|
76
|
+
ggml_backend_tensor_set(tensor, data.data(), 0, nels * sizeof(float));
|
|
70
77
|
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_BF16) {
|
|
71
|
-
GGML_ASSERT(
|
|
72
|
-
|
|
73
|
-
|
|
78
|
+
GGML_ASSERT(nels % ggml_blck_size(tensor->type) == 0);
|
|
79
|
+
|
|
80
|
+
// dummy importance matrix
|
|
81
|
+
std::vector<float> imatrix(tensor->ne[0], 1.0f);
|
|
74
82
|
const float * im = imatrix.data();
|
|
75
83
|
if (!ggml_quantize_requires_imatrix(tensor->type)) {
|
|
76
84
|
// when the imatrix is optional, we want to test both quantization with and without imatrix
|
|
@@ -80,19 +88,40 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
|
|
80
88
|
}
|
|
81
89
|
}
|
|
82
90
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
91
|
+
std::vector<uint8_t> dataq(ggml_row_size(tensor->type, nels));
|
|
92
|
+
{
|
|
93
|
+
// parallel quantization by block
|
|
94
|
+
size_t blck_size = ggml_blck_size(tensor->type);
|
|
95
|
+
size_t n_blocks = nels / blck_size;
|
|
96
|
+
|
|
97
|
+
auto quantize_thread = [&](size_t start, size_t end) {
|
|
98
|
+
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(),
|
|
99
|
+
start * blck_size, end - start, blck_size, im);
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
const size_t min_blocks_per_thread = 1;
|
|
103
|
+
const size_t n_threads = std::min<size_t>(std::thread::hardware_concurrency()/2,
|
|
104
|
+
std::max<size_t>(1, n_blocks / min_blocks_per_thread));
|
|
105
|
+
std::vector<std::future<void>> tasks;
|
|
106
|
+
tasks.reserve(n_threads);
|
|
107
|
+
for (size_t i = 0; i < n_threads; i++) {
|
|
108
|
+
size_t start = i*n_blocks/n_threads;
|
|
109
|
+
size_t end = (i+1)*n_blocks/n_threads;
|
|
110
|
+
tasks.push_back(std::async(std::launch::async, quantize_thread, start, end));
|
|
111
|
+
}
|
|
112
|
+
for (auto & t : tasks) {
|
|
113
|
+
t.get();
|
|
114
|
+
}
|
|
115
|
+
}
|
|
92
116
|
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
|
93
117
|
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
|
94
118
|
// This is going to create some weird integers though.
|
|
95
119
|
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
|
|
120
|
+
} else if (tensor->type == GGML_TYPE_I64) {
|
|
121
|
+
// Integers with a size of 8 bytes can be set by mirroring the float data; the specific values are again not really meaningful.
|
|
122
|
+
const size_t nbytes_half = ggml_nbytes(tensor)/2;
|
|
123
|
+
ggml_backend_tensor_set(tensor, data.data(), 0*nbytes_half, nbytes_half);
|
|
124
|
+
ggml_backend_tensor_set(tensor, data.data(), 1*nbytes_half, nbytes_half);
|
|
96
125
|
} else {
|
|
97
126
|
GGML_ABORT("fatal error");
|
|
98
127
|
}
|
|
@@ -105,7 +134,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|
|
105
134
|
std::vector<uint8_t> buf(ggml_nbytes(t));
|
|
106
135
|
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
|
|
107
136
|
|
|
108
|
-
|
|
137
|
+
const auto * tt = ggml_get_type_traits(t->type);
|
|
109
138
|
size_t bs = ggml_blck_size(t->type);
|
|
110
139
|
std::vector<float> vq(ggml_blck_size(t->type));
|
|
111
140
|
bool quantized = ggml_is_quantized(t->type);
|
|
@@ -122,6 +151,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|
|
122
151
|
tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i]));
|
|
123
152
|
} else if (t->type == GGML_TYPE_F32) {
|
|
124
153
|
tv.push_back(*(float *) &buf[i]);
|
|
154
|
+
} else if (t->type == GGML_TYPE_I64) {
|
|
155
|
+
tv.push_back((float)*(int64_t *) &buf[i]);
|
|
125
156
|
} else if (t->type == GGML_TYPE_I32) {
|
|
126
157
|
tv.push_back((float)*(int32_t *) &buf[i]);
|
|
127
158
|
} else if (t->type == GGML_TYPE_I16) {
|
|
@@ -129,7 +160,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|
|
129
160
|
} else if (t->type == GGML_TYPE_I8) {
|
|
130
161
|
tv.push_back((float)*(int8_t *) &buf[i]);
|
|
131
162
|
} else if (quantized) {
|
|
132
|
-
tt
|
|
163
|
+
tt->to_float(&buf[i], vq.data(), bs);
|
|
133
164
|
tv.insert(tv.end(), vq.begin(), vq.end());
|
|
134
165
|
} else {
|
|
135
166
|
GGML_ABORT("fatal error");
|
|
@@ -142,60 +173,6 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|
|
142
173
|
return tv;
|
|
143
174
|
}
|
|
144
175
|
|
|
145
|
-
/*
|
|
146
|
-
static double cosine_similarity(const float * v1, const float * v2, size_t n) {
|
|
147
|
-
double dot = 0.0;
|
|
148
|
-
double mag1 = 0.0;
|
|
149
|
-
double mag2 = 0.0;
|
|
150
|
-
|
|
151
|
-
for (size_t i = 0; i < n; i++) {
|
|
152
|
-
if (std::isnan(v1[i]) || std::isnan(v2[i])) {
|
|
153
|
-
return -1.0f;
|
|
154
|
-
}
|
|
155
|
-
if (std::isinf(v1[i]) && std::isinf(v2[i])) {
|
|
156
|
-
continue;
|
|
157
|
-
}
|
|
158
|
-
dot += v1[i]*v2[i];
|
|
159
|
-
mag1 += v1[i]*v1[i];
|
|
160
|
-
mag2 += v2[i]*v2[i];
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
return dot/sqrt(mag1*mag2);
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
static float distance(const float * v1, const float * v2, size_t n) {
|
|
167
|
-
double d = 0.0;
|
|
168
|
-
|
|
169
|
-
for (size_t i = 0; i < n; i++) {
|
|
170
|
-
if (std::isnan(v1[i]) || std::isnan(v2[i])) {
|
|
171
|
-
return INFINITY;
|
|
172
|
-
}
|
|
173
|
-
if (std::isinf(v1[i]) && std::isinf(v2[i])) {
|
|
174
|
-
continue;
|
|
175
|
-
}
|
|
176
|
-
d += (v1[i] - v2[i])*(v1[i] - v2[i]);
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
return sqrt(d);
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
static float vec_len(const float * v, size_t n) {
|
|
183
|
-
double d = 0.0;
|
|
184
|
-
|
|
185
|
-
for (size_t i = 0; i < n; i++) {
|
|
186
|
-
if (std::isnan(v[i])) {
|
|
187
|
-
return INFINITY;
|
|
188
|
-
}
|
|
189
|
-
if (std::isinf(v[i])) {
|
|
190
|
-
continue;
|
|
191
|
-
}
|
|
192
|
-
d += v[i]*v[i];
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
return sqrt(d);
|
|
196
|
-
}
|
|
197
|
-
*/
|
|
198
|
-
|
|
199
176
|
// normalized mean squared error = mse(a, b) / mse(a, 0)
|
|
200
177
|
static double nmse(const float * a, const float * b, size_t n) {
|
|
201
178
|
double mse_a_b = 0.0;
|
|
@@ -212,8 +189,40 @@ static double nmse(const float * a, const float * b, size_t n) {
|
|
|
212
189
|
return mse_a_b / mse_a_0;
|
|
213
190
|
}
|
|
214
191
|
|
|
192
|
+
// Mean absolute asymmetry between a and b.
// The asymmetry of a pair of values is (a - b) / (a + b).
// This is more stable than relative error if one of the values fluctuates towards zero.
//
// a, b:          arrays holding at least n values each.
// n:             number of values to compare.
// expected_vals: optional vector of expected values for a. If expected_vals is not empty, every comparison where
//                a does not match any of the expected values (within 1e-3) is filtered out.
//                Needed for noncontinuous gradients where the numerical calculation can fail.
//
// Returns the mean of |asymmetry| over all comparisons that were not filtered out,
// or 0.0 if there are none (n == 0 or everything was filtered out) rather than the NaN that 0/0 would yield.
static double mean_abs_asymm(const float * a, const float * b, const size_t n, const std::vector<float> & expected_vals) {
    double sum = 0.0;

    size_t nvalid = 0;
    for (size_t i = 0; i < n; i++) {
        if (!expected_vals.empty()) {
            bool matches_any = false;
            for (const float & ev : expected_vals) {
                if (fabsf(a[i] - ev) < 1e-3f) {
                    matches_any = true;
                    break;
                }
            }
            if (!matches_any) {
                continue;
            }
        }

        // Identical values have zero asymmetry. Handling this explicitly keeps a pair of exact zeros
        // from evaluating 0/0 = NaN, which would poison the sum and make any `>` comparison on the result false.
        const float asymm = a[i] == b[i] ? 0.0f : (a[i] - b[i]) / (a[i] + b[i]);

        sum += fabsf(asymm);
        nvalid++;
    }

    // Nothing was compared: report "no asymmetry" instead of dividing by zero.
    if (nvalid == 0) {
        return 0.0;
    }

    return sum/nvalid;
}
|
|
224
|
+
|
|
215
225
|
// utils for printing the variables of the test cases
|
|
216
|
-
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
|
|
217
226
|
|
|
218
227
|
template<typename T>
|
|
219
228
|
static std::string var_to_str(const T & x) {
|
|
@@ -246,10 +255,6 @@ static std::string var_to_str(const std::array<T, N> & x) {
|
|
|
246
255
|
return s;
|
|
247
256
|
}
|
|
248
257
|
|
|
249
|
-
//static std::string var_to_str(ggml_unary_op unary_op) {
|
|
250
|
-
// return ggml_unary_op_name(unary_op);
|
|
251
|
-
//}
|
|
252
|
-
|
|
253
258
|
static std::string var_to_str(ggml_type type) {
|
|
254
259
|
return ggml_type_name(type);
|
|
255
260
|
}
|
|
@@ -262,6 +267,8 @@ static std::string var_to_str(ggml_op_pool pool) {
|
|
|
262
267
|
}
|
|
263
268
|
}
|
|
264
269
|
|
|
270
|
+
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
|
|
271
|
+
|
|
265
272
|
#define VARS_TO_STR1(a) VAR_TO_STR(a)
|
|
266
273
|
#define VARS_TO_STR2(a, b) VAR_TO_STR(a) + "," + VAR_TO_STR(b)
|
|
267
274
|
#define VARS_TO_STR3(a, b, c) VAR_TO_STR(a) + "," + VARS_TO_STR2(b, c)
|
|
@@ -295,6 +302,7 @@ static bool ggml_is_view_op(enum ggml_op op) {
|
|
|
295
302
|
enum test_mode {
|
|
296
303
|
MODE_TEST,
|
|
297
304
|
MODE_PERF,
|
|
305
|
+
MODE_GRAD,
|
|
298
306
|
};
|
|
299
307
|
|
|
300
308
|
struct test_case {
|
|
@@ -314,6 +322,32 @@ struct test_case {
|
|
|
314
322
|
return 1e-7;
|
|
315
323
|
}
|
|
316
324
|
|
|
325
|
+
virtual double max_maa_err() {
|
|
326
|
+
return 1e-4;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
virtual float grad_eps() {
|
|
330
|
+
return 1e-1f;
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
// If false, estimate gradient with 2 points, neglects 3rd order derivative and higher.
|
|
334
|
+
// If true, estimate gradient with 4 points, neglects 5th order derivative and higher.
|
|
335
|
+
virtual bool grad_precise() {
|
|
336
|
+
return false;
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
// Skip gradient checks if total number of gradients to be checked is larger than this (to speed up the tests).
|
|
340
|
+
virtual int64_t grad_nmax() {
|
|
341
|
+
return 10000;
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
// No effect if empty.
|
|
345
|
+
// If not empty, skip all gradient checks where the numerical result does not match any of the values.
|
|
346
|
+
// Needed for dealing with noncontinuous gradients (e.g. ReLU) where estimation using finite differences is unreliable.
|
|
347
|
+
virtual std::vector<float> grad_expect() {
|
|
348
|
+
return {};
|
|
349
|
+
}
|
|
350
|
+
|
|
317
351
|
virtual void initialize_tensors(ggml_context * ctx) {
|
|
318
352
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
|
|
319
353
|
init_tensor_uniform(t);
|
|
@@ -331,7 +365,13 @@ struct test_case {
|
|
|
331
365
|
return size;
|
|
332
366
|
}
|
|
333
367
|
|
|
368
|
+
virtual uint64_t op_flops(ggml_tensor * t) {
|
|
369
|
+
GGML_UNUSED(t);
|
|
370
|
+
return 0;
|
|
371
|
+
}
|
|
372
|
+
|
|
334
373
|
ggml_cgraph * gf = nullptr;
|
|
374
|
+
ggml_cgraph * gb = nullptr;
|
|
335
375
|
|
|
336
376
|
static const int sentinel_size = 1024;
|
|
337
377
|
|
|
@@ -340,7 +380,7 @@ struct test_case {
|
|
|
340
380
|
std::vector<ggml_tensor *> sentinels;
|
|
341
381
|
|
|
342
382
|
void add_sentinel(ggml_context * ctx) {
|
|
343
|
-
if (mode == MODE_PERF) {
|
|
383
|
+
if (mode == MODE_PERF || mode == MODE_GRAD) {
|
|
344
384
|
return;
|
|
345
385
|
}
|
|
346
386
|
ggml_tensor * sentinel = ::ggml_new_tensor_1d(ctx, GGML_TYPE_F32, sentinel_size);
|
|
@@ -389,6 +429,7 @@ struct test_case {
|
|
|
389
429
|
/* .no_alloc = */ true,
|
|
390
430
|
};
|
|
391
431
|
ggml_context * ctx = ggml_init(params);
|
|
432
|
+
GGML_ASSERT(ctx);
|
|
392
433
|
|
|
393
434
|
gf = ggml_new_graph(ctx);
|
|
394
435
|
|
|
@@ -439,7 +480,7 @@ struct test_case {
|
|
|
439
480
|
|
|
440
481
|
// add sentinels as graph nodes so that they are checked in the callback
|
|
441
482
|
for (ggml_tensor * sentinel : sentinels) {
|
|
442
|
-
gf
|
|
483
|
+
ggml_graph_add_node(gf, sentinel);
|
|
443
484
|
}
|
|
444
485
|
|
|
445
486
|
// randomize tensors
|
|
@@ -550,6 +591,7 @@ struct test_case {
|
|
|
550
591
|
/* .no_alloc = */ true,
|
|
551
592
|
};
|
|
552
593
|
ggml_context * ctx = ggml_init(params);
|
|
594
|
+
GGML_ASSERT(ctx);
|
|
553
595
|
|
|
554
596
|
ggml_tensor * out = build_graph(ctx);
|
|
555
597
|
|
|
@@ -570,12 +612,11 @@ struct test_case {
|
|
|
570
612
|
}
|
|
571
613
|
|
|
572
614
|
// align while also leaving some margin for variations in parameters
|
|
573
|
-
int align =
|
|
615
|
+
int align = 8;
|
|
574
616
|
int last = (len + align - 1) / align * align;
|
|
575
617
|
if (last - len < 5) {
|
|
576
618
|
last += align;
|
|
577
619
|
}
|
|
578
|
-
last = std::max(last, 60);
|
|
579
620
|
printf("%*s", last - len, "");
|
|
580
621
|
|
|
581
622
|
// allocate
|
|
@@ -596,11 +637,27 @@ struct test_case {
|
|
|
596
637
|
// warmup run
|
|
597
638
|
ggml_backend_graph_compute(backend, gf);
|
|
598
639
|
|
|
640
|
+
// determine number of runs
|
|
641
|
+
int n_runs;
|
|
642
|
+
if (op_flops(out) > 0) {
|
|
643
|
+
// based on flops
|
|
644
|
+
const uint64_t GFLOP = 1000 * 1000 * 1000;
|
|
645
|
+
const uint64_t target_flops_cpu = 8ULL * GFLOP;
|
|
646
|
+
const uint64_t target_flops_gpu = 100ULL * GFLOP;
|
|
647
|
+
uint64_t target_flops = ggml_backend_is_cpu(backend) ? target_flops_cpu : target_flops_gpu;
|
|
648
|
+
n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_flops / op_flops(out)) + 1;
|
|
649
|
+
} else {
|
|
650
|
+
// based on memory size
|
|
651
|
+
const size_t GB = 1ULL << 30;
|
|
652
|
+
const size_t target_size_cpu = 8 * GB;
|
|
653
|
+
const size_t target_size_gpu = 32 * GB;
|
|
654
|
+
size_t target_size = ggml_backend_is_cpu(backend) ? target_size_cpu : target_size_gpu;
|
|
655
|
+
n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
|
|
656
|
+
}
|
|
657
|
+
|
|
599
658
|
// duplicate the op
|
|
600
|
-
size_t target_size = ggml_backend_is_cpu(backend) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
|
|
601
|
-
int n_runs = std::min((size_t)gf->size - gf->n_nodes, target_size / op_size(out)) + 1;
|
|
602
659
|
for (int i = 1; i < n_runs; i++) {
|
|
603
|
-
gf
|
|
660
|
+
ggml_graph_add_node(gf, out);
|
|
604
661
|
}
|
|
605
662
|
|
|
606
663
|
// calculate memory
|
|
@@ -615,36 +672,338 @@ struct test_case {
|
|
|
615
672
|
}
|
|
616
673
|
return size;
|
|
617
674
|
};
|
|
618
|
-
for (int i = 0; i < gf
|
|
619
|
-
if (ggml_is_view_op(gf
|
|
675
|
+
for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
|
|
676
|
+
if (ggml_is_view_op(ggml_graph_node(gf, i)->op) || ggml_graph_node(gf, i) == out) {
|
|
620
677
|
continue;
|
|
621
678
|
}
|
|
622
|
-
mem += tensor_op_size(gf
|
|
679
|
+
mem += tensor_op_size(ggml_graph_node(gf, i));
|
|
623
680
|
}
|
|
624
681
|
|
|
625
682
|
// run
|
|
626
|
-
|
|
683
|
+
int64_t total_time_us = 0;
|
|
684
|
+
int64_t total_mem = 0;
|
|
685
|
+
int total_runs = 0;
|
|
686
|
+
do {
|
|
687
|
+
int64_t start_time = ggml_time_us();
|
|
688
|
+
ggml_backend_graph_compute(backend, gf);
|
|
689
|
+
int64_t end_time = ggml_time_us();
|
|
690
|
+
|
|
691
|
+
total_time_us += end_time - start_time;
|
|
692
|
+
total_mem += mem;
|
|
693
|
+
total_runs += n_runs;
|
|
694
|
+
} while (total_time_us < 1000*1000); // run for at least 1 second
|
|
695
|
+
|
|
696
|
+
printf(" %8d runs - %8.2f us/run - ",
|
|
697
|
+
total_runs,
|
|
698
|
+
(double)total_time_us / total_runs);
|
|
699
|
+
|
|
700
|
+
if (op_flops(out) > 0) {
|
|
701
|
+
double flops_per_sec = (op_flops(out) * total_runs) / (total_time_us / 1e6);
|
|
702
|
+
auto format_flops = [](double flops) -> std::string {
|
|
703
|
+
char buf[256];
|
|
704
|
+
if (flops >= 1e12) {
|
|
705
|
+
snprintf(buf, sizeof(buf), "%6.2f TFLOP", flops / 1e12);
|
|
706
|
+
} else if (flops >= 1e9) {
|
|
707
|
+
snprintf(buf, sizeof(buf), "%6.2f GFLOP", flops / 1e9);
|
|
708
|
+
} else if (flops >= 1e6) {
|
|
709
|
+
snprintf(buf, sizeof(buf), "%6.2f MFLOP", flops / 1e6);
|
|
710
|
+
} else {
|
|
711
|
+
snprintf(buf, sizeof(buf), "%6.2f KFLOP", flops / 1e3);
|
|
712
|
+
}
|
|
713
|
+
return buf;
|
|
714
|
+
};
|
|
715
|
+
printf("%s/run - \033[1;34m%sS\033[0m",
|
|
716
|
+
format_flops(op_flops(out)).c_str(),
|
|
717
|
+
format_flops(flops_per_sec).c_str());
|
|
718
|
+
|
|
719
|
+
} else {
|
|
720
|
+
printf("%8zu kB/run - \033[1;34m%7.2f GB/s\033[0m",
|
|
721
|
+
op_size(out) / 1024,
|
|
722
|
+
total_mem / (total_time_us / 1e6) / 1024.0 / 1024.0 / 1024.0);
|
|
723
|
+
}
|
|
724
|
+
printf("\n");
|
|
725
|
+
|
|
726
|
+
ggml_backend_buffer_free(buf);
|
|
727
|
+
|
|
728
|
+
ggml_free(ctx);
|
|
729
|
+
|
|
730
|
+
return true;
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
// Checks the backward pass of this test case on the given backend by comparing the gradients
// produced by the backward graph ("algebraic", ga) against finite-difference estimates obtained
// from repeated forward passes ("numeric", gn).
//
//   backend: backend on which both the forward and the backward graph are computed.
//   op_name: if not nullptr, only test cases whose op description equals op_name are run;
//            all others are skipped silently.
//
// Returns true when the check passes or the case is skipped / not supported, and false when
// tensor allocation fails, a gradient is not finite, or the mean absolute asymmetry between
// gn and ga exceeds max_maa_err().
bool eval_grad(ggml_backend_t backend, const char * op_name) {
    mode = MODE_GRAD;
    const std::vector<float> expect = grad_expect();

    ggml_init_params params = {
        /* .mem_size = */ ggml_tensor_overhead()*128 + 2*ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, true),
        /* .mem_base = */ nullptr,
        /* .no_alloc = */ true,
    };
    ggml_context * ctx = ggml_init(params);
    GGML_ASSERT(ctx);

    gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
    gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);

    ggml_tensor * out = build_graph(ctx);

    if ((op_name != nullptr && op_desc(out) != op_name) || out->op == GGML_OP_OPT_STEP_ADAMW) {
        //printf(" %s: skipping\n", op_desc(out).c_str());
        ggml_free(ctx);
        return true;
    }

    printf(" %s(%s): ", op_desc(out).c_str(), vars().c_str());
    fflush(stdout);

    if (out->type != GGML_TYPE_F32) {
        // Print before freeing: out lives inside ctx, so out->name must not be read after ggml_free.
        printf("not supported [%s->type != FP32]\n", out->name);
        ggml_free(ctx);
        return true;
    }

    // check if the backend supports the ops
    bool supported = true;
    bool any_params = false;
    for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
        if (!ggml_backend_supports_op(backend, t)) {
            printf("not supported [%s] ", ggml_backend_name(backend));
            supported = false;
            break;
        }
        if ((t->flags & GGML_TENSOR_FLAG_PARAM)) {
            any_params = true;
            if (t->type != GGML_TYPE_F32) {
                printf("not supported [%s->type != FP32] ", t->name);
                supported = false;
                break;
            }
        }
    }
    if (!any_params) {
        // op_name may be nullptr here (no filter requested), so print the op description instead;
        // when op_name is set it is equal to op_desc(out), otherwise we would have returned above.
        printf("not supported [%s] \n", op_desc(out).c_str());
        supported = false;
    }
    if (!supported) {
        printf("\n");
        ggml_free(ctx);
        return true;
    }

    // total number of parameter elements, i.e. the number of gradient entries to verify
    int64_t ngrads = 0;
    for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
        if (t->flags & GGML_TENSOR_FLAG_PARAM) {
            ngrads += ggml_nelements(t);
        }
    }
    if (ngrads > grad_nmax()) {
        printf("skipping large tensors for speed \n");
        ggml_free(ctx);
        return true;
    }


    // reduce the output to a scalar so that it can be used as the loss
    if (!ggml_is_scalar(out)) {
        out = ggml_sum(ctx, out);
        ggml_set_name(out, "sum_of_out");
    }
    ggml_set_loss(out);

    ggml_build_forward_expand(gf, out);
    ggml_graph_cpy(gf, gb);
    ggml_build_backward_expand(ctx, ctx, gb, false);
    if (expect.size() != 1 || expect[0] != 0.0f) {
        // unless the only expected gradient value is 0, the backward graph must add nodes
        // and every parameter must have a gradient produced by an actual op
        GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
            GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || ggml_graph_get_grad(gb, t)->op != GGML_OP_NONE);
        }
    }

    // TODO: refactor so that this check is only needed once
    for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
        if (!ggml_backend_supports_op(backend, t)) {
            printf("not supported [%s] ", ggml_backend_name(backend));
            supported = false;
            break;
        }
        if ((t->flags & GGML_TENSOR_FLAG_PARAM) && t->type != GGML_TYPE_F32) {
            printf("not supported [%s->type != FP32] ", t->name);
            supported = false;
            break;
        }
    }
    if (!supported) {
        printf("\n");
        ggml_free(ctx);
        return true;
    }

    // allocate
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
    if (buf == nullptr) {
        printf("failed to allocate tensors [%s] ", ggml_backend_name(backend));
        ggml_free(ctx);
        return false;
    }


    initialize_tensors(ctx); // Randomizes all tensors (including gradients).
    ggml_graph_reset(gb);    // Sets gradients to 1 if loss, 0 otherwise.

    ggml_backend_graph_compute(backend, gf);
    ggml_backend_graph_compute(backend, gb);

    bool ok = true;
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
        if (!(t->flags & GGML_TENSOR_FLAG_PARAM)) {
            continue;
        }

        const char * bn = ggml_backend_name(backend);
        const int64_t ne = ggml_nelements(t);

        std::vector<float> ga;
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, t);
        if (grad) {
            ga = tensor_to_float(grad);
        } else {
            ga.resize(ne); // default value is 0.0f
        }

        for (int64_t i = 0; i < ne; ++i) { // gradient algebraic
            // check for nans
            if (!std::isfinite(ga[i])) {
                printf("[%s] nonfinite gradient at index %" PRId64 " (%s=%f) ", ggml_op_desc(t), i, bn, ga[i]);
                ok = false;
                break;
            }
        }
        if (!ok) {
            break;
        }

        std::vector<float> gn(ne); // gradient numeric
        GGML_ASSERT(ga.size() == gn.size());

        std::vector<float> x0 = tensor_to_float(t); // original t data
        GGML_ASSERT(ggml_is_scalar(out));
        GGML_ASSERT(out->type == GGML_TYPE_F32);

        const float eps = grad_eps();
        for (int64_t i = 0; i < ne; ++i) {
            const float xiu  = x0[i] + 1.0f*eps; // x, index i, up
            const float xiuh = x0[i] + 0.5f*eps; // x, index i, up half
            const float xidh = x0[i] - 0.5f*eps; // x, index i, down half
            const float xid  = x0[i] - 1.0f*eps; // x, index i, down

            float fu, fuh, fdh, fd; // output values for xiu, xiuh, xidh, xid

            ggml_backend_tensor_set(t, &xiu, i*sizeof(float), sizeof(float));
            ggml_backend_graph_compute(backend, gf);
            ggml_backend_tensor_get(out, &fu, 0, ggml_nbytes(out));

            ggml_backend_tensor_set(t, &xid, i*sizeof(float), sizeof(float));
            ggml_backend_graph_compute(backend, gf);
            ggml_backend_tensor_get(out, &fd, 0, ggml_nbytes(out));

            if (grad_precise()) {
                // two extra evaluations at +-eps/2 for a higher-order finite-difference estimate
                ggml_backend_tensor_set(t, &xiuh, i*sizeof(float), sizeof(float));
                ggml_backend_graph_compute(backend, gf);
                ggml_backend_tensor_get(out, &fuh, 0, ggml_nbytes(out));

                ggml_backend_tensor_set(t, &xidh, i*sizeof(float), sizeof(float));
                ggml_backend_graph_compute(backend, gf);
                ggml_backend_tensor_get(out, &fdh, 0, ggml_nbytes(out));

                gn[i] = (8.0*(double)fuh + (double)fd - (8.0*(double)fdh + (double)fu)) / (6.0*(double)eps);
            } else {
                // plain central difference
                gn[i] = (fu - fd) / (2.0f*eps);
            }

            // restore the unperturbed data before moving on to the next element
            ggml_backend_tensor_set(t, x0.data(), 0, ggml_nbytes(t));
        }

        const double err = mean_abs_asymm(gn.data(), ga.data(), gn.size(), expect);
        if (err > max_maa_err()) {
            printf("[%s] MAA = %.9f > %.9f ", ggml_op_desc(t), err, max_maa_err());
            ok = false;
            break;
        }
    }

    if (!ok) {
        printf("compare failed ");
    }

    ggml_backend_buffer_free(buf);

    ggml_free(ctx);

    if (ok) {
        printf("\033[1;32mOK\033[0m\n");
        return true;
    }

    printf("\033[1;31mFAIL\033[0m\n");
    return false;
}
|
|
953
|
+
};
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
// ####################################
|
|
957
|
+
// ## Section 2: GGML Op Definitions ##
|
|
958
|
+
// ####################################
|
|
959
|
+
|
|
960
|
+
|
|
961
|
+
// The following is an example showing the bare minimum for creating a test for a GGML op.
|
|
962
|
+
|
|
963
|
+
// GGML_OP_EXAMPLE
|
|
964
|
+
struct test_example : public test_case {
|
|
965
|
+
// Always define these 2 or variants thereof:
|
|
966
|
+
const ggml_type type; // The type of the input tensors.
|
|
967
|
+
const std::array<int64_t, 4> ne; // The shape of the input tensors.
|
|
968
|
+
// For some ops it's necessary to define multiple types or shapes for the inputs.
|
|
969
|
+
// Or they may need additional parameters.
|
|
970
|
+
|
|
971
|
+
// Put all parameters needed to fully define the test into one of the VARS_TO_STR macros.
|
|
972
|
+
// In most cases these are just the properties of the struct that you defined above.
|
|
973
|
+
// This is needed for info prints.
|
|
974
|
+
std::string vars() override {
|
|
975
|
+
return VARS_TO_STR2(type, ne);
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
// Define a constructor for the struct.
|
|
979
|
+
// In most cases it will be sufficient to have the same arguments as the struct has properties
|
|
980
|
+
// and just use initializer lists.
|
|
981
|
+
test_example(ggml_type type = GGML_TYPE_F32,
|
|
982
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
983
|
+
: type(type), ne(ne) {}
|
|
984
|
+
|
|
985
|
+
// Define how a simple GGML compute graph can be constructed for the new GGML op.
|
|
986
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
987
|
+
// Step 1: create input tensors that don't depend on any other tensors:
|
|
988
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
989
|
+
ggml_set_name(a, "a"); // Setting names is optional but it's useful for debugging.
|
|
990
|
+
|
|
991
|
+
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
992
|
+
ggml_set_name(b, "b");
|
|
993
|
+
|
|
994
|
+
// Step 2: use the op that you want to test in the GGML compute graph.
|
|
995
|
+
ggml_tensor * out = ggml_add(ctx, a, b); // For this example we're just doing a simple addition.
|
|
996
|
+
ggml_set_name(out, "out");
|
|
997
|
+
|
|
998
|
+
// Step 3: return the output tensor.
|
|
999
|
+
return out;
|
|
645
1000
|
}
|
|
1001
|
+
// In order to also check the gradients for your op, add calls like ggml_set_param(ctx, a)
|
|
1002
|
+
// immediately after you create the tensors.
|
|
1003
|
+
// This is optional and only makes sense if a backward pass has actually been implemented for the new op.
|
|
646
1004
|
};
|
|
647
1005
|
|
|
1006
|
+
|
|
648
1007
|
// GGML_OP_UNARY
|
|
649
1008
|
struct test_unary : public test_case {
|
|
650
1009
|
const ggml_unary_op op;
|
|
@@ -658,20 +1017,36 @@ struct test_unary : public test_case {
|
|
|
658
1017
|
|
|
659
1018
|
test_unary(ggml_unary_op op,
|
|
660
1019
|
ggml_type type = GGML_TYPE_F32,
|
|
661
|
-
std::array<int64_t, 4> ne_a = {128,
|
|
1020
|
+
std::array<int64_t, 4> ne_a = {128, 2, 2, 2},
|
|
662
1021
|
int v = 0)
|
|
663
1022
|
: op(op), type(type), ne_a(ne_a), v(v) {}
|
|
664
1023
|
|
|
665
1024
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1025
|
+
const bool grad_supported = op == GGML_UNARY_OP_ABS || op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_NEG ||
|
|
1026
|
+
op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU;
|
|
1027
|
+
|
|
666
1028
|
ggml_tensor * a;
|
|
667
1029
|
if (v & 1) {
|
|
668
1030
|
auto ne = ne_a; ne[0] *= 3;
|
|
669
1031
|
a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1032
|
+
if (grad_supported) {
|
|
1033
|
+
ggml_set_param(ctx, a);
|
|
1034
|
+
}
|
|
1035
|
+
ggml_set_name(a, "a");
|
|
1036
|
+
|
|
670
1037
|
a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0);
|
|
1038
|
+
ggml_set_name(a, "view_of_a");
|
|
671
1039
|
} else {
|
|
672
1040
|
a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
1041
|
+
if (grad_supported) {
|
|
1042
|
+
ggml_set_param(ctx, a);
|
|
1043
|
+
}
|
|
1044
|
+
ggml_set_name(a, "a");
|
|
673
1045
|
}
|
|
1046
|
+
|
|
674
1047
|
ggml_tensor * out = ggml_unary(ctx, a, op);
|
|
1048
|
+
ggml_set_name(out, "out");
|
|
1049
|
+
|
|
675
1050
|
return out;
|
|
676
1051
|
}
|
|
677
1052
|
|
|
@@ -681,6 +1056,24 @@ struct test_unary : public test_case {
|
|
|
681
1056
|
init_tensor_uniform(t, -150.f, 150.f);
|
|
682
1057
|
}
|
|
683
1058
|
}
|
|
1059
|
+
|
|
1060
|
+
float grad_eps() override {
|
|
1061
|
+
return 15.0f;
|
|
1062
|
+
}
|
|
1063
|
+
|
|
1064
|
+
std::vector<float> grad_expect() override {
|
|
1065
|
+
if (op == GGML_UNARY_OP_ABS) {
|
|
1066
|
+
return {-1.0f, 1.0f};
|
|
1067
|
+
}
|
|
1068
|
+
if (op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_STEP) {
|
|
1069
|
+
return {0.0f};
|
|
1070
|
+
}
|
|
1071
|
+
if (op == GGML_UNARY_OP_RELU) {
|
|
1072
|
+
return {0.0f, 1.0f};
|
|
1073
|
+
}
|
|
1074
|
+
return {};
|
|
1075
|
+
}
|
|
1076
|
+
|
|
684
1077
|
};
|
|
685
1078
|
|
|
686
1079
|
// GGML_OP_GET_ROWS
|
|
@@ -701,11 +1094,24 @@ struct test_get_rows : public test_case {
|
|
|
701
1094
|
|
|
702
1095
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
703
1096
|
ggml_tensor * in = ggml_new_tensor_3d(ctx, type, n, m, b);
|
|
1097
|
+
ggml_set_name(in, "in");
|
|
1098
|
+
|
|
704
1099
|
ggml_tensor * rows = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, r, b);
|
|
1100
|
+
ggml_set_name(rows, "rows");
|
|
705
1101
|
if (v) {
|
|
706
1102
|
rows = ggml_view_2d(ctx, rows, r/2, b, rows->nb[1], 0);
|
|
1103
|
+
ggml_set_name(rows, "view_of_rows");
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
const bool grad_supported = ggml_is_matrix(in) && ggml_is_vector(rows);
|
|
1107
|
+
if (grad_supported) {
|
|
1108
|
+
ggml_set_param(ctx, in);
|
|
1109
|
+
// rows is a constant input -> no gradients
|
|
707
1110
|
}
|
|
1111
|
+
|
|
708
1112
|
ggml_tensor * out = ggml_get_rows(ctx, in, rows);
|
|
1113
|
+
ggml_set_name(out, "out");
|
|
1114
|
+
|
|
709
1115
|
return out;
|
|
710
1116
|
}
|
|
711
1117
|
|
|
@@ -726,14 +1132,79 @@ struct test_get_rows : public test_case {
|
|
|
726
1132
|
}
|
|
727
1133
|
};
|
|
728
1134
|
|
|
729
|
-
//
|
|
730
|
-
struct
|
|
1135
|
+
// GGML_OP_ARGMAX
|
|
1136
|
+
struct test_argmax : public test_case {
|
|
731
1137
|
const ggml_type type;
|
|
732
1138
|
const std::array<int64_t, 4> ne;
|
|
733
|
-
const std::array<int, 4> nr;
|
|
734
1139
|
|
|
735
1140
|
std::string vars() override {
|
|
736
|
-
return
|
|
1141
|
+
return VARS_TO_STR2(type, ne);
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
test_argmax(ggml_type type = GGML_TYPE_F32,
|
|
1145
|
+
std::array<int64_t, 4> ne = {10, 100, 1, 1})
|
|
1146
|
+
: type(type), ne(ne) {}
|
|
1147
|
+
|
|
1148
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1149
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1150
|
+
ggml_set_name(a, "a");
|
|
1151
|
+
|
|
1152
|
+
ggml_tensor * out = ggml_argmax(ctx, a);
|
|
1153
|
+
ggml_set_name(out, "out");
|
|
1154
|
+
|
|
1155
|
+
return out;
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
double max_nmse_err() override {
|
|
1159
|
+
return 0.0;
|
|
1160
|
+
}
|
|
1161
|
+
};
|
|
1162
|
+
|
|
1163
|
+
// GGML_OP_COUNT_EQUAL
|
|
1164
|
+
struct test_count_equal : public test_case {
|
|
1165
|
+
const ggml_type type;
|
|
1166
|
+
const std::array<int64_t, 4> ne;
|
|
1167
|
+
|
|
1168
|
+
std::string vars() override {
|
|
1169
|
+
return VARS_TO_STR2(type, ne);
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
test_count_equal(ggml_type type = GGML_TYPE_F32,
|
|
1173
|
+
std::array<int64_t, 4> ne = {4, 500, 1, 1})
|
|
1174
|
+
: type(type), ne(ne) {}
|
|
1175
|
+
|
|
1176
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1177
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1178
|
+
ggml_set_name(a, "a");
|
|
1179
|
+
|
|
1180
|
+
ggml_tensor * a_argmax = ggml_argmax(ctx, a);
|
|
1181
|
+
ggml_set_name(a_argmax, "a_argmax");
|
|
1182
|
+
|
|
1183
|
+
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1184
|
+
ggml_set_name(b, "b");
|
|
1185
|
+
|
|
1186
|
+
ggml_tensor * b_argmax = ggml_argmax(ctx, a);
|
|
1187
|
+
ggml_set_name(b_argmax, "b_argmax");
|
|
1188
|
+
|
|
1189
|
+
ggml_tensor * out = ggml_count_equal(ctx, a_argmax, b_argmax);
|
|
1190
|
+
ggml_set_name(out, "out");
|
|
1191
|
+
|
|
1192
|
+
return out;
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
double max_nmse_err() override {
|
|
1196
|
+
return 0.0;
|
|
1197
|
+
}
|
|
1198
|
+
};
|
|
1199
|
+
|
|
1200
|
+
// GGML_OP_REPEAT
|
|
1201
|
+
struct test_repeat : public test_case {
|
|
1202
|
+
const ggml_type type;
|
|
1203
|
+
const std::array<int64_t, 4> ne;
|
|
1204
|
+
const std::array<int, 4> nr;
|
|
1205
|
+
|
|
1206
|
+
std::string vars() override {
|
|
1207
|
+
return VARS_TO_STR3(type, ne, nr);
|
|
737
1208
|
}
|
|
738
1209
|
|
|
739
1210
|
size_t op_size(ggml_tensor * t) override {
|
|
@@ -741,14 +1212,21 @@ struct test_repeat : public test_case {
|
|
|
741
1212
|
}
|
|
742
1213
|
|
|
743
1214
|
test_repeat(ggml_type type = GGML_TYPE_F32,
|
|
744
|
-
std::array<int64_t, 4> ne = {10,
|
|
1215
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3},
|
|
745
1216
|
std::array<int, 4> nr = {2, 2, 2, 2})
|
|
746
1217
|
: type(type), ne(ne), nr(nr) {}
|
|
747
1218
|
|
|
748
1219
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
749
1220
|
ggml_tensor * target = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
|
|
1221
|
+
ggml_set_name(target, "target");
|
|
1222
|
+
|
|
750
1223
|
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1224
|
+
ggml_set_param(ctx, src);
|
|
1225
|
+
ggml_set_name(src, "src");
|
|
1226
|
+
|
|
751
1227
|
ggml_tensor * out = ggml_repeat(ctx, src, target);
|
|
1228
|
+
ggml_set_name(out, "out");
|
|
1229
|
+
|
|
752
1230
|
return out;
|
|
753
1231
|
}
|
|
754
1232
|
};
|
|
@@ -774,10 +1252,62 @@ struct test_dup : public test_case {
|
|
|
774
1252
|
|
|
775
1253
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
776
1254
|
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1255
|
+
ggml_set_param(ctx, src);
|
|
1256
|
+
ggml_set_name(src, "src");
|
|
1257
|
+
|
|
777
1258
|
if (_use_permute) {
|
|
778
1259
|
src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
|
|
1260
|
+
ggml_set_name(src, "src_permuted");
|
|
779
1261
|
}
|
|
1262
|
+
|
|
780
1263
|
ggml_tensor * out = ggml_dup(ctx, src);
|
|
1264
|
+
ggml_set_name(out, "out");
|
|
1265
|
+
|
|
1266
|
+
return out;
|
|
1267
|
+
}
|
|
1268
|
+
};
|
|
1269
|
+
|
|
1270
|
+
// GGML_OP_SET
|
|
1271
|
+
struct test_set : public test_case {
|
|
1272
|
+
const ggml_type type_src;
|
|
1273
|
+
const ggml_type type_dst;
|
|
1274
|
+
const std::array<int64_t, 4> ne;
|
|
1275
|
+
const int dim;
|
|
1276
|
+
|
|
1277
|
+
std::string vars() override {
|
|
1278
|
+
return VARS_TO_STR4(type_src, type_dst, ne, dim);
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1281
|
+
size_t op_size(ggml_tensor * t) override {
|
|
1282
|
+
return ggml_nbytes(t) + ggml_nbytes(t->src[0]);
|
|
1283
|
+
}
|
|
1284
|
+
|
|
1285
|
+
test_set(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
|
|
1286
|
+
std::array<int64_t, 4> ne = {6, 5, 4, 3}, int dim = 1)
|
|
1287
|
+
: type_src(type_src), type_dst(type_dst), ne(ne), dim(dim) {}
|
|
1288
|
+
|
|
1289
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1290
|
+
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
|
|
1291
|
+
ggml_set_param(ctx, src);
|
|
1292
|
+
ggml_set_name(src, "src");
|
|
1293
|
+
|
|
1294
|
+
auto ne_dst = ne;
|
|
1295
|
+
for (int i = 0; i < dim; ++i) {
|
|
1296
|
+
ne_dst[i] *= 2;
|
|
1297
|
+
}
|
|
1298
|
+
ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
|
|
1299
|
+
ggml_set_param(ctx, dst);
|
|
1300
|
+
ggml_set_name(dst, "dst");
|
|
1301
|
+
|
|
1302
|
+
size_t offset = 0;
|
|
1303
|
+
for (int i = 0; i < dim; ++i) {
|
|
1304
|
+
offset += ((ne_dst[i] - ne[i])/2)*dst->nb[i];
|
|
1305
|
+
}
|
|
1306
|
+
ggml_tensor * out = ggml_set(ctx, dst, src,
|
|
1307
|
+
// The backward pass requires setting a contiguous region:
|
|
1308
|
+
src->nb[1], src->nb[2], src->nb[3], offset);
|
|
1309
|
+
ggml_set_name(out, "out");
|
|
1310
|
+
|
|
781
1311
|
return out;
|
|
782
1312
|
}
|
|
783
1313
|
};
|
|
@@ -804,18 +1334,26 @@ struct test_cpy : public test_case {
|
|
|
804
1334
|
|
|
805
1335
|
test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
|
|
806
1336
|
std::array<int64_t, 4> ne = {10, 10, 10, 1},
|
|
807
|
-
std::array<int64_t, 4> permute = {0, 0, 0, 0}
|
|
808
|
-
bool _dst_use_permute = false)
|
|
1337
|
+
std::array<int64_t, 4> permute = {0, 0, 0, 0})
|
|
809
1338
|
: type_src(type_src), type_dst(type_dst), ne(ne), permute(permute),
|
|
810
1339
|
_src_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
|
|
811
1340
|
|
|
812
1341
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
813
1342
|
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
|
|
1343
|
+
ggml_set_param(ctx, src);
|
|
1344
|
+
ggml_set_name(src, "src");
|
|
1345
|
+
|
|
814
1346
|
if (_src_use_permute) {
|
|
815
1347
|
src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
|
|
1348
|
+
ggml_set_name(src, "src_permuted");
|
|
816
1349
|
}
|
|
1350
|
+
|
|
817
1351
|
ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, src->ne);
|
|
1352
|
+
ggml_set_name(dst, "dst");
|
|
1353
|
+
|
|
818
1354
|
ggml_tensor * out = ggml_cpy(ctx, src, dst);
|
|
1355
|
+
ggml_set_name(out, "out");
|
|
1356
|
+
|
|
819
1357
|
return out;
|
|
820
1358
|
}
|
|
821
1359
|
};
|
|
@@ -835,8 +1373,14 @@ struct test_cont : public test_case {
|
|
|
835
1373
|
|
|
836
1374
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
837
1375
|
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1376
|
+
ggml_set_param(ctx, src);
|
|
1377
|
+
ggml_set_name(src, "src");
|
|
1378
|
+
|
|
838
1379
|
src = ggml_transpose(ctx, src);
|
|
1380
|
+
ggml_set_name(src, "src_transposed");
|
|
1381
|
+
|
|
839
1382
|
ggml_tensor * out = ggml_cont(ctx, src);
|
|
1383
|
+
ggml_set_name(out, "out");
|
|
840
1384
|
|
|
841
1385
|
return out;
|
|
842
1386
|
}
|
|
@@ -867,21 +1411,79 @@ struct test_bin_bcast : public test_case {
|
|
|
867
1411
|
|
|
868
1412
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
869
1413
|
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
|
|
1414
|
+
ggml_set_name(a, "a");
|
|
1415
|
+
|
|
870
1416
|
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1417
|
+
ggml_set_name(b, "b");
|
|
1418
|
+
|
|
1419
|
+
// The backward pass supports broadcasting only for GGML_ADD:
|
|
1420
|
+
const bool grad_supported = op == ggml_add || ggml_are_same_shape(a, b);
|
|
1421
|
+
if (grad_supported) {
|
|
1422
|
+
ggml_set_param(ctx, a);
|
|
1423
|
+
ggml_set_param(ctx, b);
|
|
1424
|
+
}
|
|
1425
|
+
|
|
871
1426
|
ggml_tensor * out = op(ctx, a, b);
|
|
1427
|
+
ggml_set_name(out, "out");
|
|
1428
|
+
|
|
872
1429
|
return out;
|
|
873
1430
|
}
|
|
874
1431
|
|
|
875
1432
|
void initialize_tensors(ggml_context * ctx) override {
|
|
876
1433
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
877
|
-
if (op == ggml_div) {
|
|
878
|
-
//
|
|
879
|
-
init_tensor_uniform(t,
|
|
1434
|
+
if (op == ggml_mul || op == ggml_div) {
|
|
1435
|
+
// MUL and DIV have numerical issues around zero:
|
|
1436
|
+
init_tensor_uniform(t, 0.9f, 1.1f);
|
|
880
1437
|
} else {
|
|
881
1438
|
init_tensor_uniform(t);
|
|
882
1439
|
}
|
|
883
1440
|
}
|
|
884
1441
|
}
|
|
1442
|
+
|
|
1443
|
+
float grad_eps() override {
|
|
1444
|
+
return 0.1f * (op == ggml_mul ? ne[0]*ne[1]*ne[2]*ne[3] : 1);
|
|
1445
|
+
}
|
|
1446
|
+
|
|
1447
|
+
bool grad_precise() override {
|
|
1448
|
+
return op == ggml_div;
|
|
1449
|
+
}
|
|
1450
|
+
|
|
1451
|
+
double max_maa_err() override {
|
|
1452
|
+
return op == ggml_add ? 1e-4 : 1e-3;
|
|
1453
|
+
}
|
|
1454
|
+
};
|
|
1455
|
+
|
|
1456
|
+
// GGML_OP_ADD1
|
|
1457
|
+
struct test_add1 : public test_case {
|
|
1458
|
+
const ggml_type type;
|
|
1459
|
+
const std::array<int64_t, 4> ne;
|
|
1460
|
+
|
|
1461
|
+
std::string vars() override {
|
|
1462
|
+
return VARS_TO_STR2(type, ne);
|
|
1463
|
+
}
|
|
1464
|
+
|
|
1465
|
+
test_add1(ggml_type type = GGML_TYPE_F32,
|
|
1466
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
1467
|
+
: type(type), ne(ne) {}
|
|
1468
|
+
|
|
1469
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1470
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1471
|
+
ggml_set_param(ctx, a);
|
|
1472
|
+
ggml_set_name(a, "a");
|
|
1473
|
+
|
|
1474
|
+
ggml_tensor * b = ggml_new_tensor_1d(ctx, type, 1);
|
|
1475
|
+
// ggml_set_param(ctx, b); // TODO: implement
|
|
1476
|
+
ggml_set_name(b, "b");
|
|
1477
|
+
|
|
1478
|
+
ggml_tensor * out = ggml_add1(ctx, a, b);
|
|
1479
|
+
ggml_set_name(out, "out");
|
|
1480
|
+
|
|
1481
|
+
return out;
|
|
1482
|
+
}
|
|
1483
|
+
|
|
1484
|
+
float grad_eps() override {
|
|
1485
|
+
return 0.1f * ne[0]*ne[1]*ne[2]*ne[3];
|
|
1486
|
+
}
|
|
885
1487
|
};
|
|
886
1488
|
|
|
887
1489
|
// GGML_OP_SCALE
|
|
@@ -901,7 +1503,12 @@ struct test_scale : public test_case {
|
|
|
901
1503
|
|
|
902
1504
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
903
1505
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1506
|
+
ggml_set_param(ctx, a);
|
|
1507
|
+
ggml_set_name(a, "a");
|
|
1508
|
+
|
|
904
1509
|
ggml_tensor * out = ggml_scale(ctx, a, scale);
|
|
1510
|
+
ggml_set_name(out, "out");
|
|
1511
|
+
|
|
905
1512
|
return out;
|
|
906
1513
|
}
|
|
907
1514
|
};
|
|
@@ -917,13 +1524,17 @@ struct test_norm : public test_case {
|
|
|
917
1524
|
}
|
|
918
1525
|
|
|
919
1526
|
test_norm(ggml_type type = GGML_TYPE_F32,
|
|
920
|
-
std::array<int64_t, 4> ne = {64,
|
|
1527
|
+
std::array<int64_t, 4> ne = {64, 5, 4, 3},
|
|
921
1528
|
float eps = 1e-6f)
|
|
922
1529
|
: type(type), ne(ne), eps(eps) {}
|
|
923
1530
|
|
|
924
1531
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
925
1532
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1533
|
+
ggml_set_name(a, "a");
|
|
1534
|
+
|
|
926
1535
|
ggml_tensor * out = ggml_norm(ctx, a, eps);
|
|
1536
|
+
ggml_set_name(out, "out");
|
|
1537
|
+
|
|
927
1538
|
return out;
|
|
928
1539
|
}
|
|
929
1540
|
};
|
|
@@ -939,13 +1550,104 @@ struct test_rms_norm : public test_case {
|
|
|
939
1550
|
}
|
|
940
1551
|
|
|
941
1552
|
test_rms_norm(ggml_type type = GGML_TYPE_F32,
|
|
942
|
-
std::array<int64_t, 4> ne = {64,
|
|
1553
|
+
std::array<int64_t, 4> ne = {64, 5, 4, 3},
|
|
943
1554
|
float eps = 1e-6f)
|
|
944
1555
|
: type(type), ne(ne), eps(eps) {}
|
|
945
1556
|
|
|
946
1557
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
947
1558
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1559
|
+
ggml_set_param(ctx, a);
|
|
1560
|
+
ggml_set_name(a, "a");
|
|
1561
|
+
|
|
948
1562
|
ggml_tensor * out = ggml_rms_norm(ctx, a, eps);
|
|
1563
|
+
ggml_set_name(out, "out");
|
|
1564
|
+
|
|
1565
|
+
return out;
|
|
1566
|
+
}
|
|
1567
|
+
|
|
1568
|
+
bool grad_precise() override {
|
|
1569
|
+
return true;
|
|
1570
|
+
}
|
|
1571
|
+
};
|
|
1572
|
+
|
|
1573
|
+
// GGML_OP_SSM_CONV
|
|
1574
|
+
struct test_ssm_conv : public test_case {
|
|
1575
|
+
const ggml_type type;
|
|
1576
|
+
const std::array<int64_t, 4> ne_a;
|
|
1577
|
+
const std::array<int64_t, 4> ne_b;
|
|
1578
|
+
|
|
1579
|
+
std::string vars() override {
|
|
1580
|
+
return VARS_TO_STR3(type, ne_a, ne_b);
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1583
|
+
test_ssm_conv(ggml_type type = GGML_TYPE_F32,
|
|
1584
|
+
std::array<int64_t, 4> ne_a = {10, 10, 10, 1},
|
|
1585
|
+
std::array<int64_t, 4> ne_b = {3, 3, 1, 1})
|
|
1586
|
+
: type(type), ne_a(ne_a), ne_b(ne_b) {}
|
|
1587
|
+
|
|
1588
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1589
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
1590
|
+
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
|
|
1591
|
+
ggml_tensor * out = ggml_ssm_conv(ctx, a, b);
|
|
1592
|
+
return out;
|
|
1593
|
+
}
|
|
1594
|
+
};
|
|
1595
|
+
|
|
1596
|
+
// GGML_OP_SSM_SCAN
|
|
1597
|
+
struct test_ssm_scan : public test_case {
|
|
1598
|
+
const ggml_type type;
|
|
1599
|
+
|
|
1600
|
+
const int64_t d_state;
|
|
1601
|
+
const int64_t d_inner;
|
|
1602
|
+
const int64_t n_seq_tokens;
|
|
1603
|
+
const int64_t n_seqs;
|
|
1604
|
+
|
|
1605
|
+
std::string vars() override {
|
|
1606
|
+
return VARS_TO_STR5(type, d_state, d_inner, n_seq_tokens, n_seqs);
|
|
1607
|
+
}
|
|
1608
|
+
|
|
1609
|
+
test_ssm_scan(ggml_type type = GGML_TYPE_F32,
|
|
1610
|
+
int64_t d_state = 32, int64_t d_inner = 32, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
|
|
1611
|
+
: type(type), d_state(d_state), d_inner(d_inner), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
|
|
1612
|
+
|
|
1613
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1614
|
+
ggml_tensor * s = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, d_inner, n_seqs, 1 }.data());
|
|
1615
|
+
ggml_tensor * x = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_inner, n_seq_tokens, n_seqs, 1 }.data());
|
|
1616
|
+
ggml_tensor * dt = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_inner, n_seq_tokens, n_seqs, 1 }.data());
|
|
1617
|
+
ggml_tensor * A = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, d_inner, 1 , 1 }.data());
|
|
1618
|
+
ggml_tensor * B = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, n_seq_tokens, n_seqs, 1 }.data());
|
|
1619
|
+
ggml_tensor * C = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, n_seq_tokens, n_seqs, 1 }.data());
|
|
1620
|
+
ggml_tensor * out = ggml_ssm_scan(ctx, s, x, dt, A, B, C);
|
|
1621
|
+
return out;
|
|
1622
|
+
}
|
|
1623
|
+
};
|
|
1624
|
+
|
|
1625
|
+
// GGML_OP_RWKV_WKV6
|
|
1626
|
+
struct test_rwkv_wkv6 : public test_case {
|
|
1627
|
+
const ggml_type type;
|
|
1628
|
+
|
|
1629
|
+
const int64_t head_count;
|
|
1630
|
+
const int64_t head_size;
|
|
1631
|
+
const int64_t n_seq_tokens;
|
|
1632
|
+
const int64_t n_seqs;
|
|
1633
|
+
|
|
1634
|
+
std::string vars() override {
|
|
1635
|
+
return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs);
|
|
1636
|
+
}
|
|
1637
|
+
|
|
1638
|
+
test_rwkv_wkv6(ggml_type type = GGML_TYPE_F32,
|
|
1639
|
+
int64_t head_count = 32, int64_t head_size = 64, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
|
|
1640
|
+
: type(type), head_count(head_count), head_size(head_size), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
|
|
1641
|
+
|
|
1642
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1643
|
+
const int64_t n_tokens = n_seq_tokens * n_seqs;
|
|
1644
|
+
ggml_tensor * r = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
|
|
1645
|
+
ggml_tensor * k = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ head_size, 1, head_count, n_tokens }.data());
|
|
1646
|
+
ggml_tensor * v = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
|
|
1647
|
+
ggml_tensor * tf = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size, head_count }.data());
|
|
1648
|
+
ggml_tensor * td = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
|
|
1649
|
+
ggml_tensor * s = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
|
|
1650
|
+
ggml_tensor * out = ggml_rwkv_wkv6(ctx, k, v, r, tf, td, s);
|
|
949
1651
|
return out;
|
|
950
1652
|
}
|
|
951
1653
|
};
|
|
@@ -957,37 +1659,68 @@ struct test_mul_mat : public test_case {
|
|
|
957
1659
|
const int64_t m;
|
|
958
1660
|
const int64_t n;
|
|
959
1661
|
const int64_t k;
|
|
960
|
-
const std::array<int64_t, 2> bs;
|
|
961
|
-
const std::array<int64_t, 2> nr;
|
|
1662
|
+
const std::array<int64_t, 2> bs; // dims 3 and 4
|
|
1663
|
+
const std::array<int64_t, 2> nr; // repeat in dims 3 and 4
|
|
1664
|
+
const std::array<int64_t, 4> per; // permutation of dimensions
|
|
962
1665
|
|
|
963
1666
|
std::string vars() override {
|
|
964
|
-
return
|
|
1667
|
+
return VARS_TO_STR8(type_a, type_b, m, n, k, bs, nr, per);
|
|
965
1668
|
}
|
|
966
1669
|
|
|
967
1670
|
double max_nmse_err() override {
|
|
968
1671
|
return 5e-4;
|
|
969
1672
|
}
|
|
970
1673
|
|
|
971
|
-
|
|
972
|
-
size_t a = ggml_nbytes(t->src[0]) * n * nr[0] * nr[1];
|
|
973
|
-
size_t b = ggml_nbytes(t->src[1]) * m;
|
|
974
|
-
size_t c = ggml_nbytes(t);
|
|
975
|
-
return a + b + c;
|
|
976
|
-
|
|
1674
|
+
uint64_t op_flops(ggml_tensor * t) override {
|
|
977
1675
|
GGML_UNUSED(t);
|
|
1676
|
+
return 2 * m * n * k * bs[0] * nr[0] * bs[1] * nr[1];
|
|
978
1677
|
}
|
|
979
1678
|
|
|
980
1679
|
test_mul_mat(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
|
|
981
1680
|
int64_t m = 32, int64_t n = 32, int64_t k = 32,
|
|
982
1681
|
std::array<int64_t, 2> bs = {10, 10},
|
|
983
|
-
std::array<int64_t, 2> nr = {2, 2}
|
|
984
|
-
|
|
1682
|
+
std::array<int64_t, 2> nr = {2, 2},
|
|
1683
|
+
std::array<int64_t, 4> per = {0, 1, 2, 3})
|
|
1684
|
+
: type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), nr(nr), per(per) {}
|
|
985
1685
|
|
|
986
1686
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
987
1687
|
// C^T = A * B^T: (k, m) * (k, n) => (m, n)
|
|
988
|
-
ggml_tensor * a
|
|
989
|
-
ggml_tensor * b
|
|
1688
|
+
ggml_tensor * a;
|
|
1689
|
+
ggml_tensor * b;
|
|
1690
|
+
|
|
1691
|
+
const int npermuted = (per[0] != 0) + (per[1] != 1) + (per[2] != 2) + (per[3] != 3);
|
|
1692
|
+
if (npermuted > 0) {
|
|
1693
|
+
GGML_ASSERT(npermuted == 2);
|
|
1694
|
+
GGML_ASSERT(!ggml_is_quantized(type_a) || per[0] == 0);
|
|
1695
|
+
GGML_ASSERT(!ggml_is_quantized(type_b) || per[0] == 0);
|
|
1696
|
+
|
|
1697
|
+
// Create tensors with the permuted dimensions, then permute them back to the dimensions given by m,n,k.
|
|
1698
|
+
const int64_t ne_a[4] = {k, m, bs[0], bs[1]};
|
|
1699
|
+
const int64_t ne_b[4] = {k, n, bs[0]*nr[0], bs[1]*nr[1]};
|
|
1700
|
+
|
|
1701
|
+
a = ggml_new_tensor_4d(ctx, type_a, ne_a[per[0]], ne_a[per[1]], ne_a[per[2]], ne_a[per[3]]);
|
|
1702
|
+
b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
|
|
1703
|
+
ggml_set_param(ctx, a);
|
|
1704
|
+
ggml_set_param(ctx, b);
|
|
1705
|
+
ggml_set_name(a, "a");
|
|
1706
|
+
ggml_set_name(b, "b");
|
|
1707
|
+
|
|
1708
|
+
a = ggml_permute(ctx, a, per[0], per[1], per[2], per[3]);
|
|
1709
|
+
b = ggml_permute(ctx, b, per[0], per[1], per[2], per[3]);
|
|
1710
|
+
ggml_set_name(a, "a_permuted");
|
|
1711
|
+
ggml_set_name(b, "b_permuted");
|
|
1712
|
+
} else {
|
|
1713
|
+
a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
|
|
1714
|
+
b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
|
|
1715
|
+
ggml_set_param(ctx, a);
|
|
1716
|
+
ggml_set_param(ctx, b);
|
|
1717
|
+
ggml_set_name(a, "a");
|
|
1718
|
+
ggml_set_name(b, "b");
|
|
1719
|
+
}
|
|
1720
|
+
|
|
990
1721
|
ggml_tensor * out = ggml_mul_mat(ctx, a, b);
|
|
1722
|
+
ggml_set_name(out, "out");
|
|
1723
|
+
|
|
991
1724
|
return out;
|
|
992
1725
|
}
|
|
993
1726
|
};
|
|
@@ -1011,13 +1744,9 @@ struct test_mul_mat_id : public test_case {
|
|
|
1011
1744
|
return 5e-4;
|
|
1012
1745
|
}
|
|
1013
1746
|
|
|
1014
|
-
|
|
1015
|
-
size_t a = ggml_nbytes(t->src[2]) * n;
|
|
1016
|
-
size_t b = ggml_nbytes(t->src[1]) * m;
|
|
1017
|
-
size_t c = ggml_nbytes(t);
|
|
1018
|
-
return a + b + c;
|
|
1019
|
-
|
|
1747
|
+
uint64_t op_flops(ggml_tensor * t) override {
|
|
1020
1748
|
GGML_UNUSED(t);
|
|
1749
|
+
return 2 * m * k * n * n_used;
|
|
1021
1750
|
}
|
|
1022
1751
|
|
|
1023
1752
|
test_mul_mat_id(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
|
|
@@ -1031,12 +1760,21 @@ struct test_mul_mat_id : public test_case {
|
|
|
1031
1760
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1032
1761
|
// C^T = A * B^T: (k, m) * (k, n) => (m, n)
|
|
1033
1762
|
ggml_tensor * as = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
|
|
1763
|
+
ggml_set_name(as, "as");
|
|
1764
|
+
|
|
1034
1765
|
ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
|
|
1766
|
+
ggml_set_name(ids, "ids");
|
|
1035
1767
|
if (n_used != n_mats) {
|
|
1036
1768
|
ids = ggml_view_2d(ctx, ids, n_used, n, ids->nb[1], 0);
|
|
1769
|
+
ggml_set_name(ids, "view_of_ids");
|
|
1037
1770
|
}
|
|
1771
|
+
|
|
1038
1772
|
ggml_tensor * b = ggml_new_tensor_3d(ctx, type_b, k, this->b ? 1 : n_used, n);
|
|
1773
|
+
ggml_set_name(b, "b");
|
|
1774
|
+
|
|
1039
1775
|
ggml_tensor * out = ggml_mul_mat_id(ctx, as, b, ids);
|
|
1776
|
+
ggml_set_name(out, "out");
|
|
1777
|
+
|
|
1040
1778
|
return out;
|
|
1041
1779
|
}
|
|
1042
1780
|
|
|
@@ -1062,6 +1800,50 @@ struct test_mul_mat_id : public test_case {
|
|
|
1062
1800
|
}
|
|
1063
1801
|
};
|
|
1064
1802
|
|
|
1803
|
+
// GGML_OP_OUT_PROD
|
|
1804
|
+
struct test_out_prod : public test_case {
|
|
1805
|
+
const ggml_type type_a;
|
|
1806
|
+
const ggml_type type_b;
|
|
1807
|
+
const int64_t m;
|
|
1808
|
+
const int64_t n;
|
|
1809
|
+
const int64_t k;
|
|
1810
|
+
const std::array<int64_t, 2> bs; // dims 3 and 4
|
|
1811
|
+
const bool trans_b;
|
|
1812
|
+
|
|
1813
|
+
std::string vars() override {
|
|
1814
|
+
return VARS_TO_STR7(type_a, type_b, m, n, k, bs, trans_b);
|
|
1815
|
+
}
|
|
1816
|
+
|
|
1817
|
+
double max_nmse_err() override {
|
|
1818
|
+
return 5e-4;
|
|
1819
|
+
}
|
|
1820
|
+
|
|
1821
|
+
test_out_prod(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
|
|
1822
|
+
int64_t m = 32, int64_t n = 32, int64_t k = 32,
|
|
1823
|
+
std::array<int64_t, 2> bs = {10, 10},
|
|
1824
|
+
bool trans_b = false)
|
|
1825
|
+
: type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), trans_b(trans_b) {}
|
|
1826
|
+
|
|
1827
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1828
|
+
ggml_tensor * a = ggml_new_tensor_4d(ctx, type_a, m, k, bs[0], bs[1]);
|
|
1829
|
+
ggml_set_name(a, "a");
|
|
1830
|
+
|
|
1831
|
+
ggml_tensor * b;
|
|
1832
|
+
if (trans_b) {
|
|
1833
|
+
b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0], bs[1]);
|
|
1834
|
+
b = ggml_transpose(ctx, b);
|
|
1835
|
+
} else {
|
|
1836
|
+
b = ggml_new_tensor_4d(ctx, type_b, n, k, bs[0], bs[1]);
|
|
1837
|
+
}
|
|
1838
|
+
ggml_set_name(b, "b");
|
|
1839
|
+
|
|
1840
|
+
ggml_tensor * out = ggml_out_prod(ctx, a, b);
|
|
1841
|
+
ggml_set_name(out, "out");
|
|
1842
|
+
|
|
1843
|
+
return out;
|
|
1844
|
+
}
|
|
1845
|
+
};
|
|
1846
|
+
|
|
1065
1847
|
// GGML_OP_SQR
|
|
1066
1848
|
struct test_sqr : public test_case {
|
|
1067
1849
|
const ggml_type type;
|
|
@@ -1072,14 +1854,23 @@ struct test_sqr : public test_case {
|
|
|
1072
1854
|
}
|
|
1073
1855
|
|
|
1074
1856
|
test_sqr(ggml_type type = GGML_TYPE_F32,
|
|
1075
|
-
std::array<int64_t, 4> ne = {10,
|
|
1857
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
1076
1858
|
: type(type), ne(ne) {}
|
|
1077
1859
|
|
|
1078
1860
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1079
1861
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1862
|
+
ggml_set_param(ctx, a);
|
|
1863
|
+
ggml_set_name(a, "a");
|
|
1864
|
+
|
|
1080
1865
|
ggml_tensor * out = ggml_sqr(ctx, a);
|
|
1866
|
+
ggml_set_name(out, "out");
|
|
1867
|
+
|
|
1081
1868
|
return out;
|
|
1082
1869
|
}
|
|
1870
|
+
|
|
1871
|
+
float grad_eps() override {
|
|
1872
|
+
return 0.1f * 0.25f*ne[0]*ne[1]*ne[2]*ne[3]; // 10% of expected value of sum.
|
|
1873
|
+
}
|
|
1083
1874
|
};
|
|
1084
1875
|
|
|
1085
1876
|
// GGML_OP_SQRT
|
|
@@ -1092,21 +1883,156 @@ struct test_sqrt : public test_case {
|
|
|
1092
1883
|
}
|
|
1093
1884
|
|
|
1094
1885
|
test_sqrt(ggml_type type = GGML_TYPE_F32,
|
|
1095
|
-
std::array<int64_t, 4> ne = {10,
|
|
1886
|
+
std::array<int64_t, 4> ne = {10, 3, 3, 2})
|
|
1096
1887
|
: type(type), ne(ne) {}
|
|
1097
1888
|
|
|
1098
1889
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1099
1890
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1891
|
+
ggml_set_param(ctx, a);
|
|
1892
|
+
ggml_set_name(a, "a");
|
|
1893
|
+
|
|
1100
1894
|
ggml_tensor * out = ggml_sqrt(ctx, a);
|
|
1895
|
+
ggml_set_name(out, "out");
|
|
1896
|
+
|
|
1101
1897
|
return out;
|
|
1102
1898
|
}
|
|
1103
1899
|
|
|
1104
1900
|
void initialize_tensors(ggml_context * ctx) override {
|
|
1105
1901
|
// fill with positive values
|
|
1106
1902
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
1107
|
-
init_tensor_uniform(t,
|
|
1903
|
+
init_tensor_uniform(t, 50.0f, 100.0f);
|
|
1108
1904
|
}
|
|
1109
1905
|
}
|
|
1906
|
+
|
|
1907
|
+
float grad_eps() override {
|
|
1908
|
+
return 20.0f;
|
|
1909
|
+
}
|
|
1910
|
+
|
|
1911
|
+
bool grad_precise() override {
|
|
1912
|
+
return true;
|
|
1913
|
+
}
|
|
1914
|
+
};
|
|
1915
|
+
|
|
1916
|
+
// GGML_OP_LOG
|
|
1917
|
+
struct test_log : public test_case {
|
|
1918
|
+
const ggml_type type;
|
|
1919
|
+
const std::array<int64_t, 4> ne;
|
|
1920
|
+
|
|
1921
|
+
std::string vars() override {
|
|
1922
|
+
return VARS_TO_STR2(type, ne);
|
|
1923
|
+
}
|
|
1924
|
+
|
|
1925
|
+
test_log(ggml_type type = GGML_TYPE_F32,
|
|
1926
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
1927
|
+
: type(type), ne(ne) {}
|
|
1928
|
+
|
|
1929
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1930
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1931
|
+
ggml_set_param(ctx, a);
|
|
1932
|
+
ggml_set_name(a, "a");
|
|
1933
|
+
|
|
1934
|
+
ggml_tensor * out = ggml_log(ctx, a);
|
|
1935
|
+
ggml_set_name(out, "out");
|
|
1936
|
+
|
|
1937
|
+
return out;
|
|
1938
|
+
}
|
|
1939
|
+
|
|
1940
|
+
void initialize_tensors(ggml_context * ctx) override {
|
|
1941
|
+
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
1942
|
+
// log(1) == 0, cluster values there to keep the sum low for better precision in the backward pass:
|
|
1943
|
+
init_tensor_uniform(t, 0.9f, 1.1f);
|
|
1944
|
+
}
|
|
1945
|
+
}
|
|
1946
|
+
|
|
1947
|
+
bool grad_precise() override {
|
|
1948
|
+
return true;
|
|
1949
|
+
}
|
|
1950
|
+
};
|
|
1951
|
+
|
|
1952
|
+
// GGML_OP_SIN
|
|
1953
|
+
struct test_sin : public test_case {
|
|
1954
|
+
const ggml_type type;
|
|
1955
|
+
const std::array<int64_t, 4> ne;
|
|
1956
|
+
|
|
1957
|
+
std::string vars() override {
|
|
1958
|
+
return VARS_TO_STR2(type, ne);
|
|
1959
|
+
}
|
|
1960
|
+
|
|
1961
|
+
test_sin(ggml_type type = GGML_TYPE_F32,
|
|
1962
|
+
std::array<int64_t, 4> ne = {10, 2, 2, 2})
|
|
1963
|
+
: type(type), ne(ne) {}
|
|
1964
|
+
|
|
1965
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1966
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1967
|
+
ggml_set_param(ctx, a);
|
|
1968
|
+
ggml_set_name(a, "a");
|
|
1969
|
+
|
|
1970
|
+
ggml_tensor * out = ggml_sin(ctx, a);
|
|
1971
|
+
ggml_set_name(out, "out");
|
|
1972
|
+
|
|
1973
|
+
return out;
|
|
1974
|
+
}
|
|
1975
|
+
|
|
1976
|
+
void initialize_tensors(ggml_context * ctx) override {
|
|
1977
|
+
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
1978
|
+
init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
|
|
1979
|
+
}
|
|
1980
|
+
}
|
|
1981
|
+
|
|
1982
|
+
double max_maa_err() override {
|
|
1983
|
+
return 1e-3;
|
|
1984
|
+
}
|
|
1985
|
+
|
|
1986
|
+
float grad_eps() override {
|
|
1987
|
+
return 0.2f;
|
|
1988
|
+
}
|
|
1989
|
+
|
|
1990
|
+
bool grad_precise() override {
|
|
1991
|
+
return true;
|
|
1992
|
+
}
|
|
1993
|
+
};
|
|
1994
|
+
|
|
1995
|
+
// GGML_OP_COS
|
|
1996
|
+
struct test_cos : public test_case {
|
|
1997
|
+
const ggml_type type;
|
|
1998
|
+
const std::array<int64_t, 4> ne;
|
|
1999
|
+
|
|
2000
|
+
std::string vars() override {
|
|
2001
|
+
return VARS_TO_STR2(type, ne);
|
|
2002
|
+
}
|
|
2003
|
+
|
|
2004
|
+
test_cos(ggml_type type = GGML_TYPE_F32,
|
|
2005
|
+
std::array<int64_t, 4> ne = {10, 2, 2, 2})
|
|
2006
|
+
: type(type), ne(ne) {}
|
|
2007
|
+
|
|
2008
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
2009
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2010
|
+
ggml_set_param(ctx, a);
|
|
2011
|
+
ggml_set_name(a, "a");
|
|
2012
|
+
|
|
2013
|
+
ggml_tensor * out = ggml_cos(ctx, a);
|
|
2014
|
+
ggml_set_name(out, "out");
|
|
2015
|
+
|
|
2016
|
+
return out;
|
|
2017
|
+
}
|
|
2018
|
+
|
|
2019
|
+
void initialize_tensors(ggml_context * ctx) override {
|
|
2020
|
+
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
2021
|
+
init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
|
|
2022
|
+
}
|
|
2023
|
+
}
|
|
2024
|
+
|
|
2025
|
+
double max_maa_err() override {
|
|
2026
|
+
return 1e-3;
|
|
2027
|
+
}
|
|
2028
|
+
|
|
2029
|
+
float grad_eps() override {
|
|
2030
|
+
return 0.2f;
|
|
2031
|
+
}
|
|
2032
|
+
|
|
2033
|
+
bool grad_precise() override {
|
|
2034
|
+
return true;
|
|
2035
|
+
}
|
|
1110
2036
|
};
|
|
1111
2037
|
|
|
1112
2038
|
// GGML_OP_CLAMP
|
|
@@ -1121,15 +2047,27 @@ struct test_clamp : public test_case {
|
|
|
1121
2047
|
}
|
|
1122
2048
|
|
|
1123
2049
|
test_clamp(ggml_type type = GGML_TYPE_F32,
|
|
1124
|
-
std::array<int64_t, 4> ne = {10,
|
|
2050
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3},
|
|
1125
2051
|
float min = -0.5f, float max = 0.5f)
|
|
1126
2052
|
: type(type), ne(ne), min(min), max(max) {}
|
|
1127
2053
|
|
|
1128
2054
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1129
2055
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2056
|
+
ggml_set_name(a, "a");
|
|
2057
|
+
|
|
1130
2058
|
ggml_tensor * out = ggml_clamp(ctx, a, min, max);
|
|
2059
|
+
ggml_set_name(out, "out");
|
|
2060
|
+
|
|
1131
2061
|
return out;
|
|
1132
2062
|
}
|
|
2063
|
+
|
|
2064
|
+
float grad_eps() override {
|
|
2065
|
+
return 1e-2f;
|
|
2066
|
+
}
|
|
2067
|
+
|
|
2068
|
+
std::vector<float> grad_expect() override {
|
|
2069
|
+
return {0.0f, 1.0f};
|
|
2070
|
+
}
|
|
1133
2071
|
};
|
|
1134
2072
|
|
|
1135
2073
|
// GGML_OP_DIAG_MASK_INF
|
|
@@ -1143,13 +2081,18 @@ struct test_diag_mask_inf : public test_case {
|
|
|
1143
2081
|
}
|
|
1144
2082
|
|
|
1145
2083
|
test_diag_mask_inf(ggml_type type = GGML_TYPE_F32,
|
|
1146
|
-
std::array<int64_t, 4> ne = {10, 10,
|
|
2084
|
+
std::array<int64_t, 4> ne = {10, 10, 3, 2},
|
|
1147
2085
|
int n_past = 5)
|
|
1148
2086
|
: type(type), ne(ne), n_past(n_past) {}
|
|
1149
2087
|
|
|
1150
2088
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1151
2089
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2090
|
+
ggml_set_param(ctx, a);
|
|
2091
|
+
ggml_set_name(a, "a");
|
|
2092
|
+
|
|
1152
2093
|
ggml_tensor * out = ggml_diag_mask_inf(ctx, a, n_past);
|
|
2094
|
+
ggml_set_name(out, "out");
|
|
2095
|
+
|
|
1153
2096
|
return out;
|
|
1154
2097
|
}
|
|
1155
2098
|
};
|
|
@@ -1173,7 +2116,7 @@ struct test_soft_max : public test_case {
|
|
|
1173
2116
|
}
|
|
1174
2117
|
|
|
1175
2118
|
test_soft_max(ggml_type type = GGML_TYPE_F32,
|
|
1176
|
-
std::array<int64_t, 4> ne = {10,
|
|
2119
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3},
|
|
1177
2120
|
bool mask = false,
|
|
1178
2121
|
float scale = 1.0f,
|
|
1179
2122
|
float max_bias = 0.0f)
|
|
@@ -1181,13 +2124,24 @@ struct test_soft_max : public test_case {
|
|
|
1181
2124
|
|
|
1182
2125
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1183
2126
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2127
|
+
ggml_set_param(ctx, a);
|
|
2128
|
+
ggml_set_name(a, "a");
|
|
2129
|
+
|
|
1184
2130
|
ggml_tensor * mask = nullptr;
|
|
1185
2131
|
if (this->mask) {
|
|
1186
2132
|
mask = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne[0], ne[1]);
|
|
2133
|
+
ggml_set_name(mask, "mask");
|
|
1187
2134
|
}
|
|
2135
|
+
|
|
1188
2136
|
ggml_tensor * out = ggml_soft_max_ext(ctx, a, mask, scale, max_bias);
|
|
2137
|
+
ggml_set_name(out, "out");
|
|
2138
|
+
|
|
1189
2139
|
return out;
|
|
1190
2140
|
}
|
|
2141
|
+
|
|
2142
|
+
bool grad_precise() override {
|
|
2143
|
+
return true;
|
|
2144
|
+
}
|
|
1191
2145
|
};
|
|
1192
2146
|
|
|
1193
2147
|
|
|
@@ -1209,7 +2163,7 @@ struct test_rope : public test_case {
|
|
|
1209
2163
|
}
|
|
1210
2164
|
|
|
1211
2165
|
test_rope(ggml_type type = GGML_TYPE_F32,
|
|
1212
|
-
std::array<int64_t, 4> ne_a = {10,
|
|
2166
|
+
std::array<int64_t, 4> ne_a = {10, 5, 3, 1},
|
|
1213
2167
|
int n_dims = 10, int mode = 0, int n_ctx = 512, float fs = 1.0f, float ef = 0.0f, float af = 0.0f, bool ff = false, int v = 0)
|
|
1214
2168
|
: type(type), ne_a(ne_a), n_dims(n_dims), mode(mode), n_ctx(n_ctx), fs(fs), ef(ef), af(af), ff(ff), v(v) {}
|
|
1215
2169
|
|
|
@@ -1218,13 +2172,29 @@ struct test_rope : public test_case {
|
|
|
1218
2172
|
if (v & 1) {
|
|
1219
2173
|
auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
|
|
1220
2174
|
a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2175
|
+
ggml_set_param(ctx, a);
|
|
2176
|
+
ggml_set_name(a, "a");
|
|
2177
|
+
|
|
1221
2178
|
a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0);
|
|
2179
|
+
ggml_set_name(a, "view_of_a");
|
|
1222
2180
|
} else {
|
|
1223
2181
|
a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
2182
|
+
ggml_set_param(ctx, a);
|
|
2183
|
+
ggml_set_name(a, "a");
|
|
1224
2184
|
}
|
|
2185
|
+
|
|
1225
2186
|
ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, ne_a[2]);
|
|
1226
|
-
|
|
2187
|
+
ggml_set_name(pos, "pos");
|
|
2188
|
+
|
|
2189
|
+
ggml_tensor * freq = nullptr;
|
|
2190
|
+
if (ff) {
|
|
2191
|
+
freq = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_dims/2);
|
|
2192
|
+
ggml_set_name(freq, "freq");
|
|
2193
|
+
}
|
|
2194
|
+
|
|
1227
2195
|
ggml_tensor * out = ggml_rope_ext(ctx, a, pos, freq, n_dims, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
|
|
2196
|
+
ggml_set_name(out, "out");
|
|
2197
|
+
|
|
1228
2198
|
return out;
|
|
1229
2199
|
}
|
|
1230
2200
|
|
|
@@ -1247,6 +2217,14 @@ struct test_rope : public test_case {
|
|
|
1247
2217
|
}
|
|
1248
2218
|
}
|
|
1249
2219
|
}
|
|
2220
|
+
|
|
2221
|
+
double max_maa_err() override {
|
|
2222
|
+
return 1e-3;
|
|
2223
|
+
}
|
|
2224
|
+
|
|
2225
|
+
bool grad_precise() override {
|
|
2226
|
+
return true;
|
|
2227
|
+
}
|
|
1250
2228
|
};
|
|
1251
2229
|
|
|
1252
2230
|
// GGML_OP_POOL2D
|
|
@@ -1278,7 +2256,12 @@ struct test_pool2d : public test_case {
|
|
|
1278
2256
|
|
|
1279
2257
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1280
2258
|
ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
|
|
2259
|
+
ggml_set_param(ctx, input);
|
|
2260
|
+
ggml_set_name(input, "input");
|
|
2261
|
+
|
|
1281
2262
|
ggml_tensor * out = ggml_pool_2d(ctx, input, pool_type, k0, k1, s0, s1, p0, p1);
|
|
2263
|
+
ggml_set_name(out, "out");
|
|
2264
|
+
|
|
1282
2265
|
return out;
|
|
1283
2266
|
}
|
|
1284
2267
|
};
|
|
@@ -1303,8 +2286,14 @@ struct test_conv_transpose_1d : public test_case {
|
|
|
1303
2286
|
|
|
1304
2287
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1305
2288
|
ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
|
|
2289
|
+
ggml_set_name(input, "input");
|
|
2290
|
+
|
|
1306
2291
|
ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
|
|
2292
|
+
ggml_set_name(kernel, "kernel");
|
|
2293
|
+
|
|
1307
2294
|
ggml_tensor * out = ggml_conv_transpose_1d(ctx, kernel, input, s0, p0, d0);
|
|
2295
|
+
ggml_set_name(out, "out");
|
|
2296
|
+
|
|
1308
2297
|
return out;
|
|
1309
2298
|
}
|
|
1310
2299
|
};
|
|
@@ -1343,8 +2332,15 @@ struct test_im2col : public test_case {
|
|
|
1343
2332
|
|
|
1344
2333
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1345
2334
|
ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
|
|
2335
|
+
ggml_set_param(ctx, input);
|
|
2336
|
+
ggml_set_name(input, "input");
|
|
2337
|
+
|
|
1346
2338
|
ggml_tensor * kernel = ggml_new_tensor(ctx, type_kernel, 4, ne_kernel.data());
|
|
2339
|
+
ggml_set_name(kernel, "kernel");
|
|
2340
|
+
|
|
1347
2341
|
ggml_tensor * out = ggml_im2col(ctx, kernel, input, s0, s1, p0, p1, d0, d1, is_2D, dst_type);
|
|
2342
|
+
ggml_set_name(out, "out");
|
|
2343
|
+
|
|
1348
2344
|
return out;
|
|
1349
2345
|
}
|
|
1350
2346
|
};
|
|
@@ -1362,8 +2358,8 @@ struct test_concat : public test_case {
|
|
|
1362
2358
|
}
|
|
1363
2359
|
|
|
1364
2360
|
test_concat(ggml_type type = GGML_TYPE_F32,
|
|
1365
|
-
std::array<int64_t, 4> ne_a = {10,
|
|
1366
|
-
int64_t ne_b_d =
|
|
2361
|
+
std::array<int64_t, 4> ne_a = {10, 5, 5, 5},
|
|
2362
|
+
int64_t ne_b_d = 5,
|
|
1367
2363
|
int dim = 2, int v = 0)
|
|
1368
2364
|
: type(type), ne_a(ne_a), ne_b_d(ne_b_d), dim(dim), v(v) {}
|
|
1369
2365
|
|
|
@@ -1374,19 +2370,30 @@ struct test_concat : public test_case {
|
|
|
1374
2370
|
if (v & 1) {
|
|
1375
2371
|
auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
|
|
1376
2372
|
a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2373
|
+
ggml_set_name(a, "a");
|
|
2374
|
+
|
|
1377
2375
|
a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0);
|
|
2376
|
+
ggml_set_name(a, "view_of_a");
|
|
1378
2377
|
} else {
|
|
1379
2378
|
a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
2379
|
+
ggml_set_name(a, "a");
|
|
1380
2380
|
}
|
|
1381
2381
|
ggml_tensor * b;
|
|
1382
2382
|
if (v & 2) {
|
|
1383
2383
|
auto ne = ne_b; ne[0] *= 3; ne[1] *= 2; ne[2] *= 4;
|
|
1384
2384
|
b = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2385
|
+
ggml_set_name(b, "b");
|
|
2386
|
+
|
|
1385
2387
|
b = ggml_view_4d(ctx, b, ne_b[0], ne_b[1], ne_b[2], ne_b[3], b->nb[1], b->nb[2], b->nb[3], 0);
|
|
2388
|
+
ggml_set_name(b, "view_of_b");
|
|
1386
2389
|
} else {
|
|
1387
2390
|
b = ggml_new_tensor(ctx, type, 4, ne_b.data());
|
|
2391
|
+
ggml_set_name(b, "b");
|
|
1388
2392
|
}
|
|
2393
|
+
|
|
1389
2394
|
ggml_tensor * out = ggml_concat(ctx, a, b, dim);
|
|
2395
|
+
ggml_set_name(out, "out");
|
|
2396
|
+
|
|
1390
2397
|
return out;
|
|
1391
2398
|
}
|
|
1392
2399
|
};
|
|
@@ -1408,7 +2415,11 @@ struct test_argsort : public test_case {
|
|
|
1408
2415
|
|
|
1409
2416
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1410
2417
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2418
|
+
ggml_set_name(a, "a");
|
|
2419
|
+
|
|
1411
2420
|
ggml_tensor * out = ggml_argsort(ctx, a, order);
|
|
2421
|
+
ggml_set_name(out, "out");
|
|
2422
|
+
|
|
1412
2423
|
return out;
|
|
1413
2424
|
}
|
|
1414
2425
|
|
|
@@ -1441,6 +2452,35 @@ struct test_argsort : public test_case {
|
|
|
1441
2452
|
}
|
|
1442
2453
|
};
|
|
1443
2454
|
|
|
2455
|
+
// GGML_OP_SUM
|
|
2456
|
+
struct test_sum : public test_case {
|
|
2457
|
+
const ggml_type type;
|
|
2458
|
+
const std::array<int64_t, 4> ne;
|
|
2459
|
+
|
|
2460
|
+
std::string vars() override {
|
|
2461
|
+
return VARS_TO_STR2(type, ne);
|
|
2462
|
+
}
|
|
2463
|
+
|
|
2464
|
+
test_sum(ggml_type type = GGML_TYPE_F32,
|
|
2465
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
2466
|
+
: type(type), ne(ne) {}
|
|
2467
|
+
|
|
2468
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
2469
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2470
|
+
ggml_set_param(ctx, a);
|
|
2471
|
+
ggml_set_name(a, "a");
|
|
2472
|
+
|
|
2473
|
+
ggml_tensor * out = ggml_sum(ctx, a);
|
|
2474
|
+
ggml_set_name(out, "out");
|
|
2475
|
+
|
|
2476
|
+
return out;
|
|
2477
|
+
}
|
|
2478
|
+
|
|
2479
|
+
float grad_eps() override {
|
|
2480
|
+
return 0.1f * sqrtf(ne[0]*ne[1]*ne[2]*ne[3]);
|
|
2481
|
+
}
|
|
2482
|
+
};
|
|
2483
|
+
|
|
1444
2484
|
// GGML_OP_SUM_ROWS
|
|
1445
2485
|
struct test_sum_rows : public test_case {
|
|
1446
2486
|
const ggml_type type;
|
|
@@ -1451,16 +2491,50 @@ struct test_sum_rows : public test_case {
|
|
|
1451
2491
|
}
|
|
1452
2492
|
|
|
1453
2493
|
test_sum_rows(ggml_type type = GGML_TYPE_F32,
|
|
1454
|
-
std::array<int64_t, 4> ne = {10,
|
|
2494
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
1455
2495
|
: type(type), ne(ne) {}
|
|
1456
2496
|
|
|
1457
2497
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1458
2498
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2499
|
+
ggml_set_param(ctx, a);
|
|
2500
|
+
ggml_set_name(a, "a");
|
|
2501
|
+
|
|
1459
2502
|
ggml_tensor * out = ggml_sum_rows(ctx, a);
|
|
2503
|
+
ggml_set_name(out, "out");
|
|
2504
|
+
|
|
1460
2505
|
return out;
|
|
1461
2506
|
}
|
|
1462
2507
|
};
|
|
1463
2508
|
|
|
2509
|
+
// GGML_OP_MEAN
|
|
2510
|
+
struct test_mean : public test_case {
|
|
2511
|
+
const ggml_type type;
|
|
2512
|
+
const std::array<int64_t, 4> ne;
|
|
2513
|
+
|
|
2514
|
+
std::string vars() override {
|
|
2515
|
+
return VARS_TO_STR2(type, ne);
|
|
2516
|
+
}
|
|
2517
|
+
|
|
2518
|
+
test_mean(ggml_type type = GGML_TYPE_F32,
|
|
2519
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
2520
|
+
: type(type), ne(ne) {}
|
|
2521
|
+
|
|
2522
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
2523
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2524
|
+
ggml_set_param(ctx, a);
|
|
2525
|
+
ggml_set_name(a, "a");
|
|
2526
|
+
|
|
2527
|
+
ggml_tensor * out = ggml_mean(ctx, a);
|
|
2528
|
+
ggml_set_name(out, "out");
|
|
2529
|
+
|
|
2530
|
+
return out;
|
|
2531
|
+
}
|
|
2532
|
+
|
|
2533
|
+
float grad_eps() override {
|
|
2534
|
+
return 0.1f * ne[0]*ne[1]*ne[2]*ne[3];
|
|
2535
|
+
}
|
|
2536
|
+
};
|
|
2537
|
+
|
|
1464
2538
|
// GGML_OP_UPSCALE
|
|
1465
2539
|
struct test_upscale : public test_case {
|
|
1466
2540
|
const ggml_type type;
|
|
@@ -1479,8 +2553,16 @@ struct test_upscale : public test_case {
|
|
|
1479
2553
|
|
|
1480
2554
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1481
2555
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1482
|
-
|
|
2556
|
+
ggml_set_name(a, "a");
|
|
2557
|
+
|
|
2558
|
+
if (transpose) {
|
|
2559
|
+
a = ggml_transpose(ctx, a);
|
|
2560
|
+
ggml_set_name(a, "a_transposed");
|
|
2561
|
+
}
|
|
2562
|
+
|
|
1483
2563
|
ggml_tensor * out = ggml_upscale(ctx, a, scale_factor);
|
|
2564
|
+
ggml_set_name(out, "out");
|
|
2565
|
+
|
|
1484
2566
|
return out;
|
|
1485
2567
|
}
|
|
1486
2568
|
};
|
|
@@ -1502,7 +2584,11 @@ struct test_upscale_ext : public test_case {
|
|
|
1502
2584
|
|
|
1503
2585
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1504
2586
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2587
|
+
ggml_set_name(a, "a");
|
|
2588
|
+
|
|
1505
2589
|
ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3]);
|
|
2590
|
+
ggml_set_name(out, "out");
|
|
2591
|
+
|
|
1506
2592
|
return out;
|
|
1507
2593
|
}
|
|
1508
2594
|
};
|
|
@@ -1512,6 +2598,7 @@ struct test_group_norm : public test_case {
|
|
|
1512
2598
|
const ggml_type type;
|
|
1513
2599
|
const std::array<int64_t, 4> ne;
|
|
1514
2600
|
const int32_t num_groups;
|
|
2601
|
+
const float eps;
|
|
1515
2602
|
|
|
1516
2603
|
std::string vars() override {
|
|
1517
2604
|
return VARS_TO_STR3(type, ne, num_groups);
|
|
@@ -1519,12 +2606,17 @@ struct test_group_norm : public test_case {
|
|
|
1519
2606
|
|
|
1520
2607
|
test_group_norm(ggml_type type = GGML_TYPE_F32,
|
|
1521
2608
|
std::array<int64_t, 4> ne = {64, 64, 320, 1},
|
|
1522
|
-
int32_t num_groups = 32
|
|
1523
|
-
|
|
2609
|
+
int32_t num_groups = 32,
|
|
2610
|
+
float eps = 1e-6f)
|
|
2611
|
+
: type(type), ne(ne), num_groups(num_groups), eps(eps) {}
|
|
1524
2612
|
|
|
1525
2613
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1526
2614
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1527
|
-
|
|
2615
|
+
ggml_set_name(a, "a");
|
|
2616
|
+
|
|
2617
|
+
ggml_tensor * out = ggml_group_norm(ctx, a, num_groups, eps);
|
|
2618
|
+
ggml_set_name(out, "out");
|
|
2619
|
+
|
|
1528
2620
|
return out;
|
|
1529
2621
|
}
|
|
1530
2622
|
};
|
|
@@ -1540,14 +2632,22 @@ struct test_acc : public test_case {
|
|
|
1540
2632
|
}
|
|
1541
2633
|
|
|
1542
2634
|
test_acc(ggml_type type = GGML_TYPE_F32,
|
|
1543
|
-
std::array<int64_t, 4> ne_a = {
|
|
1544
|
-
std::array<int64_t, 4> ne_b = {
|
|
2635
|
+
std::array<int64_t, 4> ne_a = {256, 17, 1, 1},
|
|
2636
|
+
std::array<int64_t, 4> ne_b = {256, 16, 1, 1})
|
|
1545
2637
|
: type(type), ne_a(ne_a), ne_b(ne_b) {}
|
|
1546
2638
|
|
|
1547
2639
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1548
2640
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
2641
|
+
ggml_set_param(ctx, a);
|
|
2642
|
+
ggml_set_name(a, "a");
|
|
2643
|
+
|
|
1549
2644
|
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
|
|
2645
|
+
ggml_set_param(ctx, b);
|
|
2646
|
+
ggml_set_name(b, "b");
|
|
2647
|
+
|
|
1550
2648
|
ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
|
|
2649
|
+
ggml_set_name(out, "out");
|
|
2650
|
+
|
|
1551
2651
|
return out;
|
|
1552
2652
|
}
|
|
1553
2653
|
};
|
|
@@ -1570,7 +2670,11 @@ struct test_pad : public test_case {
|
|
|
1570
2670
|
|
|
1571
2671
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1572
2672
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
2673
|
+
ggml_set_name(a, "a");
|
|
2674
|
+
|
|
1573
2675
|
ggml_tensor * out = ggml_pad(ctx, a, pad_0, pad_1, 0, 0);
|
|
2676
|
+
ggml_set_name(out, "out");
|
|
2677
|
+
|
|
1574
2678
|
return out;
|
|
1575
2679
|
}
|
|
1576
2680
|
};
|
|
@@ -1592,6 +2696,8 @@ struct test_arange : public test_case {
|
|
|
1592
2696
|
|
|
1593
2697
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1594
2698
|
ggml_tensor * out = ggml_arange(ctx, start, stop, step);
|
|
2699
|
+
ggml_set_name(out, "out");
|
|
2700
|
+
|
|
1595
2701
|
return out;
|
|
1596
2702
|
}
|
|
1597
2703
|
};
|
|
@@ -1614,7 +2720,11 @@ struct test_timestep_embedding : public test_case {
|
|
|
1614
2720
|
|
|
1615
2721
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1616
2722
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
2723
|
+
ggml_set_name(a, "a");
|
|
2724
|
+
|
|
1617
2725
|
ggml_tensor * out = ggml_timestep_embedding(ctx, a, dim, max_period);
|
|
2726
|
+
ggml_set_name(out, "out");
|
|
2727
|
+
|
|
1618
2728
|
return out;
|
|
1619
2729
|
}
|
|
1620
2730
|
};
|
|
@@ -1630,13 +2740,17 @@ struct test_leaky_relu : public test_case {
|
|
|
1630
2740
|
}
|
|
1631
2741
|
|
|
1632
2742
|
test_leaky_relu(ggml_type type = GGML_TYPE_F32,
|
|
1633
|
-
std::array<int64_t, 4> ne_a = {10,
|
|
2743
|
+
std::array<int64_t, 4> ne_a = {10, 5, 4, 3},
|
|
1634
2744
|
float negative_slope = 0.1f)
|
|
1635
2745
|
: type(type), ne_a(ne_a), negative_slope(negative_slope) {}
|
|
1636
2746
|
|
|
1637
2747
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1638
2748
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
2749
|
+
ggml_set_name(a, "a");
|
|
2750
|
+
|
|
1639
2751
|
ggml_tensor * out = ggml_leaky_relu(ctx, a, negative_slope, true);
|
|
2752
|
+
ggml_set_name(out, "out");
|
|
2753
|
+
|
|
1640
2754
|
return out;
|
|
1641
2755
|
}
|
|
1642
2756
|
};
|
|
@@ -1651,30 +2765,151 @@ struct test_flash_attn_ext : public test_case {
|
|
|
1651
2765
|
const bool mask; // use mask
|
|
1652
2766
|
|
|
1653
2767
|
const float max_bias; // ALiBi
|
|
2768
|
+
const float logit_softcap; // Gemma 2
|
|
1654
2769
|
|
|
1655
2770
|
const ggml_type type_KV;
|
|
1656
2771
|
|
|
1657
2772
|
std::string vars() override {
|
|
1658
|
-
return
|
|
2773
|
+
return VARS_TO_STR8(hs, nh, kv, nb, mask, max_bias, logit_softcap, type_KV);
|
|
1659
2774
|
}
|
|
1660
2775
|
|
|
1661
2776
|
double max_nmse_err() override {
|
|
1662
2777
|
return 5e-4;
|
|
1663
2778
|
}
|
|
1664
2779
|
|
|
1665
|
-
|
|
1666
|
-
|
|
2780
|
+
uint64_t op_flops(ggml_tensor * t) override {
|
|
2781
|
+
GGML_UNUSED(t);
|
|
2782
|
+
// Just counting matmul costs:
|
|
2783
|
+
// Q*K^T is nb x hs x kv, P*V is nb x kv x hs, per head
|
|
2784
|
+
return 2 * 2 * nh * nb * hs * kv;
|
|
2785
|
+
}
|
|
2786
|
+
|
|
2787
|
+
test_flash_attn_ext(int64_t hs = 128, int64_t nh = 32, int64_t kv = 96, int64_t nb = 8,
|
|
2788
|
+
bool mask = true, float max_bias = 0.0f, float logit_softcap = 0.0f, ggml_type type_KV = GGML_TYPE_F16)
|
|
2789
|
+
: hs(hs), nh(nh), kv(kv), nb(nb), mask(mask), max_bias(max_bias), logit_softcap(logit_softcap), type_KV(type_KV) {}
|
|
1667
2790
|
|
|
1668
2791
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1669
2792
|
const int64_t hs_padded = GGML_PAD(hs, ggml_blck_size(type_KV));
|
|
1670
2793
|
|
|
1671
2794
|
ggml_tensor * q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, hs_padded, nb, nh, 1);
|
|
2795
|
+
ggml_set_name(q, "q");
|
|
2796
|
+
|
|
1672
2797
|
ggml_tensor * k = ggml_new_tensor_4d(ctx, type_KV, hs_padded, kv, nh, 1);
|
|
2798
|
+
ggml_set_name(k, "k");
|
|
2799
|
+
|
|
1673
2800
|
ggml_tensor * v = ggml_new_tensor_4d(ctx, type_KV, hs_padded, kv, nh, 1);
|
|
1674
|
-
|
|
1675
|
-
|
|
2801
|
+
ggml_set_name(v, "v");
|
|
2802
|
+
|
|
2803
|
+
ggml_tensor * m = nullptr;
|
|
2804
|
+
if (mask) {
|
|
2805
|
+
m = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, kv, GGML_PAD(nb, GGML_KQ_MASK_PAD), 1, 1);
|
|
2806
|
+
ggml_set_name(m, "m");
|
|
2807
|
+
}
|
|
2808
|
+
|
|
2809
|
+
ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, m, 1.0f/sqrtf(hs), max_bias, logit_softcap);
|
|
2810
|
+
ggml_set_name(out, "out");
|
|
2811
|
+
|
|
2812
|
+
return out;
|
|
2813
|
+
}
|
|
2814
|
+
|
|
2815
|
+
bool grad_precise() override {
|
|
2816
|
+
return true;
|
|
2817
|
+
}
|
|
2818
|
+
};
|
|
2819
|
+
|
|
2820
|
+
// GGML_OP_CROSS_ENTROPY_LOSS
|
|
2821
|
+
struct test_cross_entropy_loss : public test_case {
|
|
2822
|
+
const ggml_type type;
|
|
2823
|
+
const std::array<int64_t, 4> ne;
|
|
2824
|
+
|
|
2825
|
+
std::string vars() override {
|
|
2826
|
+
return VARS_TO_STR2(type, ne);
|
|
2827
|
+
}
|
|
2828
|
+
|
|
2829
|
+
test_cross_entropy_loss(ggml_type type = GGML_TYPE_F32,
|
|
2830
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
2831
|
+
: type(type), ne(ne) {}
|
|
2832
|
+
|
|
2833
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
2834
|
+
ggml_tensor * logits = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2835
|
+
ggml_set_param(ctx, logits);
|
|
2836
|
+
ggml_set_name(logits, "logits");
|
|
2837
|
+
|
|
2838
|
+
ggml_tensor * labels = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
2839
|
+
// The labels are assumed to be constant -> no gradients.
|
|
2840
|
+
ggml_set_name(labels, "labels");
|
|
2841
|
+
|
|
2842
|
+
// Ensure labels add up to 1:
|
|
2843
|
+
labels = ggml_soft_max(ctx, labels);
|
|
2844
|
+
ggml_set_name(labels, "labels_normalized");
|
|
2845
|
+
|
|
2846
|
+
ggml_tensor * out = ggml_cross_entropy_loss(ctx, logits, labels);
|
|
2847
|
+
ggml_set_name(out, "out");
|
|
2848
|
+
|
|
2849
|
+
return out;
|
|
2850
|
+
}
|
|
2851
|
+
|
|
2852
|
+
void initialize_tensors(ggml_context * ctx) override {
|
|
2853
|
+
// For larger abs. diffs between logits softmax is more linear, therefore more precise num. gradients.
|
|
2854
|
+
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
2855
|
+
init_tensor_uniform(t, -100.0f, 100.0f);
|
|
2856
|
+
}
|
|
2857
|
+
}
|
|
2858
|
+
|
|
2859
|
+
float grad_eps() override {
|
|
2860
|
+
return 1.0f;
|
|
2861
|
+
}
|
|
2862
|
+
|
|
2863
|
+
bool grad_precise() override {
|
|
2864
|
+
return true;
|
|
2865
|
+
}
|
|
2866
|
+
};
|
|
2867
|
+
|
|
2868
|
+
// GGML_OP_OPT_STEP_ADAMW
|
|
2869
|
+
struct test_opt_step_adamw : public test_case {
|
|
2870
|
+
const ggml_type type;
|
|
2871
|
+
const std::array<int64_t, 4> ne;
|
|
2872
|
+
|
|
2873
|
+
std::string vars() override {
|
|
2874
|
+
return VARS_TO_STR2(type, ne);
|
|
2875
|
+
}
|
|
2876
|
+
|
|
2877
|
+
test_opt_step_adamw(ggml_type type = GGML_TYPE_F32,
|
|
2878
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
2879
|
+
: type(type), ne(ne) {}
|
|
2880
|
+
|
|
2881
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
2882
|
+
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
|
2883
|
+
ggml_set_param(ctx, a); // Despite tensor a having gradients the output tensor will not.
|
|
2884
|
+
ggml_set_name(a, "a");
|
|
2885
|
+
|
|
2886
|
+
ggml_tensor * grad = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
|
2887
|
+
ggml_set_name(grad, "grad");
|
|
2888
|
+
|
|
2889
|
+
ggml_tensor * grad_m = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
|
2890
|
+
ggml_set_name(grad_m, "grad_m");
|
|
2891
|
+
|
|
2892
|
+
ggml_tensor * grad_v = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
|
2893
|
+
ggml_set_name(grad_v, "grad_v");
|
|
2894
|
+
|
|
2895
|
+
ggml_tensor * adamw_params = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 7);
|
|
2896
|
+
ggml_set_name(adamw_params, "adamw_params");
|
|
2897
|
+
|
|
2898
|
+
ggml_tensor * out = ggml_opt_step_adamw(ctx, a, grad, grad_m, grad_v, adamw_params);
|
|
2899
|
+
ggml_set_name(out, "out");
|
|
2900
|
+
|
|
1676
2901
|
return out;
|
|
1677
2902
|
}
|
|
2903
|
+
|
|
2904
|
+
void initialize_tensors(ggml_context * ctx) override {
|
|
2905
|
+
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
2906
|
+
init_tensor_uniform(t, 0.0f, 1.0f); // grad_v and adamw_params need non-negative values.
|
|
2907
|
+
}
|
|
2908
|
+
}
|
|
2909
|
+
|
|
2910
|
+
bool grad_precise() override {
|
|
2911
|
+
return true;
|
|
2912
|
+
}
|
|
1678
2913
|
};
|
|
1679
2914
|
|
|
1680
2915
|
enum llm_norm_type {
|
|
@@ -2061,48 +3296,55 @@ struct test_falcon : public test_llm {
|
|
|
2061
3296
|
}
|
|
2062
3297
|
};
|
|
2063
3298
|
|
|
2064
|
-
static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_name) {
|
|
2065
|
-
std::vector<std::unique_ptr<test_case>> test_cases;
|
|
2066
|
-
std::default_random_engine rng(0);
|
|
2067
3299
|
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
3300
|
+
// ###########################################
|
|
3301
|
+
// ## Section 3: GGML Op Test Instantiation ##
|
|
3302
|
+
// ###########################################
|
|
3303
|
+
static const ggml_type all_types[] = {
|
|
3304
|
+
GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16,
|
|
3305
|
+
GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
|
|
3306
|
+
GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
|
|
3307
|
+
GGML_TYPE_Q8_0,
|
|
3308
|
+
GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
|
|
3309
|
+
GGML_TYPE_Q4_K, GGML_TYPE_Q5_K,
|
|
3310
|
+
GGML_TYPE_Q6_K,
|
|
3311
|
+
// GGML_TYPE_TQ1_0, GGML_TYPE_TQ2_0, // TODO: implement for all backends
|
|
3312
|
+
GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
|
|
3313
|
+
GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
|
|
3314
|
+
GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
|
|
3315
|
+
};
|
|
2080
3316
|
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
3317
|
+
static const ggml_type base_types[] = {
|
|
3318
|
+
GGML_TYPE_F32, GGML_TYPE_F16,
|
|
3319
|
+
GGML_TYPE_Q4_0,
|
|
3320
|
+
GGML_TYPE_Q4_K,
|
|
3321
|
+
GGML_TYPE_IQ2_XXS
|
|
3322
|
+
};
|
|
2087
3323
|
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
|
|
3324
|
+
static const ggml_type other_types[] = {
|
|
3325
|
+
GGML_TYPE_Q4_1,
|
|
3326
|
+
GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
|
|
3327
|
+
GGML_TYPE_Q8_0,
|
|
3328
|
+
GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
|
|
3329
|
+
GGML_TYPE_Q5_K,
|
|
3330
|
+
GGML_TYPE_Q6_K,
|
|
3331
|
+
// GGML_TYPE_TQ1_0, GGML_TYPE_TQ2_0, // TODO: implement for all backends
|
|
3332
|
+
GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
|
|
3333
|
+
GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
|
|
3334
|
+
GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
|
|
3335
|
+
GGML_TYPE_BF16,
|
|
3336
|
+
};
|
|
3337
|
+
|
|
3338
|
+
// Test cases for evaluation: should try to cover edge cases while using small input sizes to keep the runtime low
|
|
3339
|
+
static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|
3340
|
+
std::vector<std::unique_ptr<test_case>> test_cases;
|
|
3341
|
+
std::default_random_engine rng(0);
|
|
2100
3342
|
|
|
2101
3343
|
// unary ops
|
|
2102
3344
|
for (int v : {0, 1}) {
|
|
2103
3345
|
for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) {
|
|
2104
|
-
test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 128,
|
|
2105
|
-
test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, {
|
|
3346
|
+
test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 128, 2, 2, 2 }, v));
|
|
3347
|
+
test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 5, 7, 11, 13 }, v));
|
|
2106
3348
|
}
|
|
2107
3349
|
}
|
|
2108
3350
|
|
|
@@ -2138,8 +3380,56 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2138
3380
|
}
|
|
2139
3381
|
}
|
|
2140
3382
|
|
|
3383
|
+
// im2col 1D
|
|
3384
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
|
|
3385
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
|
|
3386
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
|
|
3387
|
+
for (int s0 : {1, 3}) {
|
|
3388
|
+
for (int p0 : {0, 3}) {
|
|
3389
|
+
for (int d0 : {1, 3}) {
|
|
3390
|
+
test_cases.emplace_back(new test_im2col(
|
|
3391
|
+
GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {20, 2, 2, 1}, {3, 2, 2, 1},
|
|
3392
|
+
s0, 0, p0, 0, d0, 0, false));
|
|
3393
|
+
}
|
|
3394
|
+
}
|
|
3395
|
+
}
|
|
3396
|
+
|
|
3397
|
+
// im2col 2D
|
|
3398
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32));
|
|
2141
3399
|
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32));
|
|
2142
3400
|
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16));
|
|
3401
|
+
for (int s0 : {1, 3}) {
|
|
3402
|
+
for (int s1 : {1, 3}) {
|
|
3403
|
+
for (int p0 : {0, 3}) {
|
|
3404
|
+
for (int p1 : {0, 3}) {
|
|
3405
|
+
for (int d0 : {1, 3}) {
|
|
3406
|
+
for (int d1 : {1, 3}) {
|
|
3407
|
+
test_cases.emplace_back(new test_im2col(
|
|
3408
|
+
GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {20, 20, 2, 2}, {3, 3, 2, 2},
|
|
3409
|
+
s0, s1, p0, p1, d0, d1, true));
|
|
3410
|
+
}
|
|
3411
|
+
}
|
|
3412
|
+
}
|
|
3413
|
+
}
|
|
3414
|
+
}
|
|
3415
|
+
}
|
|
3416
|
+
|
|
3417
|
+
// extra tests for im2col 2D
|
|
3418
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 32}, {3, 3, 1, 32}, 1, 1, 1, 1, 1, 1, true));
|
|
3419
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 32}, {3, 3, 2, 32}, 1, 1, 1, 1, 1, 1, true));
|
|
3420
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 1024}, {3, 3, 1, 1024}, 1, 1, 1, 1, 1, 1, true));
|
|
3421
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 1024}, {3, 3, 2, 1024}, 1, 1, 1, 1, 1, 1, true));
|
|
3422
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 2048}, {3, 3, 1, 2048}, 1, 1, 1, 1, 1, 1, true));
|
|
3423
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 2048}, {3, 3, 2, 2048}, 1, 1, 1, 1, 1, 1, true));
|
|
3424
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 2560}, {3, 3, 1, 2560}, 1, 1, 1, 1, 1, 1, true));
|
|
3425
|
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 2560}, {3, 3, 2, 2560}, 1, 1, 1, 1, 1, 1, true));
|
|
3426
|
+
|
|
3427
|
+
// sycl backend will limit task global_range < MAX_INT
|
|
3428
|
+
// test cases for 2D im2col with large input W and H (occurs in stable-diffusion)
|
|
3429
|
+
// however these cases need to alloc more memory which may fail in some devices (Intel Arc770, etc.)
|
|
3430
|
+
// these cases are verified (pass) in Intel(R) Data Center GPU Max 1100 (sycl backend) and NV A30 (cuda backend)
|
|
3431
|
+
// test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
|
|
3432
|
+
// test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
|
|
2143
3433
|
|
|
2144
3434
|
test_cases.emplace_back(new test_conv_transpose_1d());
|
|
2145
3435
|
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
|
|
@@ -2150,14 +3440,18 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2150
3440
|
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
|
|
2151
3441
|
test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
|
|
2152
3442
|
|
|
3443
|
+
test_cases.emplace_back(new test_argmax());
|
|
3444
|
+
test_cases.emplace_back(new test_count_equal());
|
|
2153
3445
|
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
3446
|
+
for (int ne3 : {1, 3}) { // CUDA backward pass only supports ne3 == 1
|
|
3447
|
+
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 1, 1}));
|
|
3448
|
+
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {2, 1, 1, 1}));
|
|
3449
|
+
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 2, 1, 1}));
|
|
3450
|
+
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 2, 1}));
|
|
3451
|
+
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 1, 2}));
|
|
3452
|
+
test_cases.emplace_back(new test_repeat(GGML_TYPE_I32, {10, 5, 4, ne3}, {2, 1, 1, 1}));
|
|
3453
|
+
test_cases.emplace_back(new test_repeat(GGML_TYPE_I16, {10, 5, 4, ne3}, {1, 1, 1, 2}));
|
|
3454
|
+
}
|
|
2161
3455
|
|
|
2162
3456
|
test_cases.emplace_back(new test_dup(GGML_TYPE_F32));
|
|
2163
3457
|
test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
|
|
@@ -2167,8 +3461,12 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2167
3461
|
test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {0, 2, 1, 3})); // dup by rows
|
|
2168
3462
|
test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {1, 0, 2, 3}));
|
|
2169
3463
|
test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {1, 0, 2, 3})); // dup dst not-contiguous
|
|
2170
|
-
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10,
|
|
2171
|
-
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10,
|
|
3464
|
+
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
|
|
3465
|
+
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
|
|
3466
|
+
|
|
3467
|
+
for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) {
|
|
3468
|
+
test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim));
|
|
3469
|
+
}
|
|
2172
3470
|
|
|
2173
3471
|
for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
|
|
2174
3472
|
for (ggml_type type_dst : all_types) {
|
|
@@ -2183,6 +3481,15 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2183
3481
|
}
|
|
2184
3482
|
|
|
2185
3483
|
test_cases.emplace_back(new test_cont());
|
|
3484
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));
|
|
3485
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5}));
|
|
3486
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7}));
|
|
3487
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1}));
|
|
3488
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5}));
|
|
3489
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7}));
|
|
3490
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1}));
|
|
3491
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5}));
|
|
3492
|
+
test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7}));
|
|
2186
3493
|
|
|
2187
3494
|
auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
|
|
2188
3495
|
for (auto op : {ggml_add, ggml_mul, ggml_div}) {
|
|
@@ -2193,16 +3500,16 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2193
3500
|
add_test_bin_bcast(GGML_TYPE_F32, {1, 1, 8, 1}, {1, 1, 1, 1});
|
|
2194
3501
|
add_test_bin_bcast(GGML_TYPE_F32, {1, 1, 1, 1}, {32, 1, 1, 1});
|
|
2195
3502
|
add_test_bin_bcast(GGML_TYPE_F32, {1, 1, 320, 320}, {1, 1, 1, 1});
|
|
2196
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2197
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2198
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2199
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2200
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2201
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2202
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2203
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2204
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
2205
|
-
add_test_bin_bcast(GGML_TYPE_F32, {
|
|
3503
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 1, 1}, {1, 1, 1, 1});
|
|
3504
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 1}, {1, 1, 1, 1});
|
|
3505
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 1, 1});
|
|
3506
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {2, 1, 1, 1});
|
|
3507
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 2, 1, 1});
|
|
3508
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 2, 1});
|
|
3509
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 1, 2});
|
|
3510
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 2, 2});
|
|
3511
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 2, 2, 2});
|
|
3512
|
+
add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {2, 2, 2, 2});
|
|
2206
3513
|
|
|
2207
3514
|
// stable diffusion
|
|
2208
3515
|
add_test_bin_bcast(GGML_TYPE_F32, {1280, 1, 1, 1}, {1, 1, 1, 1});
|
|
@@ -2221,23 +3528,36 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2221
3528
|
//add_test_bin_bcast(GGML_TYPE_F32, {3, 3, 2560, 1280}, {1, 1, 1, 1});
|
|
2222
3529
|
//add_test_bin_bcast(GGML_TYPE_F32, {3, 3, 2560, 1280}, {2, 1, 1, 1});
|
|
2223
3530
|
|
|
3531
|
+
test_cases.emplace_back(new test_add1());
|
|
2224
3532
|
test_cases.emplace_back(new test_scale());
|
|
2225
3533
|
|
|
2226
3534
|
for (float eps : {1e-6f, 1e-5f, 1e-3f, 1e-1f}) {
|
|
2227
|
-
test_cases.emplace_back(new test_norm(GGML_TYPE_F32, {64,
|
|
2228
|
-
test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64,
|
|
3535
|
+
test_cases.emplace_back(new test_norm(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
|
|
3536
|
+
test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
|
|
2229
3537
|
}
|
|
2230
3538
|
|
|
3539
|
+
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 1, 1}, {4, 1536, 1, 1}));
|
|
3540
|
+
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {8, 1536, 1, 1}, {4, 1536, 1, 1}));
|
|
3541
|
+
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 4, 1}, {4, 1536, 1, 1}));
|
|
3542
|
+
|
|
3543
|
+
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 16, 1024, 32, 4));
|
|
3544
|
+
|
|
3545
|
+
test_cases.emplace_back(new test_rwkv_wkv6(GGML_TYPE_F32, 32, 64, 1, 1));
|
|
3546
|
+
test_cases.emplace_back(new test_rwkv_wkv6(GGML_TYPE_F32, 32, 64, 32, 1));
|
|
3547
|
+
test_cases.emplace_back(new test_rwkv_wkv6(GGML_TYPE_F32, 32, 64, 32, 4));
|
|
3548
|
+
test_cases.emplace_back(new test_rwkv_wkv6(GGML_TYPE_F32, 32, 64, 128, 4));
|
|
3549
|
+
|
|
2231
3550
|
#if 1
|
|
2232
3551
|
for (ggml_type type_a : base_types) {
|
|
2233
3552
|
for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
|
2234
|
-
|
|
2235
|
-
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16,
|
|
2236
|
-
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16,
|
|
2237
|
-
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16,
|
|
2238
|
-
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16,
|
|
2239
|
-
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16,
|
|
2240
|
-
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16,
|
|
3553
|
+
// test cases without permutation
|
|
3554
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
|
|
3555
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {10, 1}, {1, 1}));
|
|
3556
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {10, 1}, {2, 1}));
|
|
3557
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {10, 10}, {1, 1}));
|
|
3558
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {10, 10}, {2, 1}));
|
|
3559
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {10, 10}, {1, 2}));
|
|
3560
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {10, 10}, {2, 2}));
|
|
2241
3561
|
|
|
2242
3562
|
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 1, 1}, {1, 1}));
|
|
2243
3563
|
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 1}, {1, 1}));
|
|
@@ -2246,6 +3566,27 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2246
3566
|
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {2, 1}));
|
|
2247
3567
|
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {1, 2}));
|
|
2248
3568
|
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {2, 2}));
|
|
3569
|
+
|
|
3570
|
+
// test cases with permutation
|
|
3571
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 2, 1, 3}));
|
|
3572
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 1, 3, 2}));
|
|
3573
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 3, 2, 1}));
|
|
3574
|
+
|
|
3575
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 2, 1, 3}));
|
|
3576
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 1, 3, 2}));
|
|
3577
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 3, 2, 1}));
|
|
3578
|
+
|
|
3579
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 2, 1, 3}));
|
|
3580
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 1, 3, 2}));
|
|
3581
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 3, 2, 1}));
|
|
3582
|
+
}
|
|
3583
|
+
}
|
|
3584
|
+
for (ggml_type type_a : other_types) {
|
|
3585
|
+
for (ggml_type type_b : {GGML_TYPE_F32}) {
|
|
3586
|
+
if (ggml_blck_size(type_a) != 256) {
|
|
3587
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, ggml_blck_size(type_a), {1, 1}, {1, 1}));
|
|
3588
|
+
}
|
|
3589
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
|
|
2249
3590
|
}
|
|
2250
3591
|
}
|
|
2251
3592
|
#else
|
|
@@ -2267,12 +3608,6 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2267
3608
|
}
|
|
2268
3609
|
#endif
|
|
2269
3610
|
|
|
2270
|
-
for (ggml_type type_a : other_types) {
|
|
2271
|
-
for (ggml_type type_b : {GGML_TYPE_F32}) {
|
|
2272
|
-
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
|
|
2273
|
-
}
|
|
2274
|
-
}
|
|
2275
|
-
|
|
2276
3611
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 128, { 8, 1}, {1, 1}));
|
|
2277
3612
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 128, { 8, 1}, {4, 1}));
|
|
2278
3613
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 64, { 8, 1}, {4, 1}));
|
|
@@ -2280,6 +3615,12 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2280
3615
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
|
|
2281
3616
|
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));
|
|
2282
3617
|
|
|
3618
|
+
// sycl backend will limit task global_range < MAX_INT
|
|
3619
|
+
// test case for f16-type-convert-to-fp32 kernel with large k under fp32 compute dtype (occurs in stable-diffusion)
|
|
3620
|
+
// however this case needs to alloc more memory which may fail in some devices (Intel Arc770, etc.)
|
|
3621
|
+
// this case is verified (pass) in Intel(R) Data Center GPU Max 1100 (sycl backend) and NV A30 (cuda backend)
|
|
3622
|
+
// test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 512, 262144, 9216, {1, 1}, {1, 1}));
|
|
3623
|
+
|
|
2283
3624
|
for (ggml_type type_a : base_types) {
|
|
2284
3625
|
for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) {
|
|
2285
3626
|
for (int n_mats : {4, 8}) {
|
|
@@ -2301,7 +3642,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2301
3642
|
for (int n_mats : {4}) {
|
|
2302
3643
|
for (int n_used : {2}) {
|
|
2303
3644
|
for (bool b : {false}) {
|
|
2304
|
-
for (int n : {1}) {
|
|
3645
|
+
for (int n : {1, 32}) {
|
|
2305
3646
|
int m = 512;
|
|
2306
3647
|
int k = 256;
|
|
2307
3648
|
test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, n_used, b, m, n, k));
|
|
@@ -2312,13 +3653,37 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2312
3653
|
}
|
|
2313
3654
|
}
|
|
2314
3655
|
|
|
3656
|
+
for (ggml_type type_a : base_types) {
|
|
3657
|
+
for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
|
3658
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, { 1, 1}));
|
|
3659
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 1}));
|
|
3660
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 1}));
|
|
3661
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
|
|
3662
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
|
|
3663
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
|
|
3664
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
|
|
3665
|
+
|
|
3666
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, { 1, 1}));
|
|
3667
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, { 1, 1}, true));
|
|
3668
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 1}));
|
|
3669
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 1}));
|
|
3670
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
|
|
3671
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
|
|
3672
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
|
|
3673
|
+
test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
|
|
3674
|
+
}
|
|
3675
|
+
}
|
|
3676
|
+
|
|
2315
3677
|
test_cases.emplace_back(new test_sqr());
|
|
2316
3678
|
test_cases.emplace_back(new test_sqrt());
|
|
3679
|
+
test_cases.emplace_back(new test_log());
|
|
3680
|
+
test_cases.emplace_back(new test_sin());
|
|
3681
|
+
test_cases.emplace_back(new test_cos());
|
|
2317
3682
|
test_cases.emplace_back(new test_clamp());
|
|
2318
3683
|
|
|
2319
|
-
test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10,
|
|
2320
|
-
test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10,
|
|
2321
|
-
test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10,
|
|
3684
|
+
test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 1, 1}, 5));
|
|
3685
|
+
test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 3, 1}, 5));
|
|
3686
|
+
test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 3, 2}, 5));
|
|
2322
3687
|
|
|
2323
3688
|
#if 0
|
|
2324
3689
|
std::uniform_int_distribution<> dist_ne1(1, 50);
|
|
@@ -2362,23 +3727,23 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2362
3727
|
for (float af : { 1.0f, 1.4245f }) {
|
|
2363
3728
|
for (ggml_type type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
|
2364
3729
|
for (bool ff : {false, true}) { // freq_factors
|
|
2365
|
-
test_cases.emplace_back(new test_rope(type, {128, 32,
|
|
3730
|
+
test_cases.emplace_back(new test_rope(type, {128, 32, 2, 1}, 128, 0, 512, fs, ef, af, ff, v)); // llama 7B
|
|
2366
3731
|
|
|
2367
3732
|
if (all) {
|
|
2368
|
-
test_cases.emplace_back(new test_rope(type, {128, 40,
|
|
2369
|
-
test_cases.emplace_back(new test_rope(type, {128, 52,
|
|
2370
|
-
test_cases.emplace_back(new test_rope(type, {128, 64,
|
|
3733
|
+
test_cases.emplace_back(new test_rope(type, {128, 40, 2, 1}, 128, 0, 512, fs, ef, af, ff, v)); // llama 13B
|
|
3734
|
+
test_cases.emplace_back(new test_rope(type, {128, 52, 2, 1}, 128, 0, 512, fs, ef, af, ff, v)); // llama 30B
|
|
3735
|
+
test_cases.emplace_back(new test_rope(type, {128, 64, 2, 1}, 128, 0, 512, fs, ef, af, ff, v)); // llama 65B
|
|
2371
3736
|
}
|
|
2372
3737
|
|
|
2373
3738
|
if (all) {
|
|
2374
|
-
test_cases.emplace_back(new test_rope(type, { 64, 1,
|
|
2375
|
-
test_cases.emplace_back(new test_rope(type, { 64, 71,
|
|
2376
|
-
test_cases.emplace_back(new test_rope(type, { 64, 8,
|
|
2377
|
-
test_cases.emplace_back(new test_rope(type, { 80, 32,
|
|
2378
|
-
test_cases.emplace_back(new test_rope(type, { 80, 32,
|
|
3739
|
+
test_cases.emplace_back(new test_rope(type, { 64, 1, 2, 1}, 64, 2, 512, fs, ef, af, ff, v)); // neox (falcon 7B)
|
|
3740
|
+
test_cases.emplace_back(new test_rope(type, { 64, 71, 2, 1}, 64, 2, 512, fs, ef, af, ff, v)); // neox (falcon 7B)
|
|
3741
|
+
test_cases.emplace_back(new test_rope(type, { 64, 8, 2, 1}, 64, 2, 512, fs, ef, af, ff, v)); // neox (falcon 40B)
|
|
3742
|
+
test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 20, 2, 512, fs, ef, af, ff, v)); // neox (stablelm)
|
|
3743
|
+
test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 32, 2, 512, fs, ef, af, ff, v)); // neox (phi-2)
|
|
2379
3744
|
}
|
|
2380
3745
|
|
|
2381
|
-
test_cases.emplace_back(new test_rope(type, { 64, 128,
|
|
3746
|
+
test_cases.emplace_back(new test_rope(type, { 64, 128, 2, 1}, 64, 2, 512, fs, ef, af, ff, v)); // neox (falcon 40B)
|
|
2382
3747
|
}
|
|
2383
3748
|
}
|
|
2384
3749
|
|
|
@@ -2402,7 +3767,9 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2402
3767
|
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {60, 10, 10, 10}, order)); // qwen
|
|
2403
3768
|
}
|
|
2404
3769
|
|
|
3770
|
+
test_cases.emplace_back(new test_sum());
|
|
2405
3771
|
test_cases.emplace_back(new test_sum_rows());
|
|
3772
|
+
test_cases.emplace_back(new test_mean());
|
|
2406
3773
|
test_cases.emplace_back(new test_upscale());
|
|
2407
3774
|
test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, { 512, 512, 3, 1 }, 2, true));
|
|
2408
3775
|
test_cases.emplace_back(new test_upscale_ext());
|
|
@@ -2417,11 +3784,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2417
3784
|
for (bool mask : { true, false } ) {
|
|
2418
3785
|
for (float max_bias : { 0.0f, 8.0f }) {
|
|
2419
3786
|
if (!mask && max_bias > 0.0f) continue;
|
|
2420
|
-
for (
|
|
2421
|
-
|
|
2422
|
-
|
|
2423
|
-
|
|
2424
|
-
|
|
3787
|
+
for (float logit_softcap : {0.0f, 10.0f}) {
|
|
3788
|
+
if (hs != 128 && logit_softcap != 0.0f) continue;
|
|
3789
|
+
for (int nh : { 32, }) {
|
|
3790
|
+
for (int kv : { 512, 1024, }) {
|
|
3791
|
+
for (int nb : { 1, 3, 32, 35, }) {
|
|
3792
|
+
for (ggml_type type_KV : {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) {
|
|
3793
|
+
test_cases.emplace_back(new test_flash_attn_ext(hs, nh, kv, nb, mask, max_bias, logit_softcap, type_KV));
|
|
3794
|
+
}
|
|
2425
3795
|
}
|
|
2426
3796
|
}
|
|
2427
3797
|
}
|
|
@@ -2430,6 +3800,9 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2430
3800
|
}
|
|
2431
3801
|
}
|
|
2432
3802
|
|
|
3803
|
+
test_cases.emplace_back(new test_cross_entropy_loss());
|
|
3804
|
+
test_cases.emplace_back(new test_opt_step_adamw(GGML_TYPE_F32, {10, 5, 4, 3}));
|
|
3805
|
+
|
|
2433
3806
|
// these tests are disabled to save execution time, but they can be handy for debugging
|
|
2434
3807
|
#if 0
|
|
2435
3808
|
test_cases.emplace_back(new test_llama(1));
|
|
@@ -2438,8 +3811,32 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2438
3811
|
test_cases.emplace_back(new test_falcon(2));
|
|
2439
3812
|
#endif
|
|
2440
3813
|
|
|
2441
|
-
|
|
3814
|
+
return test_cases;
|
|
3815
|
+
}
|
|
3816
|
+
|
|
3817
|
+
// Test cases for performance evaluation: should be representative of real-world use cases
|
|
3818
|
+
static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|
3819
|
+
std::vector<std::unique_ptr<test_case>> test_cases;
|
|
3820
|
+
|
|
3821
|
+
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 1, 1, 1}));
|
|
3822
|
+
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 512, 1, 1}));
|
|
3823
|
+
|
|
3824
|
+
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16, {512, 3072, 1, 1}));
|
|
3825
|
+
|
|
3826
|
+
for (int bs : {1, 512}) {
|
|
3827
|
+
for (ggml_type type_a : all_types) {
|
|
3828
|
+
for (ggml_type type_b : {GGML_TYPE_F32}) {
|
|
3829
|
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 4096, bs, 14336, {1, 1}, {1, 1}));
|
|
3830
|
+
}
|
|
3831
|
+
}
|
|
3832
|
+
}
|
|
3833
|
+
|
|
3834
|
+
return test_cases;
|
|
3835
|
+
}
|
|
3836
|
+
|
|
3837
|
+
static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_name) {
|
|
2442
3838
|
if (mode == MODE_TEST) {
|
|
3839
|
+
auto test_cases = make_test_cases_eval();
|
|
2443
3840
|
ggml_backend_t backend_cpu = ggml_backend_cpu_init();
|
|
2444
3841
|
|
|
2445
3842
|
size_t n_ok = 0;
|
|
@@ -2455,7 +3852,21 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2455
3852
|
return n_ok == test_cases.size();
|
|
2456
3853
|
}
|
|
2457
3854
|
|
|
3855
|
+
if (mode == MODE_GRAD) {
|
|
3856
|
+
auto test_cases = make_test_cases_eval();
|
|
3857
|
+
size_t n_ok = 0;
|
|
3858
|
+
for (auto & test : test_cases) {
|
|
3859
|
+
if (test->eval_grad(backend, op_name)) {
|
|
3860
|
+
n_ok++;
|
|
3861
|
+
}
|
|
3862
|
+
}
|
|
3863
|
+
printf(" %zu/%zu tests passed\n", n_ok, test_cases.size());
|
|
3864
|
+
|
|
3865
|
+
return n_ok == test_cases.size();
|
|
3866
|
+
}
|
|
3867
|
+
|
|
2458
3868
|
if (mode == MODE_PERF) {
|
|
3869
|
+
auto test_cases = make_test_cases_perf();
|
|
2459
3870
|
for (auto & test : test_cases) {
|
|
2460
3871
|
test->eval_perf(backend, op_name);
|
|
2461
3872
|
}
|
|
@@ -2463,13 +3874,15 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
|
2463
3874
|
}
|
|
2464
3875
|
|
|
2465
3876
|
GGML_ABORT("fatal error");
|
|
2466
|
-
return false;
|
|
2467
3877
|
}
|
|
2468
3878
|
|
|
2469
3879
|
static void usage(char ** argv) {
|
|
2470
3880
|
printf("Usage: %s [mode] [-o op] [-b backend]\n", argv[0]);
|
|
2471
|
-
printf("
|
|
2472
|
-
printf("
|
|
3881
|
+
printf(" valid modes:\n");
|
|
3882
|
+
printf(" - test (default, compare with CPU backend for correctness)\n");
|
|
3883
|
+
printf(" - grad (compare gradients from backpropagation with method of finite differences)\n");
|
|
3884
|
+
printf(" - perf (performance evaluation)\n");
|
|
3885
|
+
printf(" op names for -o are as given by ggml_op_desc() (e.g. ADD, MUL_MAT, etc)\n");
|
|
2473
3886
|
}
|
|
2474
3887
|
|
|
2475
3888
|
int main(int argc, char ** argv) {
|
|
@@ -2482,6 +3895,8 @@ int main(int argc, char ** argv) {
|
|
|
2482
3895
|
mode = MODE_TEST;
|
|
2483
3896
|
} else if (strcmp(argv[i], "perf") == 0) {
|
|
2484
3897
|
mode = MODE_PERF;
|
|
3898
|
+
} else if (strcmp(argv[i], "grad") == 0) {
|
|
3899
|
+
mode = MODE_GRAD;
|
|
2485
3900
|
} else if (strcmp(argv[i], "-o") == 0) {
|
|
2486
3901
|
if (i + 1 < argc) {
|
|
2487
3902
|
op_name_filter = argv[++i];
|
|
@@ -2503,30 +3918,43 @@ int main(int argc, char ** argv) {
|
|
|
2503
3918
|
}
|
|
2504
3919
|
|
|
2505
3920
|
// enumerate backends
|
|
2506
|
-
printf("Testing %zu
|
|
3921
|
+
printf("Testing %zu devices\n\n", ggml_backend_dev_count());
|
|
2507
3922
|
|
|
2508
3923
|
size_t n_ok = 0;
|
|
2509
3924
|
|
|
2510
|
-
for (size_t i = 0; i <
|
|
2511
|
-
|
|
3925
|
+
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
|
3926
|
+
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
|
2512
3927
|
|
|
2513
|
-
|
|
3928
|
+
printf("Backend %zu/%zu: %s\n", i + 1, ggml_backend_dev_count(), ggml_backend_dev_name(dev));
|
|
3929
|
+
|
|
3930
|
+
if (backend_filter != NULL && strcmp(backend_filter, ggml_backend_dev_name(dev)) != 0) {
|
|
2514
3931
|
printf(" Skipping\n");
|
|
2515
3932
|
n_ok++;
|
|
2516
3933
|
continue;
|
|
2517
3934
|
}
|
|
2518
3935
|
|
|
2519
|
-
ggml_backend_t backend =
|
|
3936
|
+
ggml_backend_t backend = ggml_backend_dev_init(dev, NULL);
|
|
2520
3937
|
GGML_ASSERT(backend != NULL);
|
|
2521
3938
|
|
|
2522
|
-
if (backend_filter == NULL && ggml_backend_is_cpu(backend)) {
|
|
3939
|
+
if (backend_filter == NULL && ggml_backend_is_cpu(backend) && mode != MODE_GRAD) {
|
|
2523
3940
|
printf(" Skipping CPU backend\n");
|
|
2524
3941
|
ggml_backend_free(backend);
|
|
2525
3942
|
n_ok++;
|
|
2526
3943
|
continue;
|
|
2527
3944
|
}
|
|
2528
3945
|
|
|
2529
|
-
|
|
3946
|
+
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
|
|
3947
|
+
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
|
|
3948
|
+
if (ggml_backend_set_n_threads_fn) {
|
|
3949
|
+
// TODO: better value for n_threads
|
|
3950
|
+
ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency());
|
|
3951
|
+
}
|
|
3952
|
+
|
|
3953
|
+
printf(" Device description: %s\n", ggml_backend_dev_description(dev));
|
|
3954
|
+
size_t free, total; // NOLINT
|
|
3955
|
+
ggml_backend_dev_memory(dev, &free, &total);
|
|
3956
|
+
printf(" Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024);
|
|
3957
|
+
printf("\n");
|
|
2530
3958
|
|
|
2531
3959
|
bool ok = test_backend(backend, mode, op_name_filter);
|
|
2532
3960
|
|
|
@@ -2543,15 +3971,15 @@ int main(int argc, char ** argv) {
|
|
|
2543
3971
|
ggml_backend_free(backend);
|
|
2544
3972
|
}
|
|
2545
3973
|
|
|
2546
|
-
|
|
3974
|
+
ggml_quantize_free();
|
|
3975
|
+
|
|
3976
|
+
printf("%zu/%zu backends passed\n", n_ok, ggml_backend_dev_count());
|
|
2547
3977
|
|
|
2548
|
-
if (n_ok !=
|
|
3978
|
+
if (n_ok != ggml_backend_dev_count()) {
|
|
2549
3979
|
printf("\033[1;31mFAIL\033[0m\n");
|
|
2550
3980
|
return 1;
|
|
2551
3981
|
}
|
|
2552
3982
|
|
|
2553
|
-
ggml_quantize_free();
|
|
2554
|
-
|
|
2555
3983
|
printf("\033[1;32mOK\033[0m\n");
|
|
2556
3984
|
return 0;
|
|
2557
3985
|
}
|