@fugood/llama.node 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -10
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +6 -4
- package/src/LlamaCompletionWorker.cpp +6 -6
- package/src/LlamaContext.cpp +7 -9
- package/src/common.hpp +2 -1
- package/src/llama.cpp/.github/workflows/build.yml +98 -24
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +43 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +20 -8
- package/src/llama.cpp/common/CMakeLists.txt +12 -10
- package/src/llama.cpp/common/arg.cpp +2006 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +496 -1632
- package/src/llama.cpp/common/common.h +161 -63
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +3 -0
- package/src/llama.cpp/common/sampling.cpp +348 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/common/train.cpp +2 -0
- package/src/llama.cpp/docs/build.md +36 -1
- package/src/llama.cpp/examples/CMakeLists.txt +0 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +39 -55
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
- package/src/llama.cpp/examples/infill/infill.cpp +117 -132
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +685 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
- package/src/llama.cpp/examples/llava/llava.cpp +110 -24
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
- package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
- package/src/llama.cpp/examples/main/main.cpp +210 -262
- package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
- package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
- package/src/llama.cpp/examples/server/server.cpp +1027 -1073
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +107 -105
- package/src/llama.cpp/examples/simple/simple.cpp +35 -41
- package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
- package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
- package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
- package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
- package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
- package/src/llama.cpp/ggml/include/ggml.h +293 -186
- package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
- package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
- package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
- package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
- package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
- package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
- package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
- package/src/llama.cpp/include/llama.h +241 -264
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
- package/src/llama.cpp/src/llama-sampling.h +20 -47
- package/src/llama.cpp/src/llama-vocab.cpp +343 -120
- package/src/llama.cpp/src/llama-vocab.h +33 -17
- package/src/llama.cpp/src/llama.cpp +4247 -1525
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +3 -0
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
- package/src/llama.cpp/tests/test-barrier.cpp +93 -0
- package/src/llama.cpp/tests/test-grad0.cpp +187 -70
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
- package/src/llama.cpp/tests/test-rope.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +157 -98
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
package/src/llama.cpp/tests/test-backend-ops.cpp

@@ -1,3 +1,20 @@
+// This file defines tests for various GGML ops and backends.
+// For the forward pass it asserts that the results of multiple backends computing the same GGML ops are consistent.
+// For the backward pass it asserts that the gradients from backpropagation are consistent
+// with the gradients obtained via the method of finite differences ("grad" mode, this is optional).
+// It is also possible to check the performance ("perf" mode).
+//
+// this file has three sections: Section 1 does general setup, section 2 defines the GGML ops to be tested,
+// and section 3 defines which tests to run.
+// Quick start for adding a new GGML op: Go to section 2 and create a struct that inherits from test_case,
+// then go to section 3 and add an instantiation of your struct.
+
+
+// ##############################
+// ## Section 1: General Setup ##
+// ##############################
+
+
 #include <ggml.h>
 #include <ggml-alloc.h>
 #include <ggml-backend.h>
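Note on the three modes the new header comment describes: `test` checks cross-backend consistency, `perf` measures throughput, and this release adds `grad`, which validates backpropagation against finite differences (see the `MODE_GRAD` and `eval_grad` hunks below). The invocation shape `./test-backend-ops [mode] [-o op] [-b backend]` is assumed from upstream llama.cpp usage and is not itself shown in this diff.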
@@ -5,7 +22,9 @@
 #include <algorithm>
 #include <array>
 #include <cfloat>
+#include <cstdint>
 #include <cstring>
+#include <cinttypes>
 #include <functional>
 #include <memory>
 #include <random>
@@ -13,64 +32,52 @@
 #include <stdlib.h>
 #include <string>
 #include <thread>
+#include <future>
 #include <vector>
 
-
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
-
-
-
-
-    std::
-
-
-
-
-
-
-
+    size_t nels = ggml_nelements(tensor);
+    std::vector<float> data(nels);
+    {
+        // parallel initialization
+        static const size_t n_threads = std::thread::hardware_concurrency();
+        // static RNG initialization (revisit if n_threads stops being constant)
+        static std::vector<std::default_random_engine> generators = []() {
+            std::random_device rd;
+            std::vector<std::default_random_engine> vec;
+            vec.reserve(n_threads);
+            //for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed
+            for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
+            return vec;
+        }();
+
+        auto init_thread = [&](size_t ith, size_t start, size_t end) {
+            std::uniform_real_distribution<float> distribution(min, max);
+            auto & gen = generators[ith];
+            for (size_t i = start; i < end; i++) {
+                data[i] = distribution(gen);
+            }
+        };
 
-
-
-    for (size_t i =
-
+        std::vector<std::future<void>> tasks;
+        tasks.reserve(n_threads);
+        for (size_t i = 0; i < n_threads; i++) {
+            size_t start = i*nels/n_threads;
+            size_t end   = (i+1)*nels/n_threads;
+            tasks.push_back(std::async(std::launch::async, init_thread, i, start, end));
         }
-
-
-        std::vector<std::thread> threads;
-        threads.reserve(n_threads);
-        for (size_t i = 0; i < n_threads; i++) {
-            size_t start = i*size/n_threads;
-            size_t end   = (i+1)*size/n_threads;
-            threads.emplace_back(init_thread, i, start, end);
-        }
-        for (auto & t : threads) {
-            t.join();
-        }
-
-#if 0
-        const char * val_str = getenv("GGML_TEST_EPS");
-        float val = 1e-9f;
-        if (val_str != nullptr) {
-            val = std::stof(val_str);
-            printf("GGML_TEST_EPS=%e\n", val);
-        }
-
-        // test quantization with very small values that may result in nan scales due to division by zero
-        if (ggml_is_quantized(tensor->type)) {
-            for (int i = 0; i < 256; i++) {
-                data[i] = val;
+        for (auto & t : tasks) {
+            t.get();
         }
     }
-#endif
 
     if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
-        ggml_backend_tensor_set(tensor, data.data(), 0,
+        ggml_backend_tensor_set(tensor, data.data(), 0, nels * sizeof(float));
     } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_BF16) {
-        GGML_ASSERT(
-
-
+        GGML_ASSERT(nels % ggml_blck_size(tensor->type) == 0);
+
+        // dummy importance matrix
+        std::vector<float> imatrix(tensor->ne[0], 1.0f);
         const float * im = imatrix.data();
         if (!ggml_quantize_requires_imatrix(tensor->type)) {
             // when the imatrix is optional, we want to test both quantization with and without imatrix
@@ -80,19 +87,40 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
            }
        }
 
-
-
-
-
-
-
-
-
-
+        std::vector<uint8_t> dataq(ggml_row_size(tensor->type, nels));
+        {
+            // parallel quantization by block
+            size_t blck_size = ggml_blck_size(tensor->type);
+            size_t n_blocks = nels / blck_size;
+
+            auto quantize_thread = [&](size_t start, size_t end) {
+                ggml_quantize_chunk(tensor->type, data.data(), dataq.data(),
+                    start * blck_size, end - start, blck_size, im);
+            };
+
+            const size_t min_blocks_per_thread = 1;
+            const size_t n_threads = std::min<size_t>(std::thread::hardware_concurrency()/2,
+                                                      std::max<size_t>(1, n_blocks / min_blocks_per_thread));
+            std::vector<std::future<void>> tasks;
+            tasks.reserve(n_threads);
+            for (size_t i = 0; i < n_threads; i++) {
+                size_t start = i*n_blocks/n_threads;
+                size_t end   = (i+1)*n_blocks/n_threads;
+                tasks.push_back(std::async(std::launch::async, quantize_thread, start, end));
+            }
+            for (auto & t : tasks) {
+                t.get();
+            }
+        }
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
         ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
+    } else if (tensor->type == GGML_TYPE_I64) {
+        // Integers with a size of 8 bytes can be set by mirroring the float data, the specific values are again not really meaningful.
+        const size_t nbytes_half = ggml_nbytes(tensor)/2;
+        ggml_backend_tensor_set(tensor, data.data(), 0*nbytes_half, nbytes_half);
+        ggml_backend_tensor_set(tensor, data.data(), 1*nbytes_half, nbytes_half);
     } else {
         GGML_ABORT("fatal error");
     }
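The two hunks above move tensor initialization and quantization from joined `std::thread` workers to `std::async` tasks split over equal element (or block) ranges. A minimal standalone sketch of that work-splitting pattern (illustrative only, not code from the package):

#include <algorithm>
#include <cstdio>
#include <future>
#include <numeric>
#include <thread>
#include <vector>

int main() {
    const size_t nels = 1000000;            // pretend tensor element count
    std::vector<float> data(nels);

    const size_t n_threads = std::max<size_t>(1, std::thread::hardware_concurrency());
    std::vector<std::future<void>> tasks;
    tasks.reserve(n_threads);
    for (size_t i = 0; i < n_threads; i++) {
        // same partitioning as the diff: element range [i*nels/n, (i+1)*nels/n)
        const size_t start = i*nels/n_threads;
        const size_t end   = (i+1)*nels/n_threads;
        tasks.push_back(std::async(std::launch::async, [&data, start, end]() {
            for (size_t j = start; j < end; j++) {
                data[j] = 1.0f;             // stand-in for the per-thread RNG fill
            }
        }));
    }
    for (auto & t : tasks) {
        t.get();                            // also rethrows worker exceptions
    }
    printf("sum = %.0f\n", std::accumulate(data.begin(), data.end(), 0.0));
    return 0;
}

A side benefit over joining raw threads is that `future::get()` propagates any exception raised inside a worker.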
@@ -122,6 +150,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                 tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i]));
             } else if (t->type == GGML_TYPE_F32) {
                 tv.push_back(*(float *) &buf[i]);
+            } else if (t->type == GGML_TYPE_I64) {
+                tv.push_back((float)*(int64_t *) &buf[i]);
             } else if (t->type == GGML_TYPE_I32) {
                 tv.push_back((float)*(int32_t *) &buf[i]);
             } else if (t->type == GGML_TYPE_I16) {
@@ -142,60 +172,6 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
     return tv;
 }
 
-/*
-static double cosine_similarity(const float * v1, const float * v2, size_t n) {
-    double dot = 0.0;
-    double mag1 = 0.0;
-    double mag2 = 0.0;
-
-    for (size_t i = 0; i < n; i++) {
-        if (std::isnan(v1[i]) || std::isnan(v2[i])) {
-            return -1.0f;
-        }
-        if (std::isinf(v1[i]) && std::isinf(v2[i])) {
-            continue;
-        }
-        dot  += v1[i]*v2[i];
-        mag1 += v1[i]*v1[i];
-        mag2 += v2[i]*v2[i];
-    }
-
-    return dot/sqrt(mag1*mag2);
-}
-
-static float distance(const float * v1, const float * v2, size_t n) {
-    double d = 0.0;
-
-    for (size_t i = 0; i < n; i++) {
-        if (std::isnan(v1[i]) || std::isnan(v2[i])) {
-            return INFINITY;
-        }
-        if (std::isinf(v1[i]) && std::isinf(v2[i])) {
-            continue;
-        }
-        d += (v1[i] - v2[i])*(v1[i] - v2[i]);
-    }
-
-    return sqrt(d);
-}
-
-static float vec_len(const float * v, size_t n) {
-    double d = 0.0;
-
-    for (size_t i = 0; i < n; i++) {
-        if (std::isnan(v[i])) {
-            return INFINITY;
-        }
-        if (std::isinf(v[i])) {
-            continue;
-        }
-        d += v[i]*v[i];
-    }
-
-    return sqrt(d);
-}
-*/
-
 // normalized mean squared error = mse(a, b) / mse(a, 0)
 static double nmse(const float * a, const float * b, size_t n) {
     double mse_a_b = 0.0;
@@ -212,8 +188,40 @@ static double nmse(const float * a, const float * b, size_t n) {
     return mse_a_b / mse_a_0;
 }
 
+// maximum absolute asymmetry between a and b
+// asymmetry: (a - b) / (a + b)
+// This is more stable than relative error if one of the values fluctuates towards zero.
+// n: number of values to compare.
+// expected_vals: optional vector of expected values for a. If expected_vals is not empty, filter out all comparisons where
+// a does not match any of the expected values. Needed for noncontinuous gradients where the numerical calculation can fail.
+static double mean_abs_asymm(const float * a, const float * b, const size_t n, const std::vector<float> & expected_vals) {
+    double sum = 0.0f;
+
+    size_t nvalid = 0;
+    for (size_t i = 0; i < n; i++) {
+        if (!expected_vals.empty()) {
+            bool matches_any = false;
+            for (const float & ev : expected_vals) {
+                if (fabsf(a[i] - ev) < 1e-3f) {
+                    matches_any = true;
+                    break;
+                }
+            }
+            if (!matches_any) {
+                continue;
+            }
+        }
+
+        const float asymm = (a[i] - b[i]) / (a[i] + b[i]);
+
+        sum += fabsf(asymm);
+        nvalid++;
+    }
+
+    return sum/nvalid;
+}
+
 // utils for printing the variables of the test cases
-#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
 
 template<typename T>
 static std::string var_to_str(const T & x) {
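For intuition, a tiny standalone check of the asymmetry metric introduced above, inlining the same `(a - b) / (a + b)` formula (illustrative values, not from the test suite):

#include <cmath>
#include <cstdio>

int main() {
    // asymmetry: (a - b) / (a + b), averaged over all compared values
    const float a[2] = {1.0f, 2.0f};
    const float b[2] = {1.1f, 1.9f};
    double sum = 0.0;
    for (int i = 0; i < 2; i++) {
        sum += std::fabs((a[i] - b[i]) / (a[i] + b[i]));
    }
    // |-0.1/2.1| + |0.1/3.9| = 0.0476... + 0.0256...; mean ~ 0.0366
    printf("MAA = %.4f\n", sum / 2);
    return 0;
}

Unlike plain relative error, the asymmetry stays bounded when one of the two compared values fluctuates toward zero, which is why the gradient comparison below uses it.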
@@ -246,10 +254,6 @@ static std::string var_to_str(const std::array<T, N> & x) {
     return s;
 }
 
-//static std::string var_to_str(ggml_unary_op unary_op) {
-//    return ggml_unary_op_name(unary_op);
-//}
-
 static std::string var_to_str(ggml_type type) {
     return ggml_type_name(type);
 }
@@ -262,6 +266,8 @@ static std::string var_to_str(ggml_op_pool pool) {
     }
 }
 
+#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
+
 #define VARS_TO_STR1(a) VAR_TO_STR(a)
 #define VARS_TO_STR2(a, b) VAR_TO_STR(a) + "," + VAR_TO_STR(b)
 #define VARS_TO_STR3(a, b, c) VAR_TO_STR(a) + "," + VARS_TO_STR2(b, c)
@@ -295,6 +301,7 @@ static bool ggml_is_view_op(enum ggml_op op) {
 enum test_mode {
     MODE_TEST,
     MODE_PERF,
+    MODE_GRAD,
 };
 
 struct test_case {
@@ -314,6 +321,32 @@ struct test_case {
         return 1e-7;
     }
 
+    virtual double max_maa_err() {
+        return 1e-4;
+    }
+
+    virtual float grad_eps() {
+        return 1e-1f;
+    }
+
+    // If false, estimate gradient with 2 points, neglects 3rd order derivative and higher.
+    // If true, estimate gradient with 4 points, neglects 5th order derivative and higher.
+    virtual bool grad_precise() {
+        return false;
+    }
+
+    // Skip gradient checks if total number of gradients to be checked is larger than this (to speed up the tests).
+    virtual int64_t grad_nmax() {
+        return 10000;
+    }
+
+    // No effect if empty.
+    // If not empty, skip all gradient checks where the numerical result does not match any of the values.
+    // Needed for dealing with noncontinuous gradients (e.g. ReLU) where estimation using finite differences is unreliable.
+    virtual std::vector<float> grad_expect() {
+        return {};
+    }
+
     virtual void initialize_tensors(ggml_context * ctx) {
         for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
             init_tensor_uniform(t);
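The `grad_precise()` comment above refers to the two standard central-difference stencils. A self-contained sketch of both estimators, with `f()` standing in for the scalar loss that `eval_grad` recomputes per perturbed element (illustrative, not code from the package):

#include <cmath>
#include <cstdio>

static double f(double x) { return std::sin(x); } // stand-in for the scalar loss

// 2 points: error is O(eps^2), i.e. the 3rd-order derivative and higher are neglected.
static double grad_2point(double x, double eps) {
    return (f(x + eps) - f(x - eps)) / (2.0*eps);
}

// 4 points: error is O(eps^4); this is the same expression eval_grad uses below,
// (8*fuh + fd - (8*fdh + fu)) / (6*eps).
static double grad_4point(double x, double eps) {
    return (8.0*f(x + 0.5*eps) + f(x - eps) - (8.0*f(x - 0.5*eps) + f(x + eps))) / (6.0*eps);
}

int main() {
    const double x = 1.0, eps = 0.1; // eps matches the default grad_eps() of 1e-1
    printf("2-point: %.9f\n", grad_2point(x, eps));
    printf("4-point: %.9f\n", grad_4point(x, eps));
    printf("exact:   %.9f\n", std::cos(x)); // d/dx sin(x)
    return 0;
}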
@@ -331,7 +364,13 @@ struct test_case {
         return size;
     }
 
+    virtual uint64_t op_flops(ggml_tensor * t) {
+        GGML_UNUSED(t);
+        return 0;
+    }
+
     ggml_cgraph * gf = nullptr;
+    ggml_cgraph * gb = nullptr;
 
     static const int sentinel_size = 1024;
 
@@ -340,7 +379,7 @@ struct test_case {
     std::vector<ggml_tensor *> sentinels;
 
     void add_sentinel(ggml_context * ctx) {
-        if (mode == MODE_PERF) {
+        if (mode == MODE_PERF || mode == MODE_GRAD) {
             return;
         }
         ggml_tensor * sentinel = ::ggml_new_tensor_1d(ctx, GGML_TYPE_F32, sentinel_size);
@@ -389,6 +428,7 @@ struct test_case {
             /* .no_alloc = */ true,
         };
         ggml_context * ctx = ggml_init(params);
+        GGML_ASSERT(ctx);
 
         gf = ggml_new_graph(ctx);
 
@@ -439,7 +479,7 @@ struct test_case {
 
         // add sentinels as graph nodes so that they are checked in the callback
         for (ggml_tensor * sentinel : sentinels) {
-            gf
+            ggml_graph_add_node(gf, sentinel);
         }
 
         // randomize tensors
@@ -550,6 +590,7 @@ struct test_case {
             /* .no_alloc = */ true,
         };
         ggml_context * ctx = ggml_init(params);
+        GGML_ASSERT(ctx);
 
         ggml_tensor * out = build_graph(ctx);
 
@@ -570,12 +611,11 @@ struct test_case {
         }
 
         // align while also leaving some margin for variations in parameters
-        int align =
+        int align = 8;
         int last = (len + align - 1) / align * align;
         if (last - len < 5) {
             last += align;
         }
-        last = std::max(last, 60);
         printf("%*s", last - len, "");
 
         // allocate
@@ -596,11 +636,27 @@ struct test_case {
         // warmup run
         ggml_backend_graph_compute(backend, gf);
 
+        // determine number of runs
+        int n_runs;
+        if (op_flops(out) > 0) {
+            // based on flops
+            const uint64_t GFLOP = 1000 * 1000 * 1000;
+            const uint64_t target_flops_cpu =   8ULL * GFLOP;
+            const uint64_t target_flops_gpu = 100ULL * GFLOP;
+            uint64_t target_flops = ggml_backend_is_cpu(backend) ? target_flops_cpu : target_flops_gpu;
+            n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_flops / op_flops(out)) + 1;
+        } else {
+            // based on memory size
+            const size_t GB = 1ULL << 30;
+            const size_t target_size_cpu =  8 * GB;
+            const size_t target_size_gpu = 32 * GB;
+            size_t target_size = ggml_backend_is_cpu(backend) ? target_size_cpu : target_size_gpu;
+            n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
+        }
+
         // duplicate the op
-        size_t target_size = ggml_backend_is_cpu(backend) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
-        int n_runs = std::min((size_t)gf->size - gf->n_nodes, target_size / op_size(out)) + 1;
         for (int i = 1; i < n_runs; i++) {
-            gf
+            ggml_graph_add_node(gf, out);
         }
 
         // calculate memory
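The new run-count logic duplicates the op inside one graph until either a FLOP or a memory budget is met, so per-graph launch overhead amortizes. A quick standalone illustration of the arithmetic, using a hypothetical 4096^3 matrix multiplication (~2*M*N*K flops; the numbers are assumptions, not from the diff):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t GFLOP = 1000ULL * 1000 * 1000;
    const uint64_t target_flops_gpu = 100ULL * GFLOP;    // GPU target from the diff
    const uint64_t op_flops = 2ULL * 4096 * 4096 * 4096; // ~137 GFLOP per matmul
    const int graph_headroom = 8192;                     // stand-in for graph_size - n_nodes
    int n_runs = std::min<int>(graph_headroom, target_flops_gpu / op_flops) + 1;
    printf("n_runs = %d\n", n_runs); // 100/137 -> 0, so n_runs = 1 for this heavy op
    return 0;
}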
@@ -615,36 +671,330 @@ struct test_case {
             }
             return size;
         };
-        for (int i = 0; i < gf
-            if (ggml_is_view_op(gf
+        for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
+            if (ggml_is_view_op(ggml_graph_node(gf, i)->op) || ggml_graph_node(gf, i) == out) {
                 continue;
             }
-            mem += tensor_op_size(gf
+            mem += tensor_op_size(ggml_graph_node(gf, i));
         }
 
         // run
-
+        int64_t total_time_us = 0;
+        int total_runs = 0;
+        do {
+            int64_t start_time = ggml_time_us();
+            ggml_backend_graph_compute(backend, gf);
+            int64_t end_time = ggml_time_us();
+
+            total_time_us += end_time - start_time;
+            total_runs += n_runs;
+        } while (total_time_us < 1000*1000); // run for at least 1 second
+
+        printf(" %8d runs - %8.2f us/run - ",
+            total_runs,
+            (double)total_time_us / total_runs);
+
+        if (op_flops(out) > 0) {
+            double flops_per_sec = (op_flops(out) * total_runs) / (total_time_us / 1e6);
+            auto format_flops = [](double flops) -> std::string {
+                char buf[256];
+                if (flops >= 1e12) {
+                    snprintf(buf, sizeof(buf), "%6.2f TFLOP", flops / 1e12);
+                } else if (flops >= 1e9) {
+                    snprintf(buf, sizeof(buf), "%6.2f GFLOP", flops / 1e9);
+                } else if (flops >= 1e6) {
+                    snprintf(buf, sizeof(buf), "%6.2f MFLOP", flops / 1e6);
+                } else {
+                    snprintf(buf, sizeof(buf), "%6.2f KFLOP", flops / 1e3);
+                }
+                return buf;
+            };
+            printf("%s/run - \033[1;34m%sS\033[0m",
+                format_flops(op_flops(out)).c_str(),
+                format_flops(flops_per_sec).c_str());
+
+        } else {
+            printf("%8zu kB/run - \033[1;34m%7.2f GB/s\033[0m",
+                op_size(out) / 1024,
+                mem / (total_time_us / 1e6) / 1024.0 / 1024.0 / 1024.0);
+        }
+        printf("\n");
+
+        ggml_backend_buffer_free(buf);
+
+        ggml_free(ctx);
+
+        return true;
+    }
+
+    bool eval_grad(ggml_backend_t backend, const char * op_name) {
+        mode = MODE_GRAD;
+        const std::vector<float> expect = grad_expect();
+
+        ggml_init_params params = {
+            /* .mem_size = */ ggml_tensor_overhead()*128 + 2*ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, true),
+            /* .mem_base = */ NULL,
+            /* .no_alloc = */ true,
+        };
+        ggml_context * ctx = ggml_init(params);
+        GGML_ASSERT(ctx);
+
+        gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
+        gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
+
+        ggml_tensor * out = build_graph(ctx);
+
+        if ((op_name != nullptr && op_desc(out) != op_name) || out->op == GGML_OP_OPT_STEP_ADAMW) {
+            //printf("  %s: skipping\n", op_desc(out).c_str());
+            ggml_free(ctx);
+            return true;
+        }
+
+        printf("  %s(%s): ", op_desc(out).c_str(), vars().c_str());
+        fflush(stdout);
+
+        if (out->type != GGML_TYPE_F32) {
+            ggml_free(ctx);
+            printf("not supported [%s->type != FP32]\n", out->name);
+            return true;
+        }
+
+        // check if the backend supports the ops
+        bool supported = true;
+        bool any_params = false;
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            if (!ggml_backend_supports_op(backend, t)) {
+                printf("not supported [%s] ", ggml_backend_name(backend));
+                supported = false;
+                break;
+            }
+            if ((t->flags & GGML_TENSOR_FLAG_PARAM)) {
+                any_params = true;
+                if (t->type != GGML_TYPE_F32) {
+                    printf("not supported [%s->type != FP32] ", t->name);
+                    supported = false;
+                    break;
+                }
+            }
+        }
+        if (!any_params) {
+            printf("not supported [%s] \n", op_name);
+            supported = false;
+        }
+        if (!supported) {
+            printf("\n");
+            ggml_free(ctx);
+            return true;
+        }
+
+        int64_t ngrads = 0;
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            if (t->flags & GGML_TENSOR_FLAG_PARAM) {
+                ngrads += ggml_nelements(t);
+            }
+        }
+        if (ngrads > grad_nmax()) {
+            printf("skipping large tensors for speed \n");
+            ggml_free(ctx);
+            return true;
+        }
+
+
+        if (!ggml_is_scalar(out)) {
+            out = ggml_sum(ctx, out);
+            ggml_set_name(out, "sum_of_out");
+        }
+        ggml_set_loss(out);
+
+        ggml_build_forward_expand(gf, out);
+        ggml_graph_cpy(gf, gb);
+        ggml_build_backward_expand(ctx, gf, gb, false);
+        if (expect.size() != 1 || expect[0] != 0.0f) {
+            GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
+            for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+                GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || t->grad->op != GGML_OP_NONE);
+            }
+        }
+
+        // TODO: refactor so that this check is only needed once
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            if (!ggml_backend_supports_op(backend, t)) {
+                printf("not supported [%s] ", ggml_backend_name(backend));
+                supported = false;
+                break;
+            }
+            if ((t->flags & GGML_TENSOR_FLAG_PARAM) && t->type != GGML_TYPE_F32) {
+                printf("not supported [%s->type != FP32] ", t->name);
+                supported = false;
+                break;
+            }
+        }
+        if (!supported) {
+            printf("\n");
+            ggml_free(ctx);
+            return true;
+        }
+
+        // allocate
+        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+        if (buf == NULL) {
+            printf("failed to allocate tensors [%s] ", ggml_backend_name(backend));
+            ggml_free(ctx);
+            return false;
+        }
+
+
+        initialize_tensors(ctx); // Randomizes all tensors (including gradients).
+        ggml_graph_reset(gb);    // Sets gradients to 1 if loss, 0 otherwise.
 
-        int64_t start_time = ggml_time_us();
         ggml_backend_graph_compute(backend, gf);
-
-
-
+        ggml_backend_graph_compute(backend, gb);
+
+        bool ok = true;
+        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+            if (!(t->flags & GGML_TENSOR_FLAG_PARAM)) {
+                continue;
+            }
+
+            const char * bn = ggml_backend_name(backend);
+            const int64_t ne = ggml_nelements(t);
+
+            std::vector<float> ga = tensor_to_float(t->grad);
+
+            for (int64_t i = 0; i < ne; ++i) { // gradient algebraic
+                // check for nans
+                if (!std::isfinite(ga[i])) {
+                    printf("[%s] nonfinite gradient at index %" PRId64 " (%s=%f) ", ggml_op_desc(t), i, bn, ga[i]);
+                    ok = false;
+                    break;
+                }
+            }
+            if (!ok) {
+                break;
+            }
+
+            std::vector<float> gn(ne); // gradient numeric
+            GGML_ASSERT(ga.size() == gn.size());
+
+            std::vector<float> x0 = tensor_to_float(t); // original t data
+            GGML_ASSERT(ggml_is_scalar(out));
+            GGML_ASSERT(out->type == GGML_TYPE_F32);
+
+            const float eps = grad_eps();
+            for (int64_t i = 0; i < ne; ++i) {
+                const float xiu  = x0[i] + 1.0f*eps; // x, index i, up
+                const float xiuh = x0[i] + 0.5f*eps; // x, index i, up half
+                const float xidh = x0[i] - 0.5f*eps; // x, index i, down half
+                const float xid  = x0[i] - 1.0f*eps; // x, index i, down
+
+                float fu, fuh, fdh, fd; // output values for xiu, xiuh, xid, xidh
+
+                ggml_backend_tensor_set(t, &xiu, i*sizeof(float), sizeof(float));
+                ggml_backend_graph_compute(backend, gf);
+                ggml_backend_tensor_get(out, &fu, 0, ggml_nbytes(out));
+
+                ggml_backend_tensor_set(t, &xid, i*sizeof(float), sizeof(float));
+                ggml_backend_graph_compute(backend, gf);
+                ggml_backend_tensor_get(out, &fd, 0, ggml_nbytes(out));
+
+                if (grad_precise()) {
+                    ggml_backend_tensor_set(t, &xiuh, i*sizeof(float), sizeof(float));
+                    ggml_backend_graph_compute(backend, gf);
+                    ggml_backend_tensor_get(out, &fuh, 0, ggml_nbytes(out));
+
+                    ggml_backend_tensor_set(t, &xidh, i*sizeof(float), sizeof(float));
+                    ggml_backend_graph_compute(backend, gf);
+                    ggml_backend_tensor_get(out, &fdh, 0, ggml_nbytes(out));
+
+                    gn[i] = (8.0*(double)fuh + (double)fd - (8.0*(double)fdh + (double)fu)) / (6.0*(double)eps);
+                } else {
+                    gn[i] = (fu - fd) / (2.0f*eps);
+                }
 
-
-
-
-
-
+                ggml_backend_tensor_set(t, x0.data(), 0, ggml_nbytes(t));
+            }
+
+            const double err = mean_abs_asymm(gn.data(), ga.data(), gn.size(), expect);
+            if (err > max_maa_err()) {
+                printf("[%s] MAA = %.9f > %.9f ", ggml_op_desc(t), err, max_maa_err());
+                ok = false;
+                break;
+            }
+            if (!ok) {
+                break;
+            }
+        }
+
+        if (!ok) {
+            printf("compare failed ");
+        }
 
         ggml_backend_buffer_free(buf);
 
         ggml_free(ctx);
 
-
+        if (ok) {
+            printf("\033[1;32mOK\033[0m\n");
+            return true;
+        }
+
+        printf("\033[1;31mFAIL\033[0m\n");
+        return false;
+    }
+};
+
+
+// ###################################
+// ## Section 2: GGML Op Defintions ##
+// ###################################
+
+
+// The following is an example showing the bare minimum for creating a test for a GGML op.
+
+// GGML_OP_EXAMPLE
+struct test_example : public test_case {
+    // Always define these 2 or variants thereof:
+    const ggml_type type;            // The type of the input tensors.
+    const std::array<int64_t, 4> ne; // The shape of the input tensors.
+    // For some ops it's necessary to define multiple types or shapes for the inputs.
+    // Or they may need additional parameters.
+
+    // Put all parameters needed to fully define the test into one of the VARS_TO_STR macros.
+    // In most cases these are just the properties of the struct that you defined above.
+    // This is needed for info prints.
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    // Define a constructor for the struct.
+    // In most cases it will be sufficient to have the same arguments as the struct has properties
+    // and just use initializer lists.
+    test_example(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3})
+        : type(type), ne(ne) {}
+
+    // Define how a simple GGML compute graph can be constructed for the new GGML op.
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        // Step 1: create input tensors that don't depend on any other tensors:
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a"); // Setting names is optional but it's useful for debugging.
+
+        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(b, "b");
+
+        // Step 2: use the op that you want to test in the GGML compute graph.
+        ggml_tensor * out = ggml_add(ctx, a, b); // For this example we're just doing a simple addition.
+        ggml_set_name(out, "out");
+
+        // Step 3: return the output tensor.
+        return out;
     }
+    // In order to also check the gradients for your op, add calls like ggml_set_param(ctx, a)
+    // immediately after you create the tensors.
+    // This is optional and only makes sense if a backward pass has actually been implemented for the new op.
 };
 
+
 // GGML_OP_UNARY
 struct test_unary : public test_case {
     const ggml_unary_op op;
@@ -658,20 +1008,36 @@ struct test_unary : public test_case {
|
|
|
658
1008
|
|
|
659
1009
|
test_unary(ggml_unary_op op,
|
|
660
1010
|
ggml_type type = GGML_TYPE_F32,
|
|
661
|
-
std::array<int64_t, 4> ne_a = {128,
|
|
1011
|
+
std::array<int64_t, 4> ne_a = {128, 2, 2, 2},
|
|
662
1012
|
int v = 0)
|
|
663
1013
|
: op(op), type(type), ne_a(ne_a), v(v) {}
|
|
664
1014
|
|
|
665
1015
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1016
|
+
const bool grad_supported = op == GGML_UNARY_OP_ABS || op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_NEG ||
|
|
1017
|
+
op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU;
|
|
1018
|
+
|
|
666
1019
|
ggml_tensor * a;
|
|
667
1020
|
if (v & 1) {
|
|
668
1021
|
auto ne = ne_a; ne[0] *= 3;
|
|
669
1022
|
a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1023
|
+
if (grad_supported) {
|
|
1024
|
+
ggml_set_param(ctx, a);
|
|
1025
|
+
}
|
|
1026
|
+
ggml_set_name(a, "a");
|
|
1027
|
+
|
|
670
1028
|
a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0);
|
|
1029
|
+
ggml_set_name(a, "view_of_a");
|
|
671
1030
|
} else {
|
|
672
1031
|
a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
|
1032
|
+
if (grad_supported) {
|
|
1033
|
+
ggml_set_param(ctx, a);
|
|
1034
|
+
}
|
|
1035
|
+
ggml_set_name(a, "a");
|
|
673
1036
|
}
|
|
1037
|
+
|
|
674
1038
|
ggml_tensor * out = ggml_unary(ctx, a, op);
|
|
1039
|
+
ggml_set_name(out, "out");
|
|
1040
|
+
|
|
675
1041
|
return out;
|
|
676
1042
|
}
|
|
677
1043
|
|
|
@@ -681,6 +1047,24 @@ struct test_unary : public test_case {
|
|
|
681
1047
|
init_tensor_uniform(t, -150.f, 150.f);
|
|
682
1048
|
}
|
|
683
1049
|
}
|
|
1050
|
+
|
|
1051
|
+
float grad_eps() override {
|
|
1052
|
+
return 15.0f;
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
std::vector<float> grad_expect() override {
|
|
1056
|
+
if (op == GGML_UNARY_OP_ABS) {
|
|
1057
|
+
return {-1.0f, 1.0f};
|
|
1058
|
+
}
|
|
1059
|
+
if (op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_STEP) {
|
|
1060
|
+
return {0.0f};
|
|
1061
|
+
}
|
|
1062
|
+
if (op == GGML_UNARY_OP_RELU) {
|
|
1063
|
+
return {0.0f, 1.0f};
|
|
1064
|
+
}
|
|
1065
|
+
return {};
|
|
1066
|
+
}
|
|
1067
|
+
|
|
684
1068
|
};
|
|
685
1069
|
|
|
686
1070
|
// GGML_OP_GET_ROWS
|
|
@@ -701,11 +1085,24 @@ struct test_get_rows : public test_case {
|
|
|
701
1085
|
|
|
702
1086
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
703
1087
|
ggml_tensor * in = ggml_new_tensor_3d(ctx, type, n, m, b);
|
|
1088
|
+
ggml_set_name(in, "in");
|
|
1089
|
+
|
|
704
1090
|
ggml_tensor * rows = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, r, b);
|
|
1091
|
+
ggml_set_name(rows, "rows");
|
|
705
1092
|
if (v) {
|
|
706
1093
|
rows = ggml_view_2d(ctx, rows, r/2, b, rows->nb[1], 0);
|
|
1094
|
+
ggml_set_name(rows, "view_of_rows");
|
|
1095
|
+
}
|
|
1096
|
+
|
|
1097
|
+
const bool grad_supported = ggml_is_matrix(in) && ggml_is_vector(rows);
|
|
1098
|
+
if (grad_supported) {
|
|
1099
|
+
ggml_set_param(ctx, in);
|
|
1100
|
+
// rows is a constant input -> no gradients
|
|
707
1101
|
}
|
|
1102
|
+
|
|
708
1103
|
ggml_tensor * out = ggml_get_rows(ctx, in, rows);
|
|
1104
|
+
ggml_set_name(out, "out");
|
|
1105
|
+
|
|
709
1106
|
return out;
|
|
710
1107
|
}
|
|
711
1108
|
|
|
@@ -726,29 +1123,101 @@ struct test_get_rows : public test_case {
|
|
|
726
1123
|
}
|
|
727
1124
|
};
|
|
728
1125
|
|
|
729
|
-
//
|
|
730
|
-
struct
|
|
1126
|
+
// GGML_OP_ARGMAX
|
|
1127
|
+
struct test_argmax : public test_case {
|
|
731
1128
|
const ggml_type type;
|
|
732
1129
|
const std::array<int64_t, 4> ne;
|
|
733
|
-
const std::array<int, 4> nr;
|
|
734
1130
|
|
|
735
1131
|
std::string vars() override {
|
|
736
|
-
return
|
|
1132
|
+
return VARS_TO_STR2(type, ne);
|
|
737
1133
|
}
|
|
738
1134
|
|
|
739
|
-
|
|
740
|
-
|
|
1135
|
+
test_argmax(ggml_type type = GGML_TYPE_F32,
|
|
1136
|
+
std::array<int64_t, 4> ne = {10, 100, 1, 1})
|
|
1137
|
+
: type(type), ne(ne) {}
|
|
1138
|
+
|
|
1139
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1140
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1141
|
+
ggml_set_name(a, "a");
|
|
1142
|
+
|
|
1143
|
+
ggml_tensor * out = ggml_argmax(ctx, a);
|
|
1144
|
+
ggml_set_name(out, "out");
|
|
1145
|
+
|
|
1146
|
+
return out;
|
|
1147
|
+
}
|
|
1148
|
+
|
|
1149
|
+
double max_nmse_err() override {
|
|
1150
|
+
return 0.0;
|
|
1151
|
+
}
|
|
1152
|
+
};
|
|
1153
|
+
|
|
1154
|
+
// GGML_OP_COUNT_EQUAL
|
|
1155
|
+
struct test_count_equal : public test_case {
|
|
1156
|
+
const ggml_type type;
|
|
1157
|
+
const std::array<int64_t, 4> ne;
|
|
1158
|
+
|
|
1159
|
+
std::string vars() override {
|
|
1160
|
+
return VARS_TO_STR2(type, ne);
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
test_count_equal(ggml_type type = GGML_TYPE_F32,
|
|
1164
|
+
std::array<int64_t, 4> ne = {4, 500, 1, 1})
|
|
1165
|
+
: type(type), ne(ne) {}
|
|
1166
|
+
|
|
1167
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1168
|
+
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1169
|
+
ggml_set_name(a, "a");
|
|
1170
|
+
|
|
1171
|
+
ggml_tensor * a_argmax = ggml_argmax(ctx, a);
|
|
1172
|
+
ggml_set_name(a_argmax, "a_argmax");
|
|
1173
|
+
|
|
1174
|
+
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1175
|
+
ggml_set_name(b, "b");
|
|
1176
|
+
|
|
1177
|
+
ggml_tensor * b_argmax = ggml_argmax(ctx, a);
|
|
1178
|
+
ggml_set_name(b_argmax, "b_argmax");
|
|
1179
|
+
|
|
1180
|
+
ggml_tensor * out = ggml_count_equal(ctx, a_argmax, b_argmax);
|
|
1181
|
+
ggml_set_name(out, "out");
|
|
1182
|
+
|
|
1183
|
+
return out;
|
|
1184
|
+
}
|
|
1185
|
+
|
|
1186
|
+
double max_nmse_err() override {
|
|
1187
|
+
return 0.0;
|
|
1188
|
+
}
|
|
1189
|
+
};
|
|
1190
|
+
|
|
1191
|
+
// GGML_OP_REPEAT
|
|
1192
|
+
struct test_repeat : public test_case {
|
|
1193
|
+
const ggml_type type;
|
|
1194
|
+
const std::array<int64_t, 4> ne;
|
|
1195
|
+
const std::array<int, 4> nr;
|
|
1196
|
+
|
|
1197
|
+
std::string vars() override {
|
|
1198
|
+
return VARS_TO_STR3(type, ne, nr);
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
size_t op_size(ggml_tensor * t) override {
|
|
1202
|
+
return ggml_nbytes(t) * 2;
|
|
741
1203
|
}
|
|
742
1204
|
|
|
743
1205
|
test_repeat(ggml_type type = GGML_TYPE_F32,
|
|
744
|
-
std::array<int64_t, 4> ne = {10,
|
|
1206
|
+
std::array<int64_t, 4> ne = {10, 5, 4, 3},
|
|
745
1207
|
std::array<int, 4> nr = {2, 2, 2, 2})
|
|
746
1208
|
: type(type), ne(ne), nr(nr) {}
|
|
747
1209
|
|
|
748
1210
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
749
1211
|
ggml_tensor * target = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
|
|
1212
|
+
ggml_set_name(target, "target");
|
|
1213
|
+
|
|
750
1214
|
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1215
|
+
ggml_set_param(ctx, src);
|
|
1216
|
+
ggml_set_name(src, "src");
|
|
1217
|
+
|
|
751
1218
|
ggml_tensor * out = ggml_repeat(ctx, src, target);
|
|
1219
|
+
ggml_set_name(out, "out");
|
|
1220
|
+
|
|
752
1221
|
return out;
|
|
753
1222
|
}
|
|
754
1223
|
};
|
|
@@ -774,10 +1243,62 @@ struct test_dup : public test_case {
|
|
|
774
1243
|
|
|
775
1244
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
776
1245
|
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1246
|
+
ggml_set_param(ctx, src);
|
|
1247
|
+
ggml_set_name(src, "src");
|
|
1248
|
+
|
|
777
1249
|
if (_use_permute) {
|
|
778
1250
|
src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
|
|
1251
|
+
ggml_set_name(src, "src_permuted");
|
|
779
1252
|
}
|
|
1253
|
+
|
|
780
1254
|
ggml_tensor * out = ggml_dup(ctx, src);
|
|
1255
|
+
ggml_set_name(out, "out");
|
|
1256
|
+
|
|
1257
|
+
return out;
|
|
1258
|
+
}
|
|
1259
|
+
};
|
|
1260
|
+
|
|
1261
|
+
// GGML_OP_SET
|
|
1262
|
+
struct test_set : public test_case {
|
|
1263
|
+
const ggml_type type_src;
|
|
1264
|
+
const ggml_type type_dst;
|
|
1265
|
+
const std::array<int64_t, 4> ne;
|
|
1266
|
+
const int dim;
|
|
1267
|
+
|
|
1268
|
+
std::string vars() override {
|
|
1269
|
+
return VARS_TO_STR4(type_src, type_dst, ne, dim);
|
|
1270
|
+
}
|
|
1271
|
+
|
|
1272
|
+
size_t op_size(ggml_tensor * t) override {
|
|
1273
|
+
return ggml_nbytes(t) + ggml_nbytes(t->src[0]);
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
test_set(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
|
|
1277
|
+
std::array<int64_t, 4> ne = {6, 5, 4, 3}, int dim = 1)
|
|
1278
|
+
: type_src(type_src), type_dst(type_dst), ne(ne), dim(dim) {}
|
|
1279
|
+
|
|
1280
|
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
1281
|
+
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
|
|
1282
|
+
ggml_set_param(ctx, src);
|
|
1283
|
+
ggml_set_name(src, "src");
|
|
1284
|
+
|
|
1285
|
+
auto ne_dst = ne;
|
|
1286
|
+
for (int i = 0; i < dim; ++i) {
|
|
1287
|
+
ne_dst[i] *= 2;
|
|
1288
|
+
}
|
|
1289
|
+
ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
|
|
1290
|
+
ggml_set_param(ctx, dst);
|
|
1291
|
+
ggml_set_name(dst, "dst");
|
|
1292
|
+
|
|
1293
|
+
size_t offset = 0;
|
|
1294
|
+
for (int i = 0; i < dim; ++i) {
|
|
1295
|
+
offset += ((ne_dst[i] - ne[i])/2)*dst->nb[i];
|
|
1296
|
+
}
|
|
1297
|
+
ggml_tensor * out = ggml_set(ctx, dst, src,
|
|
1298
|
+
// The backward pass requires setting a contiguous region:
|
|
1299
|
+
src->nb[1], src->nb[2], src->nb[3], offset);
|
|
1300
|
+
ggml_set_name(out, "out");
|
|
1301
|
+
|
|
781
1302
|
return out;
|
|
782
1303
|
}
|
|
783
1304
|
};
|
|
@@ -804,18 +1325,26 @@ struct test_cpy : public test_case {
|
|
|
804
1325
|
|
|
805
1326
|
test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
|
|
806
1327
|
std::array<int64_t, 4> ne = {10, 10, 10, 1},
|
|
807
|
-
std::array<int64_t, 4> permute = {0, 0, 0, 0}
|
|
808
|
-
bool _dst_use_permute = false)
|
|
1328
|
+
std::array<int64_t, 4> permute = {0, 0, 0, 0})
|
|
809
1329
|
: type_src(type_src), type_dst(type_dst), ne(ne), permute(permute),
|
|
810
1330
|
_src_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
|
|
811
1331
|
|
|
812
1332
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
813
1333
|
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
|
|
1334
|
+
ggml_set_param(ctx, src);
|
|
1335
|
+
ggml_set_name(src, "src");
|
|
1336
|
+
|
|
814
1337
|
if (_src_use_permute) {
|
|
815
1338
|
src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
|
|
1339
|
+
ggml_set_name(src, "src_permuted");
|
|
816
1340
|
}
|
|
1341
|
+
|
|
817
1342
|
ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, src->ne);
|
|
1343
|
+
ggml_set_name(dst, "dst");
|
|
1344
|
+
|
|
818
1345
|
ggml_tensor * out = ggml_cpy(ctx, src, dst);
|
|
1346
|
+
ggml_set_name(out, "out");
|
|
1347
|
+
|
|
819
1348
|
return out;
|
|
820
1349
|
}
|
|
821
1350
|
};
|
|
@@ -835,8 +1364,14 @@ struct test_cont : public test_case {
|
|
|
835
1364
|
|
|
836
1365
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
837
1366
|
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
1367
|
+
ggml_set_param(ctx, src);
|
|
1368
|
+
ggml_set_name(src, "src");
|
|
1369
|
+
|
|
838
1370
|
src = ggml_transpose(ctx, src);
|
|
1371
|
+
ggml_set_name(src, "src_transposed");
|
|
1372
|
+
|
|
839
1373
|
ggml_tensor * out = ggml_cont(ctx, src);
|
|
1374
|
+
ggml_set_name(out, "out");
|
|
840
1375
|
|
|
841
1376
|
return out;
|
|
842
1377
|
}
|
|
@@ -867,21 +1402,79 @@ struct test_bin_bcast : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
+        ggml_set_name(a, "a");
+
         ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(b, "b");
+
+        // The backward pass supports broadcasting only for GGML_ADD:
+        const bool grad_supported = op == ggml_add || ggml_are_same_shape(a, b);
+        if (grad_supported) {
+            ggml_set_param(ctx, a);
+            ggml_set_param(ctx, b);
+        }
+
         ggml_tensor * out = op(ctx, a, b);
+        ggml_set_name(out, "out");
+
         return out;
     }
 
     void initialize_tensors(ggml_context * ctx) override {
         for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-            if (op == ggml_div) {
-                //
-                init_tensor_uniform(t,
+            if (op == ggml_mul || op == ggml_div) {
+                // MUL and DIV have numerical issues around zero:
+                init_tensor_uniform(t, 0.9f, 1.1f);
             } else {
                 init_tensor_uniform(t);
             }
         }
     }
+
+    float grad_eps() override {
+        return 0.1f * (op == ggml_mul ? ne[0]*ne[1]*ne[2]*ne[3] : 1);
+    }
+
+    bool grad_precise() override {
+        return op == ggml_div;
+    }
+
+    double max_maa_err() override {
+        return op == ggml_add ? 1e-4 : 1e-3;
+    }
+};
+
+// GGML_OP_ADD1
+struct test_add1 : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_add1(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * b = ggml_new_tensor_1d(ctx, type, 1);
+        // ggml_set_param(ctx, b); // TODO: implement
+        ggml_set_name(b, "b");
+
+        ggml_tensor * out = ggml_add1(ctx, a, b);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    float grad_eps() override {
+        return 0.1f * ne[0]*ne[1]*ne[2]*ne[3];
+    }
 };
 
 // GGML_OP_SCALE
@@ -901,7 +1494,12 @@ struct test_scale : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_scale(ctx, a, scale);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -917,13 +1515,17 @@ struct test_norm : public test_case {
     }
 
     test_norm(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {64,
+            std::array<int64_t, 4> ne = {64, 5, 4, 3},
             float eps = 1e-6f)
         : type(type), ne(ne), eps(eps) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_norm(ctx, a, eps);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -939,13 +1541,104 @@ struct test_rms_norm : public test_case {
     }
 
     test_rms_norm(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {64,
+            std::array<int64_t, 4> ne = {64, 5, 4, 3},
             float eps = 1e-6f)
         : type(type), ne(ne), eps(eps) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_rms_norm(ctx, a, eps);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
+};
+
+// GGML_OP_SSM_CONV
+struct test_ssm_conv : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne_a;
+    const std::array<int64_t, 4> ne_b;
+
+    std::string vars() override {
+        return VARS_TO_STR3(type, ne_a, ne_b);
+    }
+
+    test_ssm_conv(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne_a = {10, 10, 10, 1},
+            std::array<int64_t, 4> ne_b = {3, 3, 1, 1})
+        : type(type), ne_a(ne_a), ne_b(ne_b) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
+        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
+        ggml_tensor * out = ggml_ssm_conv(ctx, a, b);
+        return out;
+    }
+};
+
+// GGML_OP_SSM_SCAN
+struct test_ssm_scan : public test_case {
+    const ggml_type type;
+
+    const int64_t d_state;
+    const int64_t d_inner;
+    const int64_t n_seq_tokens;
+    const int64_t n_seqs;
+
+    std::string vars() override {
+        return VARS_TO_STR5(type, d_state, d_inner, n_seq_tokens, n_seqs);
+    }
+
+    test_ssm_scan(ggml_type type = GGML_TYPE_F32,
+            int64_t d_state = 32, int64_t d_inner = 32, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
+        : type(type), d_state(d_state), d_inner(d_inner), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * s  = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, d_inner, n_seqs, 1 }.data());
+        ggml_tensor * x  = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_inner, n_seq_tokens, n_seqs, 1 }.data());
+        ggml_tensor * dt = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_inner, n_seq_tokens, n_seqs, 1 }.data());
+        ggml_tensor * A  = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, d_inner, 1 , 1 }.data());
+        ggml_tensor * B  = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, n_seq_tokens, n_seqs, 1 }.data());
+        ggml_tensor * C  = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, n_seq_tokens, n_seqs, 1 }.data());
+        ggml_tensor * out = ggml_ssm_scan(ctx, s, x, dt, A, B, C);
+        return out;
+    }
+};
+
+// GGML_OP_RWKV_WKV
+struct test_rwkv_wkv : public test_case {
+    const ggml_type type;
+
+    const int64_t head_count;
+    const int64_t head_size;
+    const int64_t n_seq_tokens;
+    const int64_t n_seqs;
+
+    std::string vars() override {
+        return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs);
+    }
+
+    test_rwkv_wkv(ggml_type type = GGML_TYPE_F32,
+            int64_t head_count = 32, int64_t head_size = 64, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
+        : type(type), head_count(head_count), head_size(head_size), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        const int64_t n_tokens = n_seq_tokens * n_seqs;
+        ggml_tensor * r  = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
+        ggml_tensor * k  = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ head_size, 1, head_count, n_tokens }.data());
+        ggml_tensor * v  = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
+        ggml_tensor * tf = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size, head_count }.data());
+        ggml_tensor * td = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
+        ggml_tensor * s  = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
+        ggml_tensor * out = ggml_rwkv_wkv(ctx, k, v, r, tf, td, s);
         return out;
     }
 };
@@ -968,13 +1661,9 @@ struct test_mul_mat : public test_case {
         return 5e-4;
     }
 
-    size_t op_size(ggml_tensor * t) override {
-        size_t a = ggml_nbytes(t->src[0]) * n * nr[0] * nr[1];
-        size_t b = ggml_nbytes(t->src[1]) * m;
-        size_t c = ggml_nbytes(t);
-        return a + b + c;
-
+    uint64_t op_flops(ggml_tensor * t) override {
         GGML_UNUSED(t);
+        return 2 * m * n * k * bs[0] * nr[0] * bs[1] * nr[1];
     }
 
     test_mul_mat(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
@@ -987,7 +1676,14 @@ struct test_mul_mat : public test_case {
         // C^T = A * B^T: (k, m) * (k, n) => (m, n)
         ggml_tensor * a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0] , bs[1]);
         ggml_tensor * b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
+        ggml_set_param(ctx, a);
+        ggml_set_param(ctx, b);
+        ggml_set_name(a, "a");
+        ggml_set_name(b, "b");
+
         ggml_tensor * out = ggml_mul_mat(ctx, a, b);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1011,13 +1707,9 @@ struct test_mul_mat_id : public test_case {
         return 5e-4;
     }
 
-    size_t op_size(ggml_tensor * t) override {
-        size_t a = ggml_nbytes(t->src[2]) * n;
-        size_t b = ggml_nbytes(t->src[1]) * m;
-        size_t c = ggml_nbytes(t);
-        return a + b + c;
-
+    uint64_t op_flops(ggml_tensor * t) override {
         GGML_UNUSED(t);
+        return 2 * m * k * n * n_used;
     }
 
     test_mul_mat_id(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
@@ -1031,12 +1723,21 @@ struct test_mul_mat_id : public test_case {
     ggml_tensor * build_graph(ggml_context * ctx) override {
         // C^T = A * B^T: (k, m) * (k, n) => (m, n)
         ggml_tensor * as = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
+        ggml_set_name(as, "as");
+
         ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
+        ggml_set_name(ids, "ids");
         if (n_used != n_mats) {
             ids = ggml_view_2d(ctx, ids, n_used, n, ids->nb[1], 0);
+            ggml_set_name(ids, "view_of_ids");
         }
+
         ggml_tensor * b = ggml_new_tensor_3d(ctx, type_b, k, this->b ? 1 : n_used, n);
+        ggml_set_name(b, "b");
+
         ggml_tensor * out = ggml_mul_mat_id(ctx, as, b, ids);
+        ggml_set_name(out, "out");
+
         return out;
     }
 
@@ -1062,8 +1763,157 @@ struct test_mul_mat_id : public test_case {
     }
 };
 
-// GGML_OP_SQR
-struct test_sqr : public test_case {
+// GGML_OP_OUT_PROD
+struct test_out_prod : public test_case {
+    const ggml_type type_a;
+    const ggml_type type_b;
+    const int64_t m;
+    const int64_t n;
+    const int64_t k;
+    const std::array<int64_t, 2> bs; // dims 3 and 4
+    const bool trans_b;
+
+    std::string vars() override {
+        return VARS_TO_STR7(type_a, type_b, m, n, k, bs, trans_b);
+    }
+
+    double max_nmse_err() override {
+        return 5e-4;
+    }
+
+    test_out_prod(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
+            int64_t m = 32, int64_t n = 32, int64_t k = 32,
+            std::array<int64_t, 2> bs = {10, 10},
+            bool trans_b = false)
+        : type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), trans_b(trans_b) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type_a, m, k, bs[0], bs[1]);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * b;
+        if (trans_b) {
+            b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0], bs[1]);
+            b = ggml_transpose(ctx, b);
+        } else {
+            b = ggml_new_tensor_4d(ctx, type_b, n, k, bs[0], bs[1]);
+        }
+        ggml_set_name(b, "b");
+
+        ggml_tensor * out = ggml_out_prod(ctx, a, b);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+};
+
+// GGML_OP_SQR
+struct test_sqr : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_sqr(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_sqr(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    float grad_eps() override {
+        return 0.1f * 0.25f*ne[0]*ne[1]*ne[2]*ne[3]; // 10% of expected value of sum.
+    }
+};
+
+// GGML_OP_SQRT
+struct test_sqrt : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_sqrt(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 3, 3, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_sqrt(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        // fill with positive values
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, 50.0f, 100.0f);
+        }
+    }
+
+    float grad_eps() override {
+        return 20.0f;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
+};
+
+// GGML_OP_LOG
+struct test_log : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_log(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_log(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            // log(1) == 0, cluster values there to keep the sum low for better precision in the backward pass:
+            init_tensor_uniform(t, 0.9f, 1.1f);
+        }
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
+};
+
+// GGML_OP_SIN
+struct test_sin : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
 
@@ -1071,19 +1921,42 @@ struct test_sqr : public test_case {
         return VARS_TO_STR2(type, ne);
     }
 
-    test_sqr(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10,
+    test_sin(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
         : type(type), ne(ne) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_tensor * out = ggml_sqr(ctx, a);
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_sin(ctx, a);
+        ggml_set_name(out, "out");
+
         return out;
     }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
+        }
+    }
+
+    double max_maa_err() override {
+        return 1e-3;
+    }
+
+    float grad_eps() override {
+        return 0.2f;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
 };
 
-// GGML_OP_SQRT
-struct test_sqrt : public test_case {
+// GGML_OP_COS
+struct test_cos : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
 
@@ -1091,22 +1964,38 @@ struct test_sqrt : public test_case {
         return VARS_TO_STR2(type, ne);
     }
 
-    test_sqrt(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10,
+    test_cos(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
         : type(type), ne(ne) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_tensor * out = ggml_sqrt(ctx, a);
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_cos(ctx, a);
+        ggml_set_name(out, "out");
+
         return out;
     }
 
     void initialize_tensors(ggml_context * ctx) override {
-        // fill with positive values
         for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-            init_tensor_uniform(t,
+            init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
         }
     }
+
+    double max_maa_err() override {
+        return 1e-3;
+    }
+
+    float grad_eps() override {
+        return 0.2f;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
 };
 
 // GGML_OP_CLAMP
@@ -1121,15 +2010,27 @@ struct test_clamp : public test_case {
     }
 
     test_clamp(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3},
             float min = -0.5f, float max = 0.5f)
         : type(type), ne(ne), min(min), max(max) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_clamp(ctx, a, min, max);
+        ggml_set_name(out, "out");
+
         return out;
     }
+
+    float grad_eps() override {
+        return 1e-2f;
+    }
+
+    std::vector<float> grad_expect() override {
+        return {0.0f, 1.0f};
+    }
 };
 
 // GGML_OP_DIAG_MASK_INF
@@ -1143,13 +2044,18 @@ struct test_diag_mask_inf : public test_case {
     }
 
     test_diag_mask_inf(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10, 10,
+            std::array<int64_t, 4> ne = {10, 10, 3, 2},
             int n_past = 5)
         : type(type), ne(ne), n_past(n_past) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_diag_mask_inf(ctx, a, n_past);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1173,7 +2079,7 @@ struct test_soft_max : public test_case {
     }
 
     test_soft_max(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3},
             bool mask = false,
             float scale = 1.0f,
             float max_bias = 0.0f)
@@ -1181,13 +2087,24 @@ struct test_soft_max : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
         ggml_tensor * mask = nullptr;
         if (this->mask) {
             mask = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne[0], ne[1]);
+            ggml_set_name(mask, "mask");
         }
+
         ggml_tensor * out = ggml_soft_max_ext(ctx, a, mask, scale, max_bias);
+        ggml_set_name(out, "out");
+
         return out;
     }
+
+    bool grad_precise() override {
+        return true;
+    }
 };
 
 
@@ -1209,7 +2126,7 @@ struct test_rope : public test_case {
     }
 
     test_rope(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne_a = {10,
+            std::array<int64_t, 4> ne_a = {10, 5, 3, 1},
             int n_dims = 10, int mode = 0, int n_ctx = 512, float fs = 1.0f, float ef = 0.0f, float af = 0.0f, bool ff = false, int v = 0)
         : type(type), ne_a(ne_a), n_dims(n_dims), mode(mode), n_ctx(n_ctx), fs(fs), ef(ef), af(af), ff(ff), v(v) {}
 
@@ -1218,13 +2135,29 @@ struct test_rope : public test_case {
         if (v & 1) {
             auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
+            ggml_set_param(ctx, a);
+            ggml_set_name(a, "a");
+
             a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0);
+            ggml_set_name(a, "view_of_a");
         } else {
             a = ggml_new_tensor(ctx, type, 4, ne_a.data());
+            ggml_set_param(ctx, a);
+            ggml_set_name(a, "a");
         }
+
         ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, ne_a[2]);
-        ggml_tensor * freq = ff ? ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_dims/2) : nullptr;
+        ggml_set_name(pos, "pos");
+
+        ggml_tensor * freq = nullptr;
+        if (ff) {
+            freq = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_dims/2);
+            ggml_set_name(freq, "freq");
+        }
+
         ggml_tensor * out = ggml_rope_ext(ctx, a, pos, freq, n_dims, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
+        ggml_set_name(out, "out");
+
         return out;
     }
 
@@ -1247,6 +2180,14 @@ struct test_rope : public test_case {
             }
         }
     }
+
+    double max_maa_err() override {
+        return 1e-3;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
 };
 
 // GGML_OP_POOL2D
@@ -1278,7 +2219,12 @@ struct test_pool2d : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
+        ggml_set_param(ctx, input);
+        ggml_set_name(input, "input");
+
         ggml_tensor * out = ggml_pool_2d(ctx, input, pool_type, k0, k1, s0, s1, p0, p1);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1303,8 +2249,14 @@ struct test_conv_transpose_1d : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+        ggml_set_name(input, "input");
+
         ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+        ggml_set_name(kernel, "kernel");
+
         ggml_tensor * out = ggml_conv_transpose_1d(ctx, kernel, input, s0, p0, d0);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1343,8 +2295,15 @@ struct test_im2col : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
+        ggml_set_param(ctx, input);
+        ggml_set_name(input, "input");
+
         ggml_tensor * kernel = ggml_new_tensor(ctx, type_kernel, 4, ne_kernel.data());
+        ggml_set_name(kernel, "kernel");
+
         ggml_tensor * out = ggml_im2col(ctx, kernel, input, s0, s1, p0, p1, d0, d1, is_2D, dst_type);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1362,8 +2321,8 @@ struct test_concat : public test_case {
     }
 
     test_concat(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne_a = {10,
-            int64_t ne_b_d =
+            std::array<int64_t, 4> ne_a = {10, 5, 5, 5},
+            int64_t ne_b_d = 5,
             int dim = 2, int v = 0)
         : type(type), ne_a(ne_a), ne_b_d(ne_b_d), dim(dim), v(v) {}
 
@@ -1374,19 +2333,30 @@ struct test_concat : public test_case {
         if (v & 1) {
             auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
+            ggml_set_name(a, "a");
+
             a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0);
+            ggml_set_name(a, "view_of_a");
         } else {
             a = ggml_new_tensor(ctx, type, 4, ne_a.data());
+            ggml_set_name(a, "a");
         }
         ggml_tensor * b;
         if (v & 2) {
             auto ne = ne_b; ne[0] *= 3; ne[1] *= 2; ne[2] *= 4;
             b = ggml_new_tensor(ctx, type, 4, ne.data());
+            ggml_set_name(b, "b");
+
             b = ggml_view_4d(ctx, b, ne_b[0], ne_b[1], ne_b[2], ne_b[3], b->nb[1], b->nb[2], b->nb[3], 0);
+            ggml_set_name(b, "view_of_b");
         } else {
             b = ggml_new_tensor(ctx, type, 4, ne_b.data());
+            ggml_set_name(b, "b");
         }
+
         ggml_tensor * out = ggml_concat(ctx, a, b, dim);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1408,7 +2378,11 @@ struct test_argsort : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_argsort(ctx, a, order);
+        ggml_set_name(out, "out");
+
         return out;
     }
 
@@ -1441,6 +2415,35 @@ struct test_argsort : public test_case {
     }
 };
 
+// GGML_OP_SUM
+struct test_sum : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_sum(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_sum(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    float grad_eps() override {
+        return 0.1f * sqrtf(ne[0]*ne[1]*ne[2]*ne[3]);
+    }
+};
+
 // GGML_OP_SUM_ROWS
 struct test_sum_rows : public test_case {
     const ggml_type type;
@@ -1451,12 +2454,17 @@ struct test_sum_rows : public test_case {
     }
 
     test_sum_rows(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3})
         : type(type), ne(ne) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_sum_rows(ctx, a);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1479,8 +2487,16 @@ struct test_upscale : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        if (transpose) a = ggml_transpose(ctx, a);
+        ggml_set_name(a, "a");
+
+        if (transpose) {
+            a = ggml_transpose(ctx, a);
+            ggml_set_name(a, "a_transposed");
+        }
+
         ggml_tensor * out = ggml_upscale(ctx, a, scale_factor);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1502,7 +2518,11 @@ struct test_upscale_ext : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3]);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1512,6 +2532,7 @@ struct test_group_norm : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
    const int32_t num_groups;
+    const float eps;
 
     std::string vars() override {
         return VARS_TO_STR3(type, ne, num_groups);
@@ -1519,12 +2540,17 @@ struct test_group_norm : public test_case {
 
     test_group_norm(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {64, 64, 320, 1},
-            int32_t num_groups = 32
-        : type(type), ne(ne), num_groups(num_groups) {}
+            int32_t num_groups = 32,
+            float eps = 1e-6f)
+        : type(type), ne(ne), num_groups(num_groups), eps(eps) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_tensor * out = ggml_group_norm(ctx, a, num_groups);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_group_norm(ctx, a, num_groups, eps);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1540,14 +2566,22 @@ struct test_acc : public test_case {
     }
 
     test_acc(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne_a = {
-            std::array<int64_t, 4> ne_b = {
+            std::array<int64_t, 4> ne_a = {256, 17, 1, 1},
+            std::array<int64_t, 4> ne_b = {256, 16, 1, 1})
         : type(type), ne_a(ne_a), ne_b(ne_b) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
+        ggml_set_param(ctx, a);
+        ggml_set_name(a, "a");
+
         ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
+        ggml_set_param(ctx, b);
+        ggml_set_name(b, "b");
+
         ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1570,7 +2604,11 @@ struct test_pad : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_pad(ctx, a, pad_0, pad_1, 0, 0);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1592,6 +2630,8 @@ struct test_arange : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * out = ggml_arange(ctx, start, stop, step);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1614,7 +2654,11 @@ struct test_timestep_embedding : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_timestep_embedding(ctx, a, dim, max_period);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1630,13 +2674,17 @@ struct test_leaky_relu : public test_case {
     }
 
     test_leaky_relu(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne_a = {10,
+            std::array<int64_t, 4> ne_a = {10, 5, 4, 3},
             float negative_slope = 0.1f)
         : type(type), ne_a(ne_a), negative_slope(negative_slope) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
+        ggml_set_name(a, "a");
+
         ggml_tensor * out = ggml_leaky_relu(ctx, a, negative_slope, true);
+        ggml_set_name(out, "out");
+
         return out;
     }
 };
@@ -1651,30 +2699,145 @@ struct test_flash_attn_ext : public test_case {
     const bool mask; // use mask
 
     const float max_bias; // ALiBi
+    const float logit_softcap; // Gemma 2
 
     const ggml_type type_KV;
 
     std::string vars() override {
-        return
+        return VARS_TO_STR8(hs, nh, kv, nb, mask, max_bias, logit_softcap, type_KV);
     }
 
     double max_nmse_err() override {
         return 5e-4;
     }
 
-    test_flash_attn_ext(int64_t hs = 128, int64_t nh = 32, int64_t kv = 96, int64_t nb = 8,
-
+    test_flash_attn_ext(int64_t hs = 128, int64_t nh = 32, int64_t kv = 96, int64_t nb = 8,
+            bool mask = true, float max_bias = 0.0f, float logit_softcap = 0.0f, ggml_type type_KV = GGML_TYPE_F16)
+        : hs(hs), nh(nh), kv(kv), nb(nb), mask(mask), max_bias(max_bias), logit_softcap(logit_softcap), type_KV(type_KV) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         const int64_t hs_padded = GGML_PAD(hs, ggml_blck_size(type_KV));
 
         ggml_tensor * q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, hs_padded, nb, nh, 1);
+        ggml_set_name(q, "q");
+
         ggml_tensor * k = ggml_new_tensor_4d(ctx, type_KV, hs_padded, kv, nh, 1);
+        ggml_set_name(k, "k");
+
         ggml_tensor * v = ggml_new_tensor_4d(ctx, type_KV, hs_padded, kv, nh, 1);
-
-
+        ggml_set_name(v, "v");
+
+        ggml_tensor * m = nullptr;
+        if (mask) {
+            m = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, kv, GGML_PAD(nb, GGML_KQ_MASK_PAD), 1, 1);
+            ggml_set_name(m, "m");
+        }
+
+        ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, m, 1.0f/sqrtf(hs), max_bias, logit_softcap);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
+};
+
+// GGML_OP_CROSS_ENTROPY_LOSS
+struct test_cross_entropy_loss : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_cross_entropy_loss(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * logits = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(ctx, logits);
+        ggml_set_name(logits, "logits");
+
+        ggml_tensor * labels = ggml_new_tensor(ctx, type, 4, ne.data());
+        // The labels are assumed to be constant -> no gradients.
+        ggml_set_name(labels, "labels");
+
+        // Ensure labels add up to 1:
+        labels = ggml_soft_max(ctx, labels);
+        ggml_set_name(labels, "labels_normalized");
+
+        ggml_tensor * out = ggml_cross_entropy_loss(ctx, logits, labels);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        // For larger abs. diffs between logits softmax is more linear, therefore more precise num. gradients.
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -100.0f, 100.0f);
+        }
+    }
+
+    float grad_eps() override {
+        return 1.0f;
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
+};
+
+// GGML_OP_OPT_STEP_ADAMW
+struct test_opt_step_adamw : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+    const float alpha;
+    const float beta1;
+    const float beta2;
+    const float eps;
+    const float wd;
+
+    std::string vars() override {
+        return VARS_TO_STR7(type, ne, alpha, beta1, beta2, eps, wd);
+    }
+
+    test_opt_step_adamw(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 5, 4, 3},
+            float alpha = 1e-3f,
+            float beta1 = 0.9f,
+            float beta2 = 0.999f,
+            float eps = 1e-8f,
+            float wd = 0.0f)
+        : type(type), ne(ne), alpha(alpha), beta1(beta1), beta2(beta2), eps(eps), wd(wd) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
+        ggml_set_param(ctx, a); // Despite tensor a having gradients the output tensor will not.
+        ggml_set_name(a, "a");
+
+        ggml_tensor * grad = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
+        ggml_set_name(grad, "grad");
+
+        ggml_tensor * out = ggml_opt_step_adamw(ctx, a, grad, alpha, beta1, beta2, eps, wd);
+        ggml_set_name(out, "out");
+
         return out;
     }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, 0.0f, 1.0f); // grad_v needs non-negative values.
+        }
+    }
+
+    bool grad_precise() override {
+        return true;
+    }
 };
 
 enum llm_norm_type {
@@ -2061,48 +3224,55 @@ struct test_falcon : public test_llm {
     }
 };
 
-static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_name) {
-    std::vector<std::unique_ptr<test_case>> test_cases;
-    std::default_random_engine rng(0);
 
-
-
-
-
-
-
-
-
-
-
-
-
+// ###########################################
+// ## Section 3: GGML Op Test Instantiation ##
+// ###########################################
+static const ggml_type all_types[] = {
+    GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16,
+    GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
+    GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
+    GGML_TYPE_Q8_0,
+    GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
+    GGML_TYPE_Q4_K, GGML_TYPE_Q5_K,
+    GGML_TYPE_Q6_K,
+    // GGML_TYPE_TQ1_0, GGML_TYPE_TQ2_0, // TODO: implement for all backends
+    GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
+    GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
+    GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+};
 
-
-
-
-
-
-
+static const ggml_type base_types[] = {
+    GGML_TYPE_F32, GGML_TYPE_F16,
+    GGML_TYPE_Q4_0,
+    GGML_TYPE_Q4_K,
+    GGML_TYPE_IQ2_XXS
+};
 
-
-
-
-
-
-
-
-
-
-
-
-
+static const ggml_type other_types[] = {
+    GGML_TYPE_Q4_1,
+    GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
+    GGML_TYPE_Q8_0,
+    GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
+    GGML_TYPE_Q5_K,
+    GGML_TYPE_Q6_K,
+    // GGML_TYPE_TQ1_0, GGML_TYPE_TQ2_0, // TODO: implement for all backends
+    GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
+    GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
+    GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+    GGML_TYPE_BF16,
+};
+
+// Test cases for evaluation: should try to cover edge cases while using small input sizes to keep the runtime low
+static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
+    std::vector<std::unique_ptr<test_case>> test_cases;
+    std::default_random_engine rng(0);
 
     // unary ops
     for (int v : {0, 1}) {
         for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) {
-            test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 128,
-            test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, {
+            test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 128, 2, 2, 2 }, v));
+            test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 5, 7, 11, 13 }, v));
         }
     }
 
@@ -2138,8 +3308,20 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         }
     }
 
+    test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32));
     test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32));
     test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16));
+    // test cases for 1D im2col
+    test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+    test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+    test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
+
+    // sycl backend will limit task global_range < MAX_INT
+    // test cases for 2D im2col with large input W and H (occurs in stable-diffusion)
+    // however these cases need to alloc more memory which may fail in some devices (Intel Arc770, etc.)
+    // these cases are verified (pass) in Intel(R) Data Center GPU Max 1100 (sycl backend) and NV A30 (cuda backend)
+    // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
+    // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
 
     test_cases.emplace_back(new test_conv_transpose_1d());
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
@@ -2150,14 +3332,18 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
     test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
 
+    test_cases.emplace_back(new test_argmax());
+    test_cases.emplace_back(new test_count_equal());
 
-
-
-
-
-
-
-
+    for (int ne3 : {1, 3}) { // CUDA backward pass only supports ne3 == 1
+        test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 1, 1}));
+        test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {2, 1, 1, 1}));
+        test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 2, 1, 1}));
+        test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 2, 1}));
+        test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 1, 2}));
+        test_cases.emplace_back(new test_repeat(GGML_TYPE_I32, {10, 5, 4, ne3}, {2, 1, 1, 1}));
+        test_cases.emplace_back(new test_repeat(GGML_TYPE_I16, {10, 5, 4, ne3}, {1, 1, 1, 2}));
+    }
 
     test_cases.emplace_back(new test_dup(GGML_TYPE_F32));
     test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
@@ -2167,8 +3353,12 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {0, 2, 1, 3})); // dup by rows
     test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {1, 0, 2, 3}));
     test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {1, 0, 2, 3})); // dup dst not-contiguous
-    test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10,
-    test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10,
+    test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
+
+    for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) {
+        test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim));
+    }
 
     for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
         for (ggml_type type_dst : all_types) {
@@ -2183,6 +3373,15 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     }
 
     test_cases.emplace_back(new test_cont());
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7}));
 
     auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
         for (auto op : {ggml_add, ggml_mul, ggml_div}) {
@@ -2193,16 +3392,16 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     add_test_bin_bcast(GGML_TYPE_F32, {1, 1, 8, 1}, {1, 1, 1, 1});
     add_test_bin_bcast(GGML_TYPE_F32, {1, 1, 1, 1}, {32, 1, 1, 1});
     add_test_bin_bcast(GGML_TYPE_F32, {1, 1, 320, 320}, {1, 1, 1, 1});
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
-    add_test_bin_bcast(GGML_TYPE_F32, {
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 1, 1}, {1, 1, 1, 1});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 1}, {1, 1, 1, 1});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 1, 1});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {2, 1, 1, 1});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 2, 1, 1});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 2, 1});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 1, 2});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 2, 2});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {1, 2, 2, 2});
+    add_test_bin_bcast(GGML_TYPE_F32, {10, 5, 4, 3}, {2, 2, 2, 2});
 
     // stable diffusion
     add_test_bin_bcast(GGML_TYPE_F32, {1280, 1, 1, 1}, {1, 1, 1, 1});
@@ -2221,13 +3420,25 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     //add_test_bin_bcast(GGML_TYPE_F32, {3, 3, 2560, 1280}, {1, 1, 1, 1});
     //add_test_bin_bcast(GGML_TYPE_F32, {3, 3, 2560, 1280}, {2, 1, 1, 1});
 
+    test_cases.emplace_back(new test_add1());
     test_cases.emplace_back(new test_scale());
 
     for (float eps : {1e-6f, 1e-5f, 1e-3f, 1e-1f}) {
-        test_cases.emplace_back(new test_norm(GGML_TYPE_F32, {64,
-        test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64,
+        test_cases.emplace_back(new test_norm(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
+        test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
     }
 
+    test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 1, 1}, {4, 1536, 1, 1}));
+    test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {8, 1536, 1, 1}, {4, 1536, 1, 1}));
+    test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 4, 1}, {4, 1536, 1, 1}));
+
+    test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 16, 1024, 32, 4));
+
+    test_cases.emplace_back(new test_rwkv_wkv(GGML_TYPE_F32, 32, 64, 1, 1));
+    test_cases.emplace_back(new test_rwkv_wkv(GGML_TYPE_F32, 32, 64, 32, 1));
+    test_cases.emplace_back(new test_rwkv_wkv(GGML_TYPE_F32, 32, 64, 32, 4));
+    test_cases.emplace_back(new test_rwkv_wkv(GGML_TYPE_F32, 32, 64, 128, 4));
+
 #if 1
     for (ggml_type type_a : base_types) {
         for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
@@ -2248,6 +3459,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {2, 2}));
         }
     }
+    for (ggml_type type_a : other_types) {
+        for (ggml_type type_b : {GGML_TYPE_F32}) {
+            if (ggml_blck_size(type_a) != 256) {
+                test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, ggml_blck_size(type_a), {1, 1}, {1, 1}));
+            }
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
+        }
+    }
 #else
     // m = a rows
     // n = b rows
@@ -2267,12 +3486,6 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     }
 #endif
 
-    for (ggml_type type_a : other_types) {
-        for (ggml_type type_b : {GGML_TYPE_F32}) {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
-        }
-    }
-
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 128, { 8, 1}, {1, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 128, { 8, 1}, {4, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 64, { 8, 1}, {4, 1}));
@@ -2280,6 +3493,12 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));
 
+    // sycl backend will limit task global_range < MAX_INT
+    // test case for f16-type-convert-to-fp32 kernel with large k under fp32 compute dtype (occurs in stable-diffusion)
+    // however this case needs to alloc more memory which may fail in some devices (Intel Arc770, etc.)
+    // this case is verified (pass) in Intel(R) Data Center GPU Max 1100 (sycl backend) and NV A30 (cuda backend)
+    // test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 512, 262144, 9216, {1, 1}, {1, 1}));
+
     for (ggml_type type_a : base_types) {
         for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) {
             for (int n_mats : {4, 8}) {
@@ -2312,13 +3531,37 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         }
     }
 
+    for (ggml_type type_a : base_types) {
+        for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, { 1, 1}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 1}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 1}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 1, 16, {10, 10}));
+
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, { 1, 1}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, { 1, 1}, true));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 1}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 1}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
+            test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, 16, 16, {10, 10}));
+        }
+    }
+
     test_cases.emplace_back(new test_sqr());
     test_cases.emplace_back(new test_sqrt());
+    test_cases.emplace_back(new test_log());
+    test_cases.emplace_back(new test_sin());
+    test_cases.emplace_back(new test_cos());
     test_cases.emplace_back(new test_clamp());
 
-    test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10,
-    test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10,
-    test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10,
+    test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 1, 1}, 5));
+    test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 3, 1}, 5));
+    test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 3, 2}, 5));
 
 #if 0
     std::uniform_int_distribution<> dist_ne1(1, 50);
@@ -2362,23 +3605,23 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
                 for (float af : { 1.0f, 1.4245f }) {
                     for (ggml_type type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
                         for (bool ff : {false, true}) { // freq_factors
-                            test_cases.emplace_back(new test_rope(type, {128, 32,
+                            test_cases.emplace_back(new test_rope(type, {128, 32, 2, 1}, 128, 0, 512, fs, ef, af, ff, v)); // llama 7B
 
                             if (all) {
-                                test_cases.emplace_back(new test_rope(type, {128, 40,
-                                test_cases.emplace_back(new test_rope(type, {128, 52,
-                                test_cases.emplace_back(new test_rope(type, {128, 64,
+                                test_cases.emplace_back(new test_rope(type, {128, 40, 2, 1}, 128, 0, 512, fs, ef, af, ff, v)); // llama 13B
+                                test_cases.emplace_back(new test_rope(type, {128, 52, 2, 1}, 128, 0, 512, fs, ef, af, ff, v)); // llama 30B
+                                test_cases.emplace_back(new test_rope(type, {128, 64, 2, 1}, 128, 0, 512, fs, ef, af, ff, v)); // llama 65B
                             }
 
                             if (all) {
-                                test_cases.emplace_back(new test_rope(type, { 64,  1,
-                                test_cases.emplace_back(new test_rope(type, { 64, 71,
-                                test_cases.emplace_back(new test_rope(type, { 64,  8,
-                                test_cases.emplace_back(new test_rope(type, { 80, 32,
-                                test_cases.emplace_back(new test_rope(type, { 80, 32,
+                                test_cases.emplace_back(new test_rope(type, { 64,  1, 2, 1}, 64, 2, 512, fs, ef, af, ff, v)); // neox (falcon 7B)
+                                test_cases.emplace_back(new test_rope(type, { 64, 71, 2, 1}, 64, 2, 512, fs, ef, af, ff, v)); // neox (falcon 7B)
+                                test_cases.emplace_back(new test_rope(type, { 64,  8, 2, 1}, 64, 2, 512, fs, ef, af, ff, v)); // neox (falcon 40B)
+                                test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 20, 2, 512, fs, ef, af, ff, v)); // neox (stablelm)
+                                test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 32, 2, 512, fs, ef, af, ff, v)); // neox (phi-2)
                             }
 
-                            test_cases.emplace_back(new test_rope(type, { 64, 128,
+                            test_cases.emplace_back(new test_rope(type, { 64, 128, 2, 1}, 64, 2, 512, fs, ef, af, ff, v)); // neox (falcon 40B)
                         }
                     }
 
@@ -2402,6 +3645,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {60, 10, 10, 10}, order)); // qwen
     }
 
+    test_cases.emplace_back(new test_sum());
     test_cases.emplace_back(new test_sum_rows());
     test_cases.emplace_back(new test_upscale());
     test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, { 512, 512, 3, 1 }, 2, true));
@@ -2417,11 +3661,14 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         for (bool mask : { true, false } ) {
             for (float max_bias : { 0.0f, 8.0f }) {
                 if (!mask && max_bias > 0.0f) continue;
-                for (int nh : { 32, }) {
-                    for (int kv : { 512, 1024, }) {
-                        for (int nb : { 1, 3, 32, 35, }) {
-                            for (ggml_type type_KV : {GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) {
-                                test_cases.emplace_back(new test_flash_attn_ext(hs, nh, kv, nb, mask, max_bias, type_KV));
+                for (float logit_softcap : {0.0f, 10.0f}) {
+                    if (hs != 128 && logit_softcap != 0.0f) continue;
+                    for (int nh : { 32, }) {
+                        for (int kv : { 512, 1024, }) {
+                            for (int nb : { 1, 3, 32, 35, }) {
+                                for (ggml_type type_KV : {GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) {
+                                    test_cases.emplace_back(new test_flash_attn_ext(hs, nh, kv, nb, mask, max_bias, logit_softcap, type_KV));
+                                }
                             }
                         }
                     }
@@ -2430,6 +3677,11 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         }
     }
 
+    test_cases.emplace_back(new test_cross_entropy_loss());
+    for (float wd : {0.0f, 1e-2f}) {
+        test_cases.emplace_back(new test_opt_step_adamw(GGML_TYPE_F32, {10, 5, 4, 3}, 1.0f, 1e-3f, 0.9f, 0.999f, wd));
+    }
+
     // these tests are disabled to save execution time, but they can be handy for debugging
 #if 0
     test_cases.emplace_back(new test_llama(1));
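For context on the wd sweep above: AdamW applies weight decay decoupled from the gradient-based step, so wd = 0.0f degenerates to plain Adam. The standard formulation (textbook AdamW, not quoted from this source) is

    m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
    v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2
    \theta_t = \theta_{t-1} - \alpha \left( \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon) + \lambda \, \theta_{t-1} \right)

where \hat{m}_t and \hat{v}_t are the bias-corrected moments and \lambda is the weight decay (wd above), so the two test cases exercise both the decayed and undecayed paths of the op.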
@@ -2438,8 +3690,30 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_falcon(2));
 #endif
 
-
+    return test_cases;
+}
+
+// Test cases for performance evaluation: should be representative of real-world use cases
+static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
+    std::vector<std::unique_ptr<test_case>> test_cases;
+
+    test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 1, 1, 1}));
+    test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 512, 1, 1}));
+
+    for (int bs : {1, 512}) {
+        for (ggml_type type_a : all_types) {
+            for (ggml_type type_b : {GGML_TYPE_F32}) {
+                test_cases.emplace_back(new test_mul_mat(type_a, type_b, 4096, bs, 14336, {1, 1}, {1, 1}));
+            }
+        }
+    }
+
+    return test_cases;
+}
+
+static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_name) {
     if (mode == MODE_TEST) {
+        auto test_cases = make_test_cases_eval();
         ggml_backend_t backend_cpu = ggml_backend_cpu_init();
 
         size_t n_ok = 0;
@@ -2455,7 +3729,21 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         return n_ok == test_cases.size();
     }
 
+    if (mode == MODE_GRAD) {
+        auto test_cases = make_test_cases_eval();
+        size_t n_ok = 0;
+        for (auto & test : test_cases) {
+            if (test->eval_grad(backend, op_name)) {
+                n_ok++;
+            }
+        }
+        printf("  %zu/%zu tests passed\n", n_ok, test_cases.size());
+
+        return n_ok == test_cases.size();
+    }
+
     if (mode == MODE_PERF) {
+        auto test_cases = make_test_cases_perf();
         for (auto & test : test_cases) {
             test->eval_perf(backend, op_name);
         }
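A brief reminder of what the new MODE_GRAD branch checks (standard numerics, spelled out in the usage text below rather than in the code): eval_grad compares each gradient obtained from backpropagation against a finite-difference estimate of the same derivative,

    \frac{\partial L}{\partial \theta_i} \approx \frac{L(\theta + h e_i) - L(\theta - h e_i)}{2 h}

The central difference has O(h^2) truncation error, which is what makes it a usable reference for the backward-pass implementations.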
@@ -2463,13 +3751,15 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     }
 
     GGML_ABORT("fatal error");
-    return false;
 }
 
 static void usage(char ** argv) {
     printf("Usage: %s [mode] [-o op] [-b backend]\n", argv[0]);
-    printf("
-    printf("
+    printf("  valid modes:\n");
+    printf("  - test (default, compare with CPU backend for correctness)\n");
+    printf("  - grad (compare gradients from backpropagation with method of finite differences)\n");
+    printf("  - perf (performance evaluation)\n");
+    printf("  op names for -o are as given by ggml_op_desc() (e.g. ADD, MUL_MAT, etc)\n");
 }
 
 int main(int argc, char ** argv) {
@@ -2482,6 +3772,8 @@ int main(int argc, char ** argv) {
             mode = MODE_TEST;
         } else if (strcmp(argv[i], "perf") == 0) {
             mode = MODE_PERF;
+        } else if (strcmp(argv[i], "grad") == 0) {
+            mode = MODE_GRAD;
         } else if (strcmp(argv[i], "-o") == 0) {
             if (i + 1 < argc) {
                 op_name_filter = argv[++i];
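With the two branches above in place, the binary accepts the new mode on the command line, e.g. test-backend-ops grad -o MUL_MAT to gradient-check only matrix multiplication (invocation shape inferred from this argument parsing and the usage() text earlier in the diff, not quoted from the source).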
@@ -2503,30 +3795,41 @@ int main(int argc, char ** argv) {
     }
 
     // enumerate backends
-    printf("Testing %zu
+    printf("Testing %zu devices\n\n", ggml_backend_dev_count());
 
     size_t n_ok = 0;
 
-    for (size_t i = 0; i <
-
+    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
+        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
 
-
+        printf("Backend %zu/%zu: %s\n", i + 1, ggml_backend_dev_count(), ggml_backend_dev_name(dev));
+
+        if (backend_filter != NULL && strcmp(backend_filter, ggml_backend_dev_name(dev)) != 0) {
             printf("  Skipping\n");
             n_ok++;
             continue;
         }
 
-        ggml_backend_t backend =
+        ggml_backend_t backend = ggml_backend_dev_init(dev, NULL);
         GGML_ASSERT(backend != NULL);
 
-        if (backend_filter == NULL && ggml_backend_is_cpu(backend)) {
+        if (backend_filter == NULL && ggml_backend_is_cpu(backend) && mode != MODE_GRAD) {
             printf("  Skipping CPU backend\n");
             ggml_backend_free(backend);
             n_ok++;
             continue;
         }
 
-
+        if (ggml_backend_is_cpu(backend)) {
+            // TODO: better value for n_threads
+            ggml_backend_cpu_set_n_threads(backend, std::thread::hardware_concurrency() / 2);
+        }
+
+        printf("  Device description: %s\n", ggml_backend_dev_description(dev));
+        size_t free, total; // NOLINT
+        ggml_backend_dev_memory(dev, &free, &total);
+        printf("  Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024);
+        printf("\n");
 
         bool ok = test_backend(backend, mode, op_name_filter);
 
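The rewritten loop above moves from index-based backend enumeration to the ggml_backend_dev_* device registry. A minimal standalone sketch of that API as it is used here (header name assumed; every function called below appears in the hunk above):

    #include "ggml-backend.h"
    #include <cstdio>

    int main() {
        // walk every registered device and report its memory, as the test driver does
        for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            size_t free, total;
            ggml_backend_dev_memory(dev, &free, &total);
            printf("%s: %zu MB free of %zu MB\n", ggml_backend_dev_name(dev),
                   free / 1024 / 1024, total / 1024 / 1024);
        }
        return 0;
    }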
@@ -2543,9 +3846,9 @@ int main(int argc, char ** argv) {
         ggml_backend_free(backend);
     }
 
-    printf("%zu/%zu backends passed\n", n_ok,
+    printf("%zu/%zu backends passed\n", n_ok, ggml_backend_dev_count());
 
-    if (n_ok !=
+    if (n_ok != ggml_backend_dev_count()) {
         printf("\033[1;31mFAIL\033[0m\n");
         return 1;
     }