@fugood/llama.node 0.3.9 → 0.3.10
This diff shows the changes between publicly released versions of this package as they appear in their respective public registries, and is provided for informational purposes only.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +46 -8
- package/lib/index.ts +3 -1
- package/package.json +8 -1
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +292 -28
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/llama.cpp/tests/test-backend-ops.cpp

@@ -1302,6 +1302,59 @@ struct test_repeat : public test_case {
     }
 };
 
+// GGML_OP_REPEAT_BACK
+struct test_repeat_back : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+    const std::array<int, 4> nr;
+    const bool v; // whether src is a noncontiguous view
+
+    std::string vars() override {
+        return VARS_TO_STR4(type, ne, nr, v);
+    }
+
+    size_t op_size(ggml_tensor * t) override {
+        return ggml_nbytes(t) * 2;
+    }
+
+    test_repeat_back(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {8, 6, 4, 2},
+            std::array<int, 4> nr = {2, 2, 2, 2},
+            bool v = false)
+        : type(type), ne(ne), nr(nr), v(v) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * src = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
+        ggml_set_name(src, "src");
+
+        if (v) {
+            GGML_ASSERT(ne[0] % 2 == 0);
+            GGML_ASSERT(ne[1] % 2 == 0);
+            GGML_ASSERT(ne[2] % 2 == 0);
+            GGML_ASSERT(ne[3] % 2 == 0);
+            GGML_ASSERT(nr[0] % 2 == 0 || nr[0] == 1);
+            GGML_ASSERT(nr[1] % 2 == 0 || nr[1] == 1);
+            GGML_ASSERT(nr[2] % 2 == 0 || nr[2] == 1);
+            GGML_ASSERT(nr[3] % 2 == 0 || nr[3] == 1);
+
+            const int64_t ne00 = nr[0] == 1 ? src->ne[0] : src->ne[0] / 2;
+            const int64_t ne01 = nr[1] == 1 ? src->ne[1] : src->ne[1] / 2;
+            const int64_t ne02 = nr[2] == 1 ? src->ne[2] : src->ne[2] / 2;
+            const int64_t ne03 = nr[3] == 1 ? src->ne[3] : src->ne[3] / 2;
+
+            src = ggml_view_4d(ctx, src, ne00, ne01, ne02, ne03, src->nb[1], src->nb[2], src->nb[3], 0);
+        }
+
+        ggml_tensor * target = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(target, "target");
+
+        ggml_tensor * out = ggml_repeat_back(ctx, src, target);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+};
+
 // GGML_OP_DUP
 struct test_dup : public test_case {
     const ggml_type type;
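
Note: ggml_repeat tiles a tensor up to a target shape, and ggml_repeat_back is its adjoint, summing the tiles back down, which is what the struct above exercises (including on non-contiguous views). A minimal standalone sketch of the op's semantics, not taken from the package; it assumes the post-backend-split headers where ggml_graph_compute_with_ctx lives in ggml-cpu.h:

#include "ggml.h"
#include "ggml-cpu.h"
#include <cstdio>

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    ggml_context * ctx = ggml_init(params);

    // src is a 4x4 of ones; target only fixes the reduced 2x2 output shape
    ggml_tensor * src    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
    ggml_tensor * target = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 2);
    for (int i = 0; i < 16; ++i) {
        ((float *) src->data)[i] = 1.0f;
    }

    // each output element accumulates the 2x2 = 4 tile copies that map onto it
    ggml_tensor * out = ggml_repeat_back(ctx, src, target);

    ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 1);

    printf("out[0][0] = %.1f\n", ((float *) out->data)[0]); // prints 4.0
    ggml_free(ctx);
    return 0;
}
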
@@ -1621,21 +1674,28 @@ struct test_silu_back : public test_case {
 struct test_norm : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
-    float eps;
+    const bool v; // whether a is a non-contiguous view
+    const float eps;
 
     std::string vars() override {
-        return VARS_TO_STR3(type, ne, eps);
+        return VARS_TO_STR4(type, ne, v, eps);
     }
 
     test_norm(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {64, 5, 4, 3},
+            bool v = false,
             float eps = 1e-6f)
-        : type(type), ne(ne), eps(eps) {}
+        : type(type), ne(ne), v(v), eps(eps) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_set_name(a, "a");
 
+        if (v) {
+            a = ggml_view_4d(ctx, a, a->ne[0]/2, a->ne[1]/2, a->ne[2]/2, a->ne[3]/2, a->nb[1], a->nb[2], a->nb[3], 0);
+            ggml_set_name(a, "view of a");
+        }
+
         ggml_tensor * out = ggml_norm(ctx, a, eps);
         ggml_set_name(out, "out");
 
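
Note: the new v path wraps the operand in ggml_view_4d, halving each dimension while keeping the parent's byte strides nb[1..3], so the view's rows are no longer densely packed. A small sketch of that property (hypothetical helper, halving only the row length for brevity):

#include "ggml.h"
#include <cassert>

// a half-width view that reuses the parent's strides is not contiguous:
// each row of v holds 32 floats, but nb[1] still spans the parent's 64
static void demo_noncontig_view(ggml_context * ctx) {
    ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 5, 4, 3);
    ggml_tensor * v = ggml_view_4d(ctx, a, 32, 5, 4, 3,
                                   a->nb[1], a->nb[2], a->nb[3], 0);
    assert(!ggml_is_contiguous(v));
}
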
@@ -1647,22 +1707,29 @@ struct test_norm : public test_case {
 struct test_rms_norm : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
-    float eps;
+    const bool v; // whether a is a non-contiguous view
+    const float eps;
 
     std::string vars() override {
-        return VARS_TO_STR3(type, ne, eps);
+        return VARS_TO_STR4(type, ne, v, eps);
     }
 
     test_rms_norm(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {64, 5, 4, 3},
+            bool v = false,
             float eps = 1e-6f)
-        : type(type), ne(ne), eps(eps) {}
+        : type(type), ne(ne), v(v), eps(eps) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_set_param(ctx, a);
         ggml_set_name(a, "a");
 
+        if (v) {
+            a = ggml_view_4d(ctx, a, a->ne[0]/2, a->ne[1]/2, a->ne[2]/2, a->ne[3]/2, a->nb[1], a->nb[2], a->nb[3], 0);
+            ggml_set_name(a, "view of a");
+        }
+
         ggml_tensor * out = ggml_rms_norm(ctx, a, eps);
         ggml_set_name(out, "out");
 
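
For reference, ggml_rms_norm divides each row by its root mean square, with eps stabilizing the denominator, hence the eps sweep in the test registrations further down. The per-row operation (standard RMSNorm, stated here as an aid rather than quoted from the package) is:

    y_i = \frac{x_i}{\sqrt{\frac{1}{n}\sum_{j=1}^{n} x_j^2 + \varepsilon}}
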
@@ -1688,7 +1755,7 @@ struct test_rms_norm : public test_case {
 struct test_rms_norm_back : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
-    float eps;
+    const float eps;
 
     std::string vars() override {
         return VARS_TO_STR3(type, ne, eps);
@@ -1849,6 +1916,10 @@ struct test_mul_mat : public test_case {
         return 5e-4;
     }
 
+    int64_t grad_nmax() override {
+        return 20000;
+    }
+
     uint64_t op_flops(ggml_tensor * t) override {
         GGML_UNUSED(t);
         return 2 * m * n * k * bs[0] * nr[0] * bs[1] * nr[1];
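
The op_flops formula counts one multiply and one add for each of the k inner-product terms, across all m*n outputs and every batch/repeat combination; grad_nmax appears to raise the element cap for the numerical gradient checks. As a worked example for one of the larger cases registered below (m = n = 16, k = 256, bs = {3, 2}, nr = {2, 2}):

    2 * 16 * 16 * 256 * (3*2) * (2*2) = 3,145,728 FLOPs per evaluation.
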
@@ -1878,8 +1949,12 @@ struct test_mul_mat : public test_case {
 
             a = ggml_new_tensor_4d(ctx, type_a, ne_a[per[0]], ne_a[per[1]], ne_a[per[2]], ne_a[per[3]]);
             b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
-            ggml_set_param(ctx, a);
-            ggml_set_param(ctx, b);
+            if (!ggml_is_quantized(type_a)) {
+                if (bs[1] == 1 && nr[1] == 1) {
+                    ggml_set_param(ctx, a);
+                }
+                ggml_set_param(ctx, b);
+            }
             ggml_set_name(a, "a");
             ggml_set_name(b, "b");
 
@@ -1890,8 +1965,12 @@ struct test_mul_mat : public test_case {
         } else {
             a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
             b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
-            ggml_set_param(ctx, a);
-            ggml_set_param(ctx, b);
+            if (!ggml_is_quantized(type_a)) {
+                if (bs[1] == 1 && nr[1] == 1) {
+                    ggml_set_param(ctx, a);
+                }
+                ggml_set_param(ctx, b);
+            }
             ggml_set_name(a, "a");
             ggml_set_name(b, "b");
         }
@@ -2282,11 +2361,12 @@ struct test_soft_max : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     const bool mask;
+    const ggml_type m_prec;
     const float scale;
     const float max_bias;
 
     std::string vars() override {
-        return VARS_TO_STR5(type, ne, mask, scale, max_bias);
+        return VARS_TO_STR6(type, ne, mask, m_prec, scale, max_bias);
     }
 
     // the 1024 test with bias occasionally fails:
@@ -2298,9 +2378,10 @@ struct test_soft_max : public test_case {
     test_soft_max(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {10, 5, 4, 3},
             bool mask = false,
+            ggml_type m_prec = GGML_TYPE_F32,
             float scale = 1.0f,
             float max_bias = 0.0f)
-        : type(type), ne(ne), mask(mask), scale(scale), max_bias(max_bias) {}
+        : type(type), ne(ne), mask(mask), m_prec(m_prec), scale(scale), max_bias(max_bias) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2309,7 +2390,7 @@ struct test_soft_max : public test_case {
 
         ggml_tensor * mask = nullptr;
         if (this->mask) {
-            mask = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne[0], ne[1]);
+            mask = ggml_new_tensor_2d(ctx, m_prec, ne[0], ne[1]);
             ggml_set_name(mask, "mask");
         }
 
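
Note: the mask tensor the tests build now carries its own precision m_prec. A minimal sketch of the call shape the f16-mask cases exercise, written as a hypothetical helper (ggml_soft_max_ext computes a row-wise softmax of scale*a plus an ALiBi-slope-weighted mask when max_bias > 0):

#include "ggml.h"

static ggml_tensor * soft_max_f16_mask(ggml_context * ctx) {
    ggml_tensor * a    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 8);
    ggml_tensor * mask = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 64, 8); // f16 mask, as in the new tests
    return ggml_soft_max_ext(ctx, a, mask, /*scale=*/ 0.1f, /*max_bias=*/ 8.0f);
}
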
@@ -2852,7 +2933,7 @@ struct test_group_norm : public test_case {
     const float eps;
 
     std::string vars() override {
-        return VARS_TO_STR3(type, ne, num_groups);
+        return VARS_TO_STR4(type, ne, num_groups, eps);
     }
 
     test_group_norm(ggml_type type = GGML_TYPE_F32,
@@ -3798,6 +3879,16 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_repeat(GGML_TYPE_I16, {10, 5, 4, ne3}, {1, 1, 1, 2}));
     }
 
+    for (bool view : {false, true}) {
+        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 1, 1, 1}, view));
+        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {2, 1, 1, 1}, view));
+        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 2, 1, 1}, view));
+        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 1, 2, 1}, view));
+        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 1, 1, 2}, view));
+        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_I32, {8, 6, 4, 2}, {2, 1, 1, 1}, view));
+        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_I16, {8, 6, 4, 2}, {1, 1, 1, 2}, view));
+    }
+
     test_cases.emplace_back(new test_dup(GGML_TYPE_F32));
     test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
     test_cases.emplace_back(new test_dup(GGML_TYPE_I32));
@@ -3887,9 +3978,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_scale());
     test_cases.emplace_back(new test_silu_back());
 
-    for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f}) {
-        test_cases.emplace_back(new test_norm    (GGML_TYPE_F32, {64, 5, 4, 3}, eps));
-        test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
+    for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f}) {
+        for (bool v : {false, true}) {
+            test_cases.emplace_back(new test_norm    (GGML_TYPE_F32, {64, 5, 4, 3}, v, eps));
+            test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, v, eps));
+        }
         test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
     }
 
@@ -3909,38 +4002,35 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 32, 4));
     test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 128, 4));
 
-    for (int i = 1; i < 10; ++i) {
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q4_0, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q4_1, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q5_0, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q5_1, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q4_K, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q5_K, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q6_K, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
-        test_cases.emplace_back(new test_mul_mat(GGML_TYPE_IQ4_NL, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
+    for (ggml_type type_a : all_types) {
+        for (int i = 1; i < 10; ++i) {
+            test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
+        }
     }
 
 #if 1
     for (ggml_type type_a : base_types) {
         for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
             // test cases without permutation
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {
-
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16,
-
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {
-            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {2, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 2}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 1}, {1, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 1}, {2, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {1, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {2, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {1, 2}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {2, 2}));
+
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {1, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {2, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {1, 2}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 1}, {1, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 1}, {2, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {1, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {2, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {1, 2}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {2, 2}));
 
             // test cases with permutation
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 2, 1, 3}));
@@ -4078,17 +4168,28 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
             for (float scale : {1.0f, 0.1f}) {
                 for (int64_t ne0 : {16, 1024}) {
                     for (int64_t ne1 : {16, 1024}) {
-                        test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, scale, max_bias));
-                        test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, scale, max_bias));
+                        if (mask) {
+                            for (ggml_type m_prec : {GGML_TYPE_F32, GGML_TYPE_F16}) {
+                                test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, m_prec, scale, max_bias));
+                                test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, m_prec, scale, max_bias));
+                            }
+                        } else {
+                            /* The precision of mask here doesn't matter as boolean mask is false */
+                            test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, GGML_TYPE_F32, scale, max_bias));
+                            test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, GGML_TYPE_F32, scale, max_bias));
+                        }
                     }
                 }
             }
         }
     }
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, 0.1f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, GGML_TYPE_F32, 0.1f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 8.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 8.0f));
 
     for (float max_bias : {0.0f, 8.0f}) {
         for (float scale : {1.0f, 0.1f}) {
@@ -4224,13 +4325,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
     test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3}));
     test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {3072, 512, 2, 1}, {0, 2, 1, 3}));
 
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, 1.0f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 4096, 5, 1}, false, 1.0f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {1024, 1024, 10, 1}, false, 1.0f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 1024, 10, 1}, false, 1.0f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {256, 256, 20, 1}, false, 1.0f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {64, 64, 20, 1}, false, 1.0f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 64, 20, 1}, false, 1.0f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 4096, 5, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {1024, 1024, 10, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 1024, 10, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {256, 256, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {64, 64, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 64, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
 
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32, 10, 1, 1}));
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {1024, 10, 1, 1}));