@fugood/llama.node 0.3.17 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +39 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +366 -19
- package/src/LlamaCompletionWorker.h +30 -10
- package/src/LlamaContext.cpp +213 -5
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
- package/src/llama.cpp/.github/workflows/build.yml +41 -762
- package/src/llama.cpp/.github/workflows/docker.yml +5 -2
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +12 -12
- package/src/llama.cpp/CMakeLists.txt +5 -17
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +31 -3
- package/src/llama.cpp/common/arg.cpp +48 -29
- package/src/llama.cpp/common/chat.cpp +128 -106
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +37 -1
- package/src/llama.cpp/common/common.h +18 -9
- package/src/llama.cpp/common/llguidance.cpp +1 -0
- package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/src/llama.cpp/common/minja/minja.hpp +69 -36
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +57 -50
- package/src/llama.cpp/examples/CMakeLists.txt +2 -23
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml.h +10 -7
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
- package/src/llama.cpp/ggml/src/ggml.c +29 -20
- package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/src/llama.cpp/include/llama.h +52 -11
- package/src/llama.cpp/requirements/requirements-all.txt +3 -3
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +6 -0
- package/src/llama.cpp/src/llama-arch.cpp +3 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +17 -7
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +389 -501
- package/src/llama.cpp/src/llama-context.h +44 -32
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +20 -38
- package/src/llama.cpp/src/llama-graph.h +12 -8
- package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
- package/src/llama.cpp/src/llama-kv-cache.h +271 -85
- package/src/llama.cpp/src/llama-memory.h +11 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +316 -69
- package/src/llama.cpp/src/llama-model.h +8 -1
- package/src/llama.cpp/src/llama-quant.cpp +15 -13
- package/src/llama.cpp/src/llama-sampling.cpp +18 -6
- package/src/llama.cpp/src/llama-vocab.cpp +42 -4
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +14 -0
- package/src/llama.cpp/tests/CMakeLists.txt +10 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
- package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
- package/src/llama.cpp/tests/test-chat.cpp +3 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
- package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
- package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
- package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
- package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.h +0 -135
- package/src/llama.cpp/examples/llava/llava.cpp +0 -586
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/mtmd.h +0 -168
- package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
package/src/llama.cpp/tests/test-backend-ops.cpp

```diff
@@ -823,7 +823,7 @@ struct test_case {
 
         ggml_build_forward_expand(gf, out);
         ggml_graph_cpy(gf, gb);
-        ggml_build_backward_expand(ctx.get(),
+        ggml_build_backward_expand(ctx.get(), gb, nullptr);
         if (expect.size() != 1 || expect[0] != 0.0f) {
             GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
             for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
@@ -1026,7 +1026,7 @@ struct test_example : public test_case {
         // Step 3: return the output tensor.
         return out;
     }
-    // In order to also check the gradients for your op, add calls like ggml_set_param(ctx, a)
+    // In order to also check the gradients for your op, add calls like ggml_set_param(a)
     // immediately after you create the tensors.
     // This is optional and only makes sense if a backward pass has actually been implemented for the new op.
 };
@@ -1058,7 +1058,7 @@ struct test_unary : public test_case {
             auto ne = ne_a; ne[0] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
             if (grad_supported) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
 
@@ -1067,7 +1067,7 @@ struct test_unary : public test_case {
         } else {
             a = ggml_new_tensor(ctx, type, 4, ne_a.data());
             if (grad_supported) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
         }
@@ -1133,7 +1133,7 @@ struct test_get_rows : public test_case {
 
         const bool grad_supported = ggml_is_matrix(in) && ggml_is_vector(rows);
         if (grad_supported) {
-            ggml_set_param(ctx, in);
+            ggml_set_param(in);
             // rows is a constant input -> no gradients
         }
 
@@ -1322,7 +1322,7 @@ struct test_repeat : public test_case {
         ggml_set_name(target, "target");
 
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         ggml_tensor * out = ggml_repeat(ctx, src, target);
@@ -1406,7 +1406,7 @@ struct test_dup : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         if (_use_permute) {
@@ -1442,7 +1442,7 @@ struct test_set : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         auto ne_dst = ne;
@@ -1450,7 +1450,7 @@ struct test_set : public test_case {
             ne_dst[i] *= 2;
         }
         ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
-        ggml_set_param(ctx, dst);
+        ggml_set_param(dst);
         ggml_set_name(dst, "dst");
 
         size_t offset = 0;
@@ -1498,7 +1498,7 @@ struct test_cpy : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         if (_src_use_permute) {
@@ -1536,7 +1536,7 @@ struct test_cont : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         src = ggml_transpose(ctx, src);
@@ -1583,8 +1583,8 @@ struct test_bin_bcast : public test_case {
         // The backward pass supports broadcasting only for GGML_ADD:
         const bool grad_supported = op == ggml_add || ggml_are_same_shape(a, b);
         if (grad_supported) {
-            ggml_set_param(ctx, a);
-            ggml_set_param(ctx, b);
+            ggml_set_param(a);
+            ggml_set_param(b);
         }
 
         ggml_tensor * out = op(ctx, a, b);
@@ -1632,11 +1632,11 @@ struct test_add1 : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * b = ggml_new_tensor_1d(ctx, type, 1);
-        // ggml_set_param(ctx, b); // TODO: implement
+        // ggml_set_param(b); // TODO: implement
         ggml_set_name(b, "b");
 
         ggml_tensor * out = ggml_add1(ctx, a, b);
@@ -1667,7 +1667,7 @@ struct test_scale : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_scale(ctx, a, scale);
@@ -1762,7 +1762,7 @@ struct test_rms_norm : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         if (v) {
@@ -1981,7 +1981,7 @@ struct test_mul_mat : public test_case {
     const std::array<int64_t, 2> bs;  // dims 3 and 4
     const std::array<int64_t, 2> nr;  // repeat in dims 3 and 4
     const std::array<int64_t, 4> per; // permutation of dimensions
-    const bool v; // whether a is a non-contiguous view
+    const bool v; // whether a and b are non-contiguous views
 
     std::string vars() override {
         return VARS_TO_STR9(type_a, type_b, m, n, k, bs, nr, per, v);
@@ -2028,9 +2028,9 @@ struct test_mul_mat : public test_case {
             b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
             if (!ggml_is_quantized(type_a)) {
                 if (bs[1] == 1 && nr[1] == 1) {
-                    ggml_set_param(ctx, a);
+                    ggml_set_param(a);
                 }
-                ggml_set_param(ctx, b);
+                ggml_set_param(b);
             }
             ggml_set_name(a, "a");
             ggml_set_name(b, "b");
@@ -2040,19 +2040,29 @@ struct test_mul_mat : public test_case {
             ggml_set_name(a, "a_permuted");
             ggml_set_name(b, "b_permuted");
         } else {
-
             if (v) {
-                a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0],
-
+                a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], bs[1]);
+                b = ggml_new_tensor_4d(ctx, type_b, k*2, n, bs[0]*nr[0], bs[1]*nr[1]);
+
+                if (!ggml_is_quantized(type_a)) {
+                    if (bs[1] == 1 && nr[1] == 1) {
+                        ggml_set_param(a);
+                    }
+                    ggml_set_param(b);
+                }
+
+                a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
+                b = ggml_view_4d(ctx, b, k, n, bs[0]*nr[0], bs[1]*nr[1], b->nb[1], b->nb[2], b->nb[3], 0);
             } else {
                 a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
-
-
-
-
-
+                b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
+
+                if (!ggml_is_quantized(type_a)) {
+                    if (bs[1] == 1 && nr[1] == 1) {
+                        ggml_set_param(a);
+                    }
+                    ggml_set_param(b);
             }
-            ggml_set_param(ctx, b);
         }
         ggml_set_name(a, "a");
         ggml_set_name(b, "b");
```
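The recurring edit in these hunks is the ggml autograd API change that comes with this llama.cpp bump: `ggml_set_param` no longer takes a `ggml_context *`, and `ggml_build_backward_expand` now takes the backward graph plus an optional gradient-accumulator argument. Below is a minimal C sketch of the new call sequence, mirroring the test harness above; the arena size and the `ggml_sqr` op are arbitrary illustration choices, not code from the package:

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /* .mem_size   = */ 16*1024*1024,   // arbitrary arena size for this sketch
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_param(a);                  // 0.3.x form: ggml_set_param(ctx, a);
    ggml_set_name(a, "a");

    struct ggml_tensor * out = ggml_sqr(ctx, a);

    // forward graph plus a copy that also receives the backward nodes,
    // following the sequence in the test_case hunk above
    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads=*/true);
    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads=*/true);
    ggml_build_forward_expand(gf, out);
    ggml_graph_cpy(gf, gb);

    // new three-argument form; NULL for the gradient-accumulator argument, as in the hunk above
    ggml_build_backward_expand(ctx, gb, NULL);

    ggml_free(ctx);
    return 0;
}
```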
The remaining test-backend-ops.cpp hunks apply the same `ggml_set_param` signature change across the other operator tests:

```diff
@@ -2201,7 +2211,7 @@ struct test_sqr : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sqr(ctx, a);
@@ -2230,7 +2240,7 @@ struct test_sqrt : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sqrt(ctx, a);
@@ -2270,7 +2280,7 @@ struct test_log : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_log(ctx, a);
@@ -2306,7 +2316,7 @@ struct test_sin : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sin(ctx, a);
@@ -2349,7 +2359,7 @@ struct test_cos : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_cos(ctx, a);
@@ -2429,7 +2439,7 @@ struct test_diag_mask_inf : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_diag_mask_inf(ctx, a, n_past);
@@ -2468,7 +2478,7 @@ struct test_soft_max : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * mask = nullptr;
@@ -2550,7 +2560,7 @@ struct test_rope : public test_case {
             auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
             if (forward) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
 
@@ -2559,7 +2569,7 @@ struct test_rope : public test_case {
         } else {
             a = ggml_new_tensor(ctx, type, 4, ne_a.data());
             if (forward) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
         }
@@ -2673,7 +2683,7 @@ struct test_pool2d : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
-        ggml_set_param(ctx, input);
+        ggml_set_param(input);
         ggml_set_name(input, "input");
 
         ggml_tensor * out = ggml_pool_2d(ctx, input, pool_type, k0, k1, s0, s1, p0, p1);
@@ -2749,7 +2759,7 @@ struct test_im2col : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
-        ggml_set_param(ctx, input);
+        ggml_set_param(input);
         ggml_set_name(input, "input");
 
         ggml_tensor * kernel = ggml_new_tensor(ctx, type_kernel, 4, ne_kernel.data());
```
A new test struct for the depthwise 2-D convolution op is added in the same file:

```diff
@@ -2762,6 +2772,48 @@ struct test_im2col : public test_case {
     }
 };
 
+// GGML_OP_CONV_2D_DW
+struct test_conv_2d_dw : public test_case {
+    const std::array<int64_t, 4> ne_input;
+    const std::array<int64_t, 4> ne_kernel;
+    const int stride;
+    const int padding;
+    const int dilation;
+    const bool cwhn;
+
+    std::string vars() override {
+        return VARS_TO_STR6(ne_input, ne_kernel, stride, padding, dilation, cwhn);
+    }
+
+    test_conv_2d_dw(std::array<int64_t, 4> ne_input = {64, 64, 16, 1},
+            std::array<int64_t, 4> ne_kernel = {3, 3, 1, 16},
+            int stride = 1, int padding = 0, int dilation = 1, bool cwhn = false)
+        : ne_input(ne_input), ne_kernel(ne_kernel), stride(stride), padding(padding), dilation(dilation), cwhn(cwhn) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+        ggml_set_name(input, "input");
+
+        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+        ggml_set_name(kernel, "kernel");
+
+        if (cwhn) {
+            // change memory layout to channel-most-contiguous (CWHN),
+            // then permute it back so NE matches the original input
+            input = ggml_cont(ctx, ggml_permute(ctx, input, 1, 2, 0, 3));
+            input = ggml_permute(ctx, input, 2, 0, 1, 3);
+            kernel = ggml_cont(ctx, ggml_permute(ctx, kernel, 2, 3, 1, 0));
+            kernel = ggml_permute(ctx, kernel, 3, 2, 0, 1);
+        }
+
+        ggml_tensor * out = ggml_conv_2d_dw_direct(
+            ctx, kernel, input,
+            stride, stride, padding, padding, dilation, dilation);
+        ggml_set_name(out, "out");
+        return out;
+    }
+};
+
 // GGML_OP_CONCAT
 struct test_concat : public test_case {
     const ggml_type type;
```
The reduction, accumulation, loss, and optimizer tests get the same treatment, and the new depthwise-conv cases are registered with the evaluation set:

```diff
@@ -2884,7 +2936,7 @@ struct test_sum : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sum(ctx, a);
@@ -2913,7 +2965,7 @@ struct test_sum_rows : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sum_rows(ctx, a);
@@ -2938,7 +2990,7 @@ struct test_mean : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_mean(ctx, a);
@@ -3084,11 +3136,11 @@ struct test_acc : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
-        ggml_set_param(ctx, b);
+        ggml_set_param(b);
         ggml_set_name(b, "b");
 
         ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
@@ -3325,7 +3377,7 @@ struct test_cross_entropy_loss : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * logits = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, logits);
+        ggml_set_param(logits);
         ggml_set_name(logits, "logits");
 
         ggml_tensor * labels = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -3407,7 +3459,7 @@ struct test_opt_step_adamw : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
-        ggml_set_param(ctx, a); // Despite tensor a having gradients the output tensor will not.
+        ggml_set_param(a); // Despite tensor a having gradients the output tensor will not.
         ggml_set_name(a, "a");
 
         ggml_tensor * grad = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
@@ -3972,6 +4024,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
     // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
 
+    test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, true));
+    test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, true));
+
     test_cases.emplace_back(new test_conv_transpose_1d());
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
```
```diff
@@ -4546,6 +4603,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
         }
     }
 
+    test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true));
+
     return test_cases;
 }
 
```
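Together, the struct and the eval/perf registrations above pin down the shape convention for the new op: ggml `ne` arrays are innermost-first, so an input of `{17, 34, 9, 1}` reads W=17, H=34, C=9, N=1, and the matching depthwise kernel is `{KW, KH, 1, C}`. Below is a minimal standalone sketch of calling `ggml_conv_2d_dw_direct`, following the argument order visible in `test_conv_2d_dw::build_graph`; the arena size is arbitrary, tensor data is left uninitialized, and backend execution is omitted:

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /* .mem_size   = */ 64*1024*1024,   // arbitrary arena size for this sketch
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // ne is innermost-first: {W, H, C, N} input, {KW, KH, 1, C} depthwise kernel
    struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 16, 1);
    struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3,  1, 16);

    // argument order as in the test: kernel first, then input,
    // then per-axis stride, padding, and dilation
    struct ggml_tensor * out = ggml_conv_2d_dw_direct(ctx, kernel, input,
                                                      /*s0=*/1, /*s1=*/1,
                                                      /*p0=*/0, /*p1=*/0,
                                                      /*d0=*/1, /*d1=*/1);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    // computing the graph requires a backend (e.g. ggml-cpu); omitted here

    ggml_free(ctx);
    return 0;
}
```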
package/src/llama.cpp/tests/test-chat-template.cpp

```diff
@@ -181,21 +181,20 @@ int main(void) {
         },
         {
            /* .name= */ "ChatGLM4",
-            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant
-            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant
+            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
+            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n",
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "",
         },
-
-
-
-
-
-
-
-
-        // },
+        {
+            /* .name= */ "GLMEdge",
+            /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
+            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+            /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+            /* .bos_token= */ "",
+            /* .eos_token= */ "",
+        },
         {
             /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
             /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
```
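The file list above also shows llama-chat.cpp gaining template handling (+17 -7), which lines up with the new GLMEdge entry. As a rough usage sketch, assuming the `llama_chat_apply_template` signature currently declared in include/llama.h, the same template string can be applied through the public C API; the driver program itself is hypothetical:

```c
#include <stdio.h>
#include "llama.h"

int main(void) {
    // hypothetical driver; the template string is the GLMEdge entry from the hunk above
    const char * tmpl =
        "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n"
        "{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n"
        "{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n"
        "{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>";

    struct llama_chat_message msgs[] = {
        { "system",    "You are a helpful assistant" },
        { "user",      "Hello" },
        { "assistant", "Hi there" },
    };

    char buf[4096];
    // returns the formatted length; larger than the buffer means it was too small
    int32_t n = llama_chat_apply_template(tmpl, msgs, 3, /*add_ass=*/true, buf, sizeof(buf));
    if (n < 0 || n > (int32_t) sizeof(buf)) {
        fprintf(stderr, "failed to apply chat template\n");
        return 1;
    }
    printf("%.*s\n", n, buf);
    return 0;
}
```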
package/src/llama.cpp/tests/test-chat.cpp

```diff
@@ -832,7 +832,9 @@ static void test_template_output_parsers() {
         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
                       common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-
+                      common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
 
         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
```
package/src/llama.cpp/tests/test-mtmd-c-api.c (new file)

```diff
@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "mtmd.h"
+
+int main(void) {
+    printf("\n\nTesting libmtmd C API...\n");
+    printf("--------\n\n");
+
+    struct mtmd_context_params params = mtmd_context_params_default();
+    printf("Default image marker: %s\n", params.image_marker);
+
+    mtmd_input_chunks * chunks = mtmd_test_create_input_chunks();
+
+    if (!chunks) {
+        fprintf(stderr, "Failed to create input chunks\n");
+        return 1;
+    }
+
+    size_t n_chunks = mtmd_input_chunks_size(chunks);
+    printf("Number of chunks: %zu\n", n_chunks);
+    assert(n_chunks > 0);
+
+    for (size_t i = 0; i < n_chunks; i++) {
+        const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
+        assert(chunk != NULL);
+        enum mtmd_input_chunk_type type = mtmd_input_chunk_get_type(chunk);
+        printf("Chunk %zu type: %d\n", i, type);
+
+        if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+            size_t n_tokens;
+            const llama_token * tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+            printf("    Text chunk with %zu tokens\n", n_tokens);
+            assert(tokens != NULL);
+            assert(n_tokens > 0);
+            for (size_t j = 0; j < n_tokens; j++) {
+                assert(tokens[j] >= 0);
+                printf("      > Token %zu: %d\n", j, tokens[j]);
+            }
+
+        } else if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            const mtmd_image_tokens * image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+            size_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+            size_t nx = mtmd_image_tokens_get_nx(image_tokens);
+            size_t ny = mtmd_image_tokens_get_ny(image_tokens);
+            const char * id = mtmd_image_tokens_get_id(image_tokens);
+            assert(n_tokens > 0);
+            assert(nx > 0);
+            assert(ny > 0);
+            assert(id != NULL);
+            printf("    Image chunk with %zu tokens\n", n_tokens);
+            printf("    Image size: %zu x %zu\n", nx, ny);
+            printf("    Image ID: %s\n", id);
+        }
+    }
+
+    // Free the chunks
+    mtmd_input_chunks_free(chunks);
+
+    printf("\n\nDONE: test libmtmd C API...\n");
+
+    return 0;
+}
```