@fugood/llama.node 0.3.17 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +39 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +366 -19
- package/src/LlamaCompletionWorker.h +30 -10
- package/src/LlamaContext.cpp +213 -5
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
- package/src/llama.cpp/.github/workflows/build.yml +41 -762
- package/src/llama.cpp/.github/workflows/docker.yml +5 -2
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +12 -12
- package/src/llama.cpp/CMakeLists.txt +5 -17
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +31 -3
- package/src/llama.cpp/common/arg.cpp +48 -29
- package/src/llama.cpp/common/chat.cpp +128 -106
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +37 -1
- package/src/llama.cpp/common/common.h +18 -9
- package/src/llama.cpp/common/llguidance.cpp +1 -0
- package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/src/llama.cpp/common/minja/minja.hpp +69 -36
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +57 -50
- package/src/llama.cpp/examples/CMakeLists.txt +2 -23
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml.h +10 -7
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
- package/src/llama.cpp/ggml/src/ggml.c +29 -20
- package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/src/llama.cpp/include/llama.h +52 -11
- package/src/llama.cpp/requirements/requirements-all.txt +3 -3
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +6 -0
- package/src/llama.cpp/src/llama-arch.cpp +3 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +17 -7
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +389 -501
- package/src/llama.cpp/src/llama-context.h +44 -32
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +20 -38
- package/src/llama.cpp/src/llama-graph.h +12 -8
- package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
- package/src/llama.cpp/src/llama-kv-cache.h +271 -85
- package/src/llama.cpp/src/llama-memory.h +11 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +316 -69
- package/src/llama.cpp/src/llama-model.h +8 -1
- package/src/llama.cpp/src/llama-quant.cpp +15 -13
- package/src/llama.cpp/src/llama-sampling.cpp +18 -6
- package/src/llama.cpp/src/llama-vocab.cpp +42 -4
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +14 -0
- package/src/llama.cpp/tests/CMakeLists.txt +10 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
- package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
- package/src/llama.cpp/tests/test-chat.cpp +3 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
- package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
- package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
- package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
- package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.h +0 -135
- package/src/llama.cpp/examples/llava/llava.cpp +0 -586
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/mtmd.h +0 -168
- package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
package/src/llama.cpp/tests/test-backend-ops.cpp

```diff
@@ -823,7 +823,7 @@ struct test_case {
 
         ggml_build_forward_expand(gf, out);
         ggml_graph_cpy(gf, gb);
-        ggml_build_backward_expand(ctx.get(),
+        ggml_build_backward_expand(ctx.get(), gb, nullptr);
         if (expect.size() != 1 || expect[0] != 0.0f) {
             GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
             for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
@@ -1026,7 +1026,7 @@ struct test_example : public test_case {
         // Step 3: return the output tensor.
         return out;
     }
-    // In order to also check the gradients for your op, add calls like ggml_set_param(ctx, a)
+    // In order to also check the gradients for your op, add calls like ggml_set_param(a)
     // immediately after you create the tensors.
     // This is optional and only makes sense if a backward pass has actually been implemented for the new op.
 };
@@ -1058,7 +1058,7 @@ struct test_unary : public test_case {
             auto ne = ne_a; ne[0] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
             if (grad_supported) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
 
@@ -1067,7 +1067,7 @@ struct test_unary : public test_case {
         } else {
             a = ggml_new_tensor(ctx, type, 4, ne_a.data());
             if (grad_supported) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
         }
@@ -1133,7 +1133,7 @@ struct test_get_rows : public test_case {
 
         const bool grad_supported = ggml_is_matrix(in) && ggml_is_vector(rows);
         if (grad_supported) {
-            ggml_set_param(ctx, in);
+            ggml_set_param(in);
             // rows is a constant input -> no gradients
         }
 
@@ -1322,7 +1322,7 @@ struct test_repeat : public test_case {
         ggml_set_name(target, "target");
 
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         ggml_tensor * out = ggml_repeat(ctx, src, target);
@@ -1406,7 +1406,7 @@ struct test_dup : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         if (_use_permute) {
@@ -1442,7 +1442,7 @@ struct test_set : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         auto ne_dst = ne;
@@ -1450,7 +1450,7 @@ struct test_set : public test_case {
             ne_dst[i] *= 2;
         }
         ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
-        ggml_set_param(ctx, dst);
+        ggml_set_param(dst);
         ggml_set_name(dst, "dst");
 
         size_t offset = 0;
@@ -1498,7 +1498,7 @@ struct test_cpy : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         if (_src_use_permute) {
@@ -1536,7 +1536,7 @@ struct test_cont : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         src = ggml_transpose(ctx, src);
@@ -1583,8 +1583,8 @@ struct test_bin_bcast : public test_case {
         // The backward pass supports broadcasting only for GGML_ADD:
         const bool grad_supported = op == ggml_add || ggml_are_same_shape(a, b);
         if (grad_supported) {
-            ggml_set_param(ctx, a);
-            ggml_set_param(ctx, b);
+            ggml_set_param(a);
+            ggml_set_param(b);
         }
 
         ggml_tensor * out = op(ctx, a, b);
@@ -1632,11 +1632,11 @@ struct test_add1 : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * b = ggml_new_tensor_1d(ctx, type, 1);
-        // ggml_set_param(ctx, b); // TODO: implement
+        // ggml_set_param(b); // TODO: implement
         ggml_set_name(b, "b");
 
         ggml_tensor * out = ggml_add1(ctx, a, b);
@@ -1667,7 +1667,7 @@ struct test_scale : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_scale(ctx, a, scale);
@@ -1762,7 +1762,7 @@ struct test_rms_norm : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         if (v) {
@@ -1981,7 +1981,7 @@ struct test_mul_mat : public test_case {
     const std::array<int64_t, 2> bs;  // dims 3 and 4
     const std::array<int64_t, 2> nr;  // repeat in dims 3 and 4
     const std::array<int64_t, 4> per; // permutation of dimensions
-    const bool v; // whether a is a non-contiguous view
+    const bool v; // whether a and b are non-contiguous views
 
     std::string vars() override {
         return VARS_TO_STR9(type_a, type_b, m, n, k, bs, nr, per, v);
@@ -2028,9 +2028,9 @@ struct test_mul_mat : public test_case {
             b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
             if (!ggml_is_quantized(type_a)) {
                 if (bs[1] == 1 && nr[1] == 1) {
-                    ggml_set_param(ctx, a);
+                    ggml_set_param(a);
                 }
-                ggml_set_param(ctx, b);
+                ggml_set_param(b);
             }
             ggml_set_name(a, "a");
             ggml_set_name(b, "b");
@@ -2040,19 +2040,29 @@ struct test_mul_mat : public test_case {
             ggml_set_name(a, "a_permuted");
             ggml_set_name(b, "b_permuted");
         } else {
-
             if (v) {
-                a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0],
-
+                a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], bs[1]);
+                b = ggml_new_tensor_4d(ctx, type_b, k*2, n, bs[0]*nr[0], bs[1]*nr[1]);
+
+                if (!ggml_is_quantized(type_a)) {
+                    if (bs[1] == 1 && nr[1] == 1) {
+                        ggml_set_param(a);
+                    }
+                    ggml_set_param(b);
+                }
+
+                a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
+                b = ggml_view_4d(ctx, b, k, n, bs[0]*nr[0], bs[1]*nr[1], b->nb[1], b->nb[2], b->nb[3], 0);
             } else {
                 a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
-
-
-
-
-
+                b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
+
+                if (!ggml_is_quantized(type_a)) {
+                    if (bs[1] == 1 && nr[1] == 1) {
+                        ggml_set_param(a);
+                    }
+                    ggml_set_param(b);
             }
-            ggml_set_param(ctx, b);
         }
         ggml_set_name(a, "a");
         ggml_set_name(b, "b");
```
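The recurring edit in these hunks is the ggml autograd API change that comes with this llama.cpp bump: `ggml_set_param` no longer takes a `ggml_context *`, and `ggml_build_backward_expand` now takes the backward graph plus an optional gradient-accumulator argument. Below is a minimal C sketch of the new call sequence, mirroring the test harness above; the arena size and the `ggml_sqr` op are arbitrary illustration choices, not code from the package:

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /* .mem_size   = */ 16*1024*1024,   // arbitrary arena size for this sketch
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_param(a);                  // 0.3.x form: ggml_set_param(ctx, a);
    ggml_set_name(a, "a");

    struct ggml_tensor * out = ggml_sqr(ctx, a);

    // forward graph plus a copy that also receives the backward nodes,
    // following the sequence in the test_case hunk above
    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads=*/true);
    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads=*/true);
    ggml_build_forward_expand(gf, out);
    ggml_graph_cpy(gf, gb);

    // new three-argument form; NULL for the gradient-accumulator argument, as in the hunk above
    ggml_build_backward_expand(ctx, gb, NULL);

    ggml_free(ctx);
    return 0;
}
```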
The remaining test-backend-ops.cpp hunks apply the same `ggml_set_param` signature change across the other operator tests:

```diff
@@ -2201,7 +2211,7 @@ struct test_sqr : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sqr(ctx, a);
@@ -2230,7 +2240,7 @@ struct test_sqrt : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sqrt(ctx, a);
@@ -2270,7 +2280,7 @@ struct test_log : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_log(ctx, a);
@@ -2306,7 +2316,7 @@ struct test_sin : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sin(ctx, a);
@@ -2349,7 +2359,7 @@ struct test_cos : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_cos(ctx, a);
@@ -2429,7 +2439,7 @@ struct test_diag_mask_inf : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_diag_mask_inf(ctx, a, n_past);
@@ -2468,7 +2478,7 @@ struct test_soft_max : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * mask = nullptr;
@@ -2550,7 +2560,7 @@ struct test_rope : public test_case {
             auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
             if (forward) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
 
@@ -2559,7 +2569,7 @@ struct test_rope : public test_case {
         } else {
             a = ggml_new_tensor(ctx, type, 4, ne_a.data());
             if (forward) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
         }
@@ -2673,7 +2683,7 @@ struct test_pool2d : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
-        ggml_set_param(ctx, input);
+        ggml_set_param(input);
         ggml_set_name(input, "input");
 
         ggml_tensor * out = ggml_pool_2d(ctx, input, pool_type, k0, k1, s0, s1, p0, p1);
@@ -2749,7 +2759,7 @@ struct test_im2col : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
-        ggml_set_param(ctx, input);
+        ggml_set_param(input);
         ggml_set_name(input, "input");
 
         ggml_tensor * kernel = ggml_new_tensor(ctx, type_kernel, 4, ne_kernel.data());
```
A new test struct for the depthwise 2-D convolution op is added in the same file:

```diff
@@ -2762,6 +2772,48 @@ struct test_im2col : public test_case {
     }
 };
 
+// GGML_OP_CONV_2D_DW
+struct test_conv_2d_dw : public test_case {
+    const std::array<int64_t, 4> ne_input;
+    const std::array<int64_t, 4> ne_kernel;
+    const int stride;
+    const int padding;
+    const int dilation;
+    const bool cwhn;
+
+    std::string vars() override {
+        return VARS_TO_STR6(ne_input, ne_kernel, stride, padding, dilation, cwhn);
+    }
+
+    test_conv_2d_dw(std::array<int64_t, 4> ne_input = {64, 64, 16, 1},
+            std::array<int64_t, 4> ne_kernel = {3, 3, 1, 16},
+            int stride = 1, int padding = 0, int dilation = 1, bool cwhn = false)
+        : ne_input(ne_input), ne_kernel(ne_kernel), stride(stride), padding(padding), dilation(dilation), cwhn(cwhn) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+        ggml_set_name(input, "input");
+
+        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+        ggml_set_name(kernel, "kernel");
+
+        if (cwhn) {
+            // change memory layout to channel-most-contiguous (CWHN),
+            // then permute it back so NE matches the original input
+            input = ggml_cont(ctx, ggml_permute(ctx, input, 1, 2, 0, 3));
+            input = ggml_permute(ctx, input, 2, 0, 1, 3);
+            kernel = ggml_cont(ctx, ggml_permute(ctx, kernel, 2, 3, 1, 0));
+            kernel = ggml_permute(ctx, kernel, 3, 2, 0, 1);
+        }
+
+        ggml_tensor * out = ggml_conv_2d_dw_direct(
+            ctx, kernel, input,
+            stride, stride, padding, padding, dilation, dilation);
+        ggml_set_name(out, "out");
+        return out;
+    }
+};
+
 // GGML_OP_CONCAT
 struct test_concat : public test_case {
     const ggml_type type;
```
The reduction, accumulation, loss, and optimizer tests get the same treatment, and the new depthwise-conv cases are registered with the evaluation set:

```diff
@@ -2884,7 +2936,7 @@ struct test_sum : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sum(ctx, a);
@@ -2913,7 +2965,7 @@ struct test_sum_rows : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sum_rows(ctx, a);
@@ -2938,7 +2990,7 @@ struct test_mean : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_mean(ctx, a);
@@ -3084,11 +3136,11 @@ struct test_acc : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
-        ggml_set_param(ctx, b);
+        ggml_set_param(b);
         ggml_set_name(b, "b");
 
         ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
@@ -3325,7 +3377,7 @@ struct test_cross_entropy_loss : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * logits = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, logits);
+        ggml_set_param(logits);
         ggml_set_name(logits, "logits");
 
         ggml_tensor * labels = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -3407,7 +3459,7 @@ struct test_opt_step_adamw : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
-        ggml_set_param(ctx, a); // Despite tensor a having gradients the output tensor will not.
+        ggml_set_param(a); // Despite tensor a having gradients the output tensor will not.
         ggml_set_name(a, "a");
 
         ggml_tensor * grad = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
@@ -3972,6 +4024,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
     // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
 
+    test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, true));
+    test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, true));
+
     test_cases.emplace_back(new test_conv_transpose_1d());
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
    test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
```
```diff
@@ -4546,6 +4603,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
         }
     }
 
+    test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true));
+
     return test_cases;
 }
 
```
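Together, the struct and the eval/perf registrations above pin down the shape convention for the new op: ggml `ne` arrays are innermost-first, so an input of `{17, 34, 9, 1}` reads W=17, H=34, C=9, N=1, and the matching depthwise kernel is `{KW, KH, 1, C}`. Below is a minimal standalone sketch of calling `ggml_conv_2d_dw_direct`, following the argument order visible in `test_conv_2d_dw::build_graph`; the arena size is arbitrary, tensor data is left uninitialized, and backend execution is omitted:

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /* .mem_size   = */ 64*1024*1024,   // arbitrary arena size for this sketch
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // ne is innermost-first: {W, H, C, N} input, {KW, KH, 1, C} depthwise kernel
    struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 16, 1);
    struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3,  1, 16);

    // argument order as in the test: kernel first, then input,
    // then per-axis stride, padding, and dilation
    struct ggml_tensor * out = ggml_conv_2d_dw_direct(ctx, kernel, input,
                                                      /*s0=*/1, /*s1=*/1,
                                                      /*p0=*/0, /*p1=*/0,
                                                      /*d0=*/1, /*d1=*/1);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    // computing the graph requires a backend (e.g. ggml-cpu); omitted here

    ggml_free(ctx);
    return 0;
}
```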
package/src/llama.cpp/tests/test-chat-template.cpp

```diff
@@ -181,21 +181,20 @@ int main(void) {
         },
         {
            /* .name= */ "ChatGLM4",
-            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant
-            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant
+            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
+            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n",
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "",
         },
-
-
-
-
-
-
-
-
-        // },
+        {
+            /* .name= */ "GLMEdge",
+            /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
+            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+            /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+            /* .bos_token= */ "",
+            /* .eos_token= */ "",
+        },
         {
             /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
             /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
```
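The file list above also shows llama-chat.cpp gaining template handling (+17 -7), which lines up with the new GLMEdge entry. As a rough usage sketch, assuming the `llama_chat_apply_template` signature currently declared in include/llama.h, the same template string can be applied through the public C API; the driver program itself is hypothetical:

```c
#include <stdio.h>
#include "llama.h"

int main(void) {
    // hypothetical driver; the template string is the GLMEdge entry from the hunk above
    const char * tmpl =
        "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n"
        "{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n"
        "{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n"
        "{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>";

    struct llama_chat_message msgs[] = {
        { "system",    "You are a helpful assistant" },
        { "user",      "Hello" },
        { "assistant", "Hi there" },
    };

    char buf[4096];
    // returns the formatted length; larger than the buffer means it was too small
    int32_t n = llama_chat_apply_template(tmpl, msgs, 3, /*add_ass=*/true, buf, sizeof(buf));
    if (n < 0 || n > (int32_t) sizeof(buf)) {
        fprintf(stderr, "failed to apply chat template\n");
        return 1;
    }
    printf("%.*s\n", n, buf);
    return 0;
}
```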
package/src/llama.cpp/tests/test-chat.cpp

```diff
@@ -832,7 +832,9 @@ static void test_template_output_parsers() {
         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
                       common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-
+                      common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
 
         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
```
package/src/llama.cpp/tests/test-mtmd-c-api.c (new file)

```diff
@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "mtmd.h"
+
+int main(void) {
+    printf("\n\nTesting libmtmd C API...\n");
+    printf("--------\n\n");
+
+    struct mtmd_context_params params = mtmd_context_params_default();
+    printf("Default image marker: %s\n", params.image_marker);
+
+    mtmd_input_chunks * chunks = mtmd_test_create_input_chunks();
+
+    if (!chunks) {
+        fprintf(stderr, "Failed to create input chunks\n");
+        return 1;
+    }
+
+    size_t n_chunks = mtmd_input_chunks_size(chunks);
+    printf("Number of chunks: %zu\n", n_chunks);
+    assert(n_chunks > 0);
+
+    for (size_t i = 0; i < n_chunks; i++) {
+        const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
+        assert(chunk != NULL);
+        enum mtmd_input_chunk_type type = mtmd_input_chunk_get_type(chunk);
+        printf("Chunk %zu type: %d\n", i, type);
+
+        if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+            size_t n_tokens;
+            const llama_token * tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+            printf("    Text chunk with %zu tokens\n", n_tokens);
+            assert(tokens != NULL);
+            assert(n_tokens > 0);
+            for (size_t j = 0; j < n_tokens; j++) {
+                assert(tokens[j] >= 0);
+                printf("      > Token %zu: %d\n", j, tokens[j]);
+            }
+
+        } else if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            const mtmd_image_tokens * image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+            size_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+            size_t nx = mtmd_image_tokens_get_nx(image_tokens);
+            size_t ny = mtmd_image_tokens_get_ny(image_tokens);
+            const char * id = mtmd_image_tokens_get_id(image_tokens);
+            assert(n_tokens > 0);
+            assert(nx > 0);
+            assert(ny > 0);
+            assert(id != NULL);
+            printf("    Image chunk with %zu tokens\n", n_tokens);
+            printf("    Image size: %zu x %zu\n", nx, ny);
+            printf("    Image ID: %s\n", id);
+        }
+    }
+
+    // Free the chunks
+    mtmd_input_chunks_free(chunks);
+
+    printf("\n\nDONE: test libmtmd C API...\n");
+
+    return 0;
+}
```