@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -19,6 +19,8 @@ static std::string normalize_newlines(const std::string & s) {
 #endif
 }
 
+#define U8C(x) (const char*)(u8##x)
+
 static common_chat_msg simple_msg(const std::string & role, const std::string & content) {
     common_chat_msg msg;
     msg.role = role;
@@ -35,6 +37,8 @@ int main(void) {
         {"assistant", " I am an assistant "},
         {"user", "Another question"},
     };
+
+    // std::string wrong = /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}";
     struct TestCase {
         std::string name;
         std::string template_str;
@@ -177,8 +181,8 @@ int main(void) {
         },
         {
             /* .name= */ "ChatGLM4",
-            /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
-            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
+            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n",
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "",
@@ -193,8 +197,8 @@ int main(void) {
         },
         {
             /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
-            /* .template_str= */ u8"{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}",
-            /* .expected_output= */ u8"You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>",
+            /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
+            /* .expected_output= */ U8C("You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>"),
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "",
@@ -202,7 +206,7 @@ int main(void) {
         {
             /* .name= */ "DeepSeek-V2",
             /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
-            /* .expected_output= */ u8"You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:",
+            /* .expected_output= */ U8C("You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:"),
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "<|end▁of▁sentence|>",
@@ -256,7 +260,7 @@ int main(void) {
         },
         {
             /* .name= */ "Infinigence/Megrez-3B-Instruct",
-            /* .template_str= */ u8"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}",
+            /* .template_str= */ U8C("{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"),
             /* .expected_output= */ "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
@@ -270,6 +274,22 @@ int main(void) {
             /* .bos_token= */ "",
             /* .eos_token= */ "",
         },
+        {
+            /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct",
+            /* .template_str= */ "<s>{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n",
+            /* .expected_output= */ "<s> Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
+            /* .expected_output_jinja= */ "<s> Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
+            /* .bos_token= */ "",
+            /* .eos_token= */ "",
+        },
+        {
+            /* .name= */ "inclusionAI/Ling-lite",
+            /* .template_str */ "{% for message in messages %}{% set role = message['role'] | lower %}{% if role == 'user' %}{% set role = 'HUMAN' %}{% endif %}{% set role = role | upper %}{{ '<role>' + role + '</role>' + message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ '<role>ASSISTANT</role>' }}{% endif %}",
+            /* .expected_output= */ "<role>SYSTEM</role>You are a helpful assistant<role>HUMAN</role>Hello<role>ASSISTANT</role>Hi there<role>HUMAN</role>Who are you<role>ASSISTANT</role> I am an assistant <role>HUMAN</role>Another question<role>ASSISTANT</role>",
+            /* .expected_output_jinja= */ "",
+            /* .bos_token= */ "",
+            /* .eos_token= */ "",
+        },
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;
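Note on the U8C macro added at the top of this file: presumably it exists because, under C++20, u8"..." literals have type const char8_t[] and no longer convert implicitly to const char* or std::string, which the template strings above rely on; the cast keeps the file building under both C++17 and C++20. A minimal standalone sketch of the problem and the workaround (not part of the diff):

    #include <string>

    // same definition as introduced in the hunk above
    #define U8C(x) (const char*)(u8##x)

    int main() {
        // std::string bad = u8"可用工具";  // ill-formed under C++20: const char8_t* does not convert to std::string
        std::string ok = U8C("可用工具");    // cast makes the literal usable as const char* in both standards
        return ok.empty();
    }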
@@ -11,8 +11,9 @@
 #include <string>
 
 #include "chat.h"
-#include "llama-grammar.h"
-#include "unicode.h"
+
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
 
 using json = nlohmann::ordered_json;
 
@@ -569,6 +570,7 @@ static void test_template_output_parsers() {
     {
         // Not supported yet
         auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
     }
     {
@@ -665,6 +667,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
         std::vector<std::string> end_tokens{ "<|im_end|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(
             COMMON_CHAT_FORMAT_HERMES_2_PRO,
@@ -793,6 +796,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
                       common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
@@ -815,6 +819,7 @@ static void test_template_output_parsers() {
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
 
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
 
         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
@@ -824,8 +829,12 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-                     common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+                      common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
 
         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
@@ -851,6 +860,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
         std::vector<std::string> end_tokens{ "<|eot_id|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
 
         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
@@ -862,6 +872,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);
 
@@ -891,6 +902,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);
 
@@ -1,5 +1,5 @@
-#include "unicode.h"
-#include "llama-grammar.h"
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
 
 #include <cstdio>
 #include <cstdlib>
@@ -2,10 +2,11 @@
 #undef NDEBUG
 #endif
 
-#include "unicode.h"
-#include "llama-grammar.h"
 #include "json-schema-to-grammar.h"
 
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
+
 #include <cassert>
 #include <string>
 #include <vector>
@@ -2,7 +2,6 @@
 # undef NDEBUG
 #endif
 
-#include "unicode.h"
 #include "sampling.h"
 
 #include <cassert>
@@ -84,7 +83,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
 
         fprintf(stderr,
                 "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following "
-                "command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf "
+                "command: ./test-gbnf-validator test-grammar-integration.grammar.gbnf "
                 "test-grammar-integration.string.txt\n\n");
     } else {
         fprintf(stdout, "✅︎\n");
@@ -1086,6 +1085,65 @@ static void test_json_schema() {
     });
 }
 
+static void one_hot(llama_token_data_array & tok_arr, llama_token selected) {
+    auto n_vocab = tok_arr.size;
+
+    tok_arr.selected = -1;
+    tok_arr.sorted = false;
+    for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
+        tok_arr.data[token_id].id = token_id;
+        tok_arr.data[token_id].logit = 0.0f;
+    }
+
+    tok_arr.data[selected].logit = 100.0f;
+}
+
+static void test_sampler_chain(void) {
+    auto sparams = llama_sampler_chain_default_params();
+    sparams.no_perf = false;
+    llama_sampler * sampler = llama_sampler_chain_init(sparams);
+
+    const auto grammar_data = R"(%llguidance {}
+start: /[A-Z ]*/)";
+
+    llama_sampler_chain_add(sampler, llama_sampler_init_llg(vocab, "lark", grammar_data));
+    llama_sampler_chain_add(sampler, llama_sampler_init_dist(42));
+
+    auto input = "ALL YOUR BASE ARE BELONG TO US";
+    auto tokens = common_tokenize(vocab, input, false, false);
+
+    auto n_vocab = llama_vocab_n_tokens(vocab);
+
+    std::vector<llama_token_data> cur;
+    cur.reserve(n_vocab);
+    for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
+        cur.emplace_back(llama_token_data{ token_id, 0.0f, 0.0f });
+    }
+    auto tok_arr = llama_token_data_array{ cur.data(), cur.size(), -1, false };
+
+    for (const auto token : tokens) {
+        one_hot(tok_arr, token);
+
+        fprintf(stderr, "applying token: %d\n", token);
+        llama_sampler_apply(sampler, &tok_arr);
+
+        auto idx = tok_arr.selected;
+        fprintf(stderr, " -> %d %f\n", cur[idx].id, cur[idx].logit);
+        assert(cur[tok_arr.selected].id == token);
+        llama_sampler_accept(sampler, token);
+    }
+
+    auto tok_eos = llama_vocab_eot(vocab);
+    if (tok_eos == LLAMA_TOKEN_NULL) {
+        tok_eos = llama_vocab_eos(vocab);
+    }
+
+    one_hot(tok_arr, tok_eos);
+
+    llama_sampler_apply(sampler, &tok_arr);
+    assert(cur[tok_arr.selected].id == tok_eos);
+}
+
 int main(int argc, const char ** argv) {
     fprintf(stdout, "Running llguidance integration tests...\n");
 
@@ -1135,6 +1193,9 @@ int main(int argc, const char ** argv) {
     test_special_chars();
     test_quantifiers();
     test_json_schema();
+
+    test_sampler_chain();
+
     fprintf(stdout, "All tests passed.\n");
     return 0;
 }
@@ -3,7 +3,9 @@
 #endif
 
 #include "llama.h"
-#include "llama-grammar.h"
+
+// TODO: shold not include libllama sources
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 
@@ -4,7 +4,7 @@
 
 #include "json-schema-to-grammar.h"
 
-#include "llama-grammar.h"
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 #include <fstream>
@@ -597,6 +597,22 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
         )"""
     });
 
+    test({
+        SUCCESS,
+        "maxItems 0",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "maxItems": 0
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space "]" space
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )"""
+    });
+
     test({
         SUCCESS,
         "maxItems 1",
@@ -3,7 +3,8 @@
 #endif
 
 #include "llama.h"
-#include "llama-grammar.h"
+
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 #include <stdexcept>
@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "mtmd.h"
+
+int main(void) {
+    printf("\n\nTesting libmtmd C API...\n");
+    printf("--------\n\n");
+
+    struct mtmd_context_params params = mtmd_context_params_default();
+    printf("Default image marker: %s\n", params.image_marker);
+
+    mtmd_input_chunks * chunks = mtmd_test_create_input_chunks();
+
+    if (!chunks) {
+        fprintf(stderr, "Failed to create input chunks\n");
+        return 1;
+    }
+
+    size_t n_chunks = mtmd_input_chunks_size(chunks);
+    printf("Number of chunks: %zu\n", n_chunks);
+    assert(n_chunks > 0);
+
+    for (size_t i = 0; i < n_chunks; i++) {
+        const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
+        assert(chunk != NULL);
+        enum mtmd_input_chunk_type type = mtmd_input_chunk_get_type(chunk);
+        printf("Chunk %zu type: %d\n", i, type);
+
+        if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+            size_t n_tokens;
+            const llama_token * tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+            printf("  Text chunk with %zu tokens\n", n_tokens);
+            assert(tokens != NULL);
+            assert(n_tokens > 0);
+            for (size_t j = 0; j < n_tokens; j++) {
+                assert(tokens[j] >= 0);
+                printf("    > Token %zu: %d\n", j, tokens[j]);
+            }
+
+        } else if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            const mtmd_image_tokens * image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+            size_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+            size_t nx = mtmd_image_tokens_get_nx(image_tokens);
+            size_t ny = mtmd_image_tokens_get_ny(image_tokens);
+            const char * id = mtmd_image_tokens_get_id(image_tokens);
+            assert(n_tokens > 0);
+            assert(nx > 0);
+            assert(ny > 0);
+            assert(id != NULL);
+            printf("  Image chunk with %zu tokens\n", n_tokens);
+            printf("  Image size: %zu x %zu\n", nx, ny);
+            printf("  Image ID: %s\n", id);
+        }
+    }
+
+    // Free the chunks
+    mtmd_input_chunks_free(chunks);
+
+    printf("\n\nDONE: test libmtmd C API...\n");
+
+    return 0;
+}
@@ -57,7 +57,8 @@ static helper_ctx_data helper_get_ctx_data(
         enum ggml_opt_loss_type loss_type = GGML_OPT_LOSS_TYPE_SUM) {
     std::vector<ggml_opt_dataset_t> datasets(ndata);
     for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
-        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(ne_datapoint, ne_label, ndata, ndata_shard);
+        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+            GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, ndata_shard);
 
         float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
         float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
@@ -74,7 +75,8 @@ static helper_ctx_data helper_get_ctx_data(
         datasets[ndata_shard-1] = dataset;
     }
 
-    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(1, 0, ndata, /*ndata_shard =*/ 1);
+    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
+        GGML_TYPE_F32, GGML_TYPE_F32, 1, 0, ndata, /*ndata_shard =*/ 1);
 
     float * data = ggml_get_data_f32(ggml_opt_dataset_data(dataset_unsupervised));
 
@@ -113,7 +115,7 @@ static helper_ctx_data helper_get_ctx_data(
 
     struct ggml_tensor * weights = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(weights, "weights");
-    ggml_set_param(ctx_static, weights);
+    ggml_set_param(weights);
 
     struct ggml_tensor * intermediary = ggml_add(ctx_compute, inputs, weights);
 
@@ -127,8 +129,11 @@ static helper_ctx_data helper_get_ctx_data(
     GGML_ASSERT(nbatch_logical % nbatch_physical == 0);
     const int32_t opt_period = nbatch_logical / nbatch_physical;
 
-    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, ctx_compute, inputs, outputs, loss_type);
-    opt_params.opt_period = opt_period;
+    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, loss_type);
+    opt_params.ctx_compute = ctx_compute;
+    opt_params.inputs = inputs;
+    opt_params.outputs = outputs;
+    opt_params.opt_period = opt_period;
     if (!optimizer_defaults) {
         opt_params.get_opt_pars = helper_get_test_opt_pars;
     }
@@ -264,8 +269,9 @@ static std::pair<int, int> test_grad(ggml_backend_sched_t backend_sched, ggml_ba
 
     for (int idata = 0; idata < ndata; ++idata) {
         const float idataf = idata;
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
         ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-        ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+        ggml_opt_eval(cd.opt_ctx, cd.result);
         ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, sizeof(float));
     }
 
@@ -334,8 +340,9 @@ static std::pair<int, int> test_forward_backward(
     } else {
         for (int idata = 0; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-            ggml_opt_forward(cd.opt_ctx, cd.result);
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
         }
     }
@@ -367,7 +374,8 @@ static std::pair<int, int> test_forward_backward(
     float w0;
     ggml_backend_tensor_get(cd.weights, &w0, 0, sizeof(float));
     for (int i = 0; i < 10; ++i) {
-        ggml_opt_forward_backward(cd.opt_ctx, nullptr);
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
+        ggml_opt_eval(cd.opt_ctx, cd.result);
     }
     ggml_backend_tensor_set(cd.weights, &w0, 0, sizeof(float));
 
@@ -387,8 +395,9 @@ static std::pair<int, int> test_forward_backward(
     } else {
         for (int idata = 0; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-            ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
         }
     }
@@ -492,14 +501,16 @@ static std::pair<int, int> test_idata_split(ggml_backend_sched_t backend_sched,
     int idata = 0;
     for (; idata < idata_split; ++idata) {
         const float idataf = idata;
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
         ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-        ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+        ggml_opt_eval(cd.opt_ctx, cd.result);
         ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
     }
     for (; idata < ndata; ++idata) {
         const float idataf = idata;
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
         ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-        ggml_opt_forward(cd.opt_ctx, cd.result2);
+        ggml_opt_eval(cd.opt_ctx, cd.result2);
         ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
     }
 }
@@ -573,7 +584,6 @@ static std::pair<int, int> test_gradient_accumulation(
 
     struct helper_ctx_data cd = helper_get_ctx_data(
         backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false, /*nbatch_logical =*/ 6, nbatch_physical, loss_type);
-    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);
 
     std::vector<float> grad_history(ndata);
     for (int64_t idata = 0; idata < ndata; ++idata) {
@@ -584,15 +594,17 @@ static std::pair<int, int> test_gradient_accumulation(
     if (nbatch_physical == 1) {
         for (int idata = 0; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, 1*sizeof(float));
-            ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, 1*sizeof(float));
         }
     } else if (nbatch_physical == 2) {
         for (int idata = 0; idata < ndata; idata += 2) {
             const float idataf[2] = {float(idata + 0), float(idata + 1)};
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
             ggml_backend_tensor_set(cd.inputs, idataf, 0, 2*sizeof(float));
-            ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+            ggml_opt_eval(cd.opt_ctx, cd.result);
 
             grad_history[idata + 0] = 0.0f;
             ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata + 1, 0, 1*sizeof(float));
@@ -617,7 +629,7 @@ static std::pair<int, int> test_gradient_accumulation(
         }
         subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0, atol);
         subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0, atol);
-        subtest_ok = subtest_ok && almost_equal(grad_history[5], 0.0, atol);
+        subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0, atol);
     } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
         if (nbatch_physical == 1) {
             subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0/ndata, atol);
@@ -630,7 +642,7 @@ static std::pair<int, int> test_gradient_accumulation(
         }
         subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0/ndata, atol);
         subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0/ndata, atol);
-        subtest_ok = subtest_ok && almost_equal(grad_history[5], 0.0/ndata, atol);
+        subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0/ndata, atol);
     } else {
         GGML_ASSERT(false);
     }
@@ -692,7 +704,8 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
     std::mt19937 gen(12345);
     std::normal_distribution<float> nd{0.0f, 0.1f};
 
-    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(1, 1, ndata_regression, ndata_regression);
+    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+        GGML_TYPE_F32, GGML_TYPE_F32, 1, 1, ndata_regression, ndata_regression);
 
     float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
     float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
@@ -733,15 +746,14 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
 
     struct ggml_tensor * a = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(a, "a");
-    ggml_set_param(ctx_static, a);
+    ggml_set_param(a);
 
     struct ggml_tensor * b = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(b, "b");
-    ggml_set_param(ctx_static, b);
+    ggml_set_param(b);
 
     struct ggml_tensor * f = ggml_add(ctx_compute, ggml_mul(ctx_compute, x, a), b);
     ggml_set_name(f, "f");
-    ggml_set_param(ctx_static, f);
 
     ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
     const float a0 = 1.0f;
@@ -853,7 +865,7 @@ int main(void) {
         backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());
 
         ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
-            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false);
+            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);
 
         printf("Backend %zu/%zu: %s\n", i + 1, dev_count, ggml_backend_dev_name(devs[i]));
         printf("  Device description: %s\n", ggml_backend_dev_description(devs[i]));
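The test-opt hunks above track an API change in ggml-opt that runs through this diff: ggml_opt_dataset_init now takes explicit data/label tensor types, ggml_opt_default_params takes only the scheduler and loss type (compute context, inputs and outputs become struct fields), and the ggml_opt_forward / ggml_opt_forward_backward pair is replaced by ggml_opt_alloc followed by ggml_opt_eval. A minimal sketch of one step under the new pattern, using only calls that appear in the hunks; opt_ctx, inputs, result, batch, ne_datapoint, ne_label and ndata are placeholder names mirroring the test fixture, not a definitive recipe:

    // datasets now declare their data and label tensor types explicitly (new signature)
    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
        GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, /*ndata_shard =*/ 1);

    // one optimization step: allocate graphs (pass false for a forward-only step),
    // stage this step's inputs, then evaluate
    ggml_opt_alloc(opt_ctx, /*backward =*/ true);
    ggml_backend_tensor_set(inputs, batch, 0, ggml_nbytes(inputs));
    ggml_opt_eval(opt_ctx, result);  // runs forward (and backward when allocated), accumulating into result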
@@ -1,8 +1,10 @@
 #include "ggml.h"
+#include "ggml-cpu.h"
 #include "llama.h"
-#include "llama-model.h"
 #include "common.h"
 
+#include "../src/llama-model.h"
+
 #include <algorithm>
 #include <cassert>
 #include <cinttypes>