@fugood/llama.node 0.3.16 → 0.4.0
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -19,6 +19,8 @@ static std::string normalize_newlines(const std::string & s) {
 #endif
 }

+#define U8C(x) (const char*)(u8##x)
+
 static common_chat_msg simple_msg(const std::string & role, const std::string & content) {
     common_chat_msg msg;
     msg.role = role;
@@ -35,6 +37,8 @@ int main(void) {
         {"assistant", " I am an assistant "},
         {"user", "Another question"},
     };
+
+    // std::string wrong = /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}";
     struct TestCase {
         std::string name;
         std::string template_str;
@@ -177,8 +181,8 @@ int main(void) {
         },
         {
             /* .name= */ "ChatGLM4",
-            /* .template_str= */
-            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant
+            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
+            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n",
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "",
@@ -193,8 +197,8 @@ int main(void) {
         },
         {
             /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
-            /* .template_str= */
-            /* .expected_output= */
+            /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
+            /* .expected_output= */ U8C("You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>"),
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "",
@@ -202,7 +206,7 @@ int main(void) {
         {
             /* .name= */ "DeepSeek-V2",
             /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
-            /* .expected_output= */
+            /* .expected_output= */ U8C("You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:"),
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "<|end▁of▁sentence|>",
@@ -256,7 +260,7 @@ int main(void) {
         },
         {
             /* .name= */ "Infinigence/Megrez-3B-Instruct",
-            /* .template_str= */
+            /* .template_str= */ U8C("{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"),
             /* .expected_output= */ "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
@@ -270,6 +274,22 @@ int main(void) {
             /* .bos_token= */ "",
             /* .eos_token= */ "",
         },
+        {
+            /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct",
+            /* .template_str= */ "<s>{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n",
+            /* .expected_output= */ "<s> Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
+            /* .expected_output_jinja= */ "<s> Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
+            /* .bos_token= */ "",
+            /* .eos_token= */ "",
+        },
+        {
+            /* .name= */ "inclusionAI/Ling-lite",
+            /* .template_str */ "{% for message in messages %}{% set role = message['role'] | lower %}{% if role == 'user' %}{% set role = 'HUMAN' %}{% endif %}{% set role = role | upper %}{{ '<role>' + role + '</role>' + message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ '<role>ASSISTANT</role>' }}{% endif %}",
+            /* .expected_output= */ "<role>SYSTEM</role>You are a helpful assistant<role>HUMAN</role>Hello<role>ASSISTANT</role>Hi there<role>HUMAN</role>Who are you<role>ASSISTANT</role> I am an assistant <role>HUMAN</role>Another question<role>ASSISTANT</role>",
+            /* .expected_output_jinja= */ "",
+            /* .bos_token= */ "",
+            /* .eos_token= */ "",
+        },
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;
@@ -11,8 +11,9 @@
 #include <string>

 #include "chat.h"
-
-#include "unicode.h"
+
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"

 using json = nlohmann::ordered_json;

@@ -569,6 +570,7 @@ static void test_template_output_parsers() {
     {
         // Not supported yet
         auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
     }
     {
@@ -665,6 +667,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
         std::vector<std::string> end_tokens{ "<|im_end|>" };

+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(
             COMMON_CHAT_FORMAT_HERMES_2_PRO,
@@ -793,6 +796,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };

+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
                       common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
@@ -815,6 +819,7 @@ static void test_template_output_parsers() {
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };

         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);

         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
@@ -824,8 +829,12 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };

+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-
+                      common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);

         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
@@ -851,6 +860,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
         std::vector<std::string> end_tokens{ "<|eot_id|>" };

+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);

         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
@@ -862,6 +872,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };

+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);

@@ -891,6 +902,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };

+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);

@@ -2,7 +2,6 @@
 # undef NDEBUG
 #endif

-#include "unicode.h"
 #include "sampling.h"

 #include <cassert>
@@ -84,7 +83,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,

         fprintf(stderr,
                 "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following "
-                "command: ./
+                "command: ./test-gbnf-validator test-grammar-integration.grammar.gbnf "
                 "test-grammar-integration.string.txt\n\n");
     } else {
         fprintf(stdout, "✅︎\n");
@@ -1086,6 +1085,65 @@ static void test_json_schema() {
     });
 }

+static void one_hot(llama_token_data_array & tok_arr, llama_token selected) {
+    auto n_vocab = tok_arr.size;
+
+    tok_arr.selected = -1;
+    tok_arr.sorted = false;
+    for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
+        tok_arr.data[token_id].id = token_id;
+        tok_arr.data[token_id].logit = 0.0f;
+    }
+
+    tok_arr.data[selected].logit = 100.0f;
+}
+
+static void test_sampler_chain(void) {
+    auto sparams = llama_sampler_chain_default_params();
+    sparams.no_perf = false;
+    llama_sampler * sampler = llama_sampler_chain_init(sparams);
+
+    const auto grammar_data = R"(%llguidance {}
+start: /[A-Z ]*/)";
+
+    llama_sampler_chain_add(sampler, llama_sampler_init_llg(vocab, "lark", grammar_data));
+    llama_sampler_chain_add(sampler, llama_sampler_init_dist(42));
+
+    auto input = "ALL YOUR BASE ARE BELONG TO US";
+    auto tokens = common_tokenize(vocab, input, false, false);
+
+    auto n_vocab = llama_vocab_n_tokens(vocab);
+
+    std::vector<llama_token_data> cur;
+    cur.reserve(n_vocab);
+    for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
+        cur.emplace_back(llama_token_data{ token_id, 0.0f, 0.0f });
+    }
+    auto tok_arr = llama_token_data_array{ cur.data(), cur.size(), -1, false };
+
+    for (const auto token : tokens) {
+        one_hot(tok_arr, token);
+
+        fprintf(stderr, "applying token: %d\n", token);
+        llama_sampler_apply(sampler, &tok_arr);
+
+        auto idx = tok_arr.selected;
+        fprintf(stderr, " -> %d %f\n", cur[idx].id, cur[idx].logit);
+        assert(cur[tok_arr.selected].id == token);
+        llama_sampler_accept(sampler, token);
+    }
+
+    auto tok_eos = llama_vocab_eot(vocab);
+    if (tok_eos == LLAMA_TOKEN_NULL) {
+        tok_eos = llama_vocab_eos(vocab);
+    }
+
+    one_hot(tok_arr, tok_eos);
+
+    llama_sampler_apply(sampler, &tok_arr);
+    assert(cur[tok_arr.selected].id == tok_eos);
+}
+
 int main(int argc, const char ** argv) {
     fprintf(stdout, "Running llguidance integration tests...\n");

@@ -1135,6 +1193,9 @@ int main(int argc, const char ** argv) {
     test_special_chars();
     test_quantifiers();
     test_json_schema();
+
+    test_sampler_chain();
+
     fprintf(stdout, "All tests passed.\n");
     return 0;
 }
@@ -4,7 +4,7 @@

 #include "json-schema-to-grammar.h"

-#include "llama-grammar.h"
+#include "../src/llama-grammar.h"

 #include <cassert>
 #include <fstream>
@@ -597,6 +597,22 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
             )"""
         });

+    test({
+        SUCCESS,
+        "maxItems 0",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "maxItems": 0
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space "]" space
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )"""
+    });
+
     test({
         SUCCESS,
         "maxItems 1",
@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "mtmd.h"
+
+int main(void) {
+    printf("\n\nTesting libmtmd C API...\n");
+    printf("--------\n\n");
+
+    struct mtmd_context_params params = mtmd_context_params_default();
+    printf("Default image marker: %s\n", params.image_marker);
+
+    mtmd_input_chunks * chunks = mtmd_test_create_input_chunks();
+
+    if (!chunks) {
+        fprintf(stderr, "Failed to create input chunks\n");
+        return 1;
+    }
+
+    size_t n_chunks = mtmd_input_chunks_size(chunks);
+    printf("Number of chunks: %zu\n", n_chunks);
+    assert(n_chunks > 0);
+
+    for (size_t i = 0; i < n_chunks; i++) {
+        const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
+        assert(chunk != NULL);
+        enum mtmd_input_chunk_type type = mtmd_input_chunk_get_type(chunk);
+        printf("Chunk %zu type: %d\n", i, type);
+
+        if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+            size_t n_tokens;
+            const llama_token * tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+            printf("    Text chunk with %zu tokens\n", n_tokens);
+            assert(tokens != NULL);
+            assert(n_tokens > 0);
+            for (size_t j = 0; j < n_tokens; j++) {
+                assert(tokens[j] >= 0);
+                printf("      > Token %zu: %d\n", j, tokens[j]);
+            }
+
+        } else if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            const mtmd_image_tokens * image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+            size_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+            size_t nx = mtmd_image_tokens_get_nx(image_tokens);
+            size_t ny = mtmd_image_tokens_get_ny(image_tokens);
+            const char * id = mtmd_image_tokens_get_id(image_tokens);
+            assert(n_tokens > 0);
+            assert(nx > 0);
+            assert(ny > 0);
+            assert(id != NULL);
+            printf("    Image chunk with %zu tokens\n", n_tokens);
+            printf("    Image size: %zu x %zu\n", nx, ny);
+            printf("    Image ID: %s\n", id);
+        }
+    }
+
+    // Free the chunks
+    mtmd_input_chunks_free(chunks);
+
+    printf("\n\nDONE: test libmtmd C API...\n");
+
+    return 0;
+}
@@ -57,7 +57,8 @@ static helper_ctx_data helper_get_ctx_data(
         enum ggml_opt_loss_type loss_type = GGML_OPT_LOSS_TYPE_SUM) {
     std::vector<ggml_opt_dataset_t> datasets(ndata);
     for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
-        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+            GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, ndata_shard);

         float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
         float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
@@ -74,7 +75,8 @@ static helper_ctx_data helper_get_ctx_data(
         datasets[ndata_shard-1] = dataset;
     }

-    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
+    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
+        GGML_TYPE_F32, GGML_TYPE_F32, 1, 0, ndata, /*ndata_shard =*/ 1);

     float * data = ggml_get_data_f32(ggml_opt_dataset_data(dataset_unsupervised));

@@ -113,7 +115,7 @@ static helper_ctx_data helper_get_ctx_data(

     struct ggml_tensor * weights = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(weights, "weights");
-    ggml_set_param(
+    ggml_set_param(weights);

     struct ggml_tensor * intermediary = ggml_add(ctx_compute, inputs, weights);

@@ -127,8 +129,11 @@ static helper_ctx_data helper_get_ctx_data(
     GGML_ASSERT(nbatch_logical % nbatch_physical == 0);
     const int32_t opt_period = nbatch_logical / nbatch_physical;

-    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched,
-    opt_params.
+    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, loss_type);
+    opt_params.ctx_compute = ctx_compute;
+    opt_params.inputs = inputs;
+    opt_params.outputs = outputs;
+    opt_params.opt_period = opt_period;
     if (!optimizer_defaults) {
         opt_params.get_opt_pars = helper_get_test_opt_pars;
     }
@@ -264,8 +269,9 @@ static std::pair<int, int> test_grad(ggml_backend_sched_t backend_sched, ggml_ba

     for (int idata = 0; idata < ndata; ++idata) {
         const float idataf = idata;
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
         ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-
+        ggml_opt_eval(cd.opt_ctx, cd.result);
         ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, sizeof(float));
     }

@@ -334,8 +340,9 @@ static std::pair<int, int> test_forward_backward(
     } else {
         for (int idata = 0; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
         }
     }
@@ -367,7 +374,8 @@ static std::pair<int, int> test_forward_backward(
     float w0;
     ggml_backend_tensor_get(cd.weights, &w0, 0, sizeof(float));
     for (int i = 0; i < 10; ++i) {
-
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
+        ggml_opt_eval(cd.opt_ctx, cd.result);
     }
     ggml_backend_tensor_set(cd.weights, &w0, 0, sizeof(float));

@@ -387,8 +395,9 @@ static std::pair<int, int> test_forward_backward(
     } else {
         for (int idata = 0; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
         }
     }
@@ -492,14 +501,16 @@ static std::pair<int, int> test_idata_split(ggml_backend_sched_t backend_sched,
         int idata = 0;
         for (; idata < idata_split; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
         }
         for (; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-
+            ggml_opt_eval(cd.opt_ctx, cd.result2);
             ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
         }
     }
@@ -573,7 +584,6 @@ static std::pair<int, int> test_gradient_accumulation(

     struct helper_ctx_data cd = helper_get_ctx_data(
         backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false, /*nbatch_logical =*/ 6, nbatch_physical, loss_type);
-    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);

     std::vector<float> grad_history(ndata);
     for (int64_t idata = 0; idata < ndata; ++idata) {
@@ -584,15 +594,17 @@ static std::pair<int, int> test_gradient_accumulation(
         if (nbatch_physical == 1) {
             for (int idata = 0; idata < ndata; ++idata) {
                 const float idataf = idata;
+                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                 ggml_backend_tensor_set(cd.inputs, &idataf, 0, 1*sizeof(float));
-
+                ggml_opt_eval(cd.opt_ctx, cd.result);
                 ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, 1*sizeof(float));
             }
         } else if (nbatch_physical == 2) {
             for (int idata = 0; idata < ndata; idata += 2) {
                 const float idataf[2] = {float(idata + 0), float(idata + 1)};
+                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                 ggml_backend_tensor_set(cd.inputs, idataf, 0, 2*sizeof(float));
-
+                ggml_opt_eval(cd.opt_ctx, cd.result);

                 grad_history[idata + 0] = 0.0f;
                 ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata + 1, 0, 1*sizeof(float));
@@ -617,7 +629,7 @@ static std::pair<int, int> test_gradient_accumulation(
         }
         subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0, atol);
         subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0, atol);
-        subtest_ok = subtest_ok && almost_equal(grad_history[5],
+        subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0, atol);
     } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
         if (nbatch_physical == 1) {
             subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0/ndata, atol);
@@ -630,7 +642,7 @@ static std::pair<int, int> test_gradient_accumulation(
         }
         subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0/ndata, atol);
         subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0/ndata, atol);
-        subtest_ok = subtest_ok && almost_equal(grad_history[5],
+        subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0/ndata, atol);
     } else {
         GGML_ASSERT(false);
     }
@@ -692,7 +704,8 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
     std::mt19937 gen(12345);
     std::normal_distribution<float> nd{0.0f, 0.1f};

-    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+        GGML_TYPE_F32, GGML_TYPE_F32, 1, 1, ndata_regression, ndata_regression);

     float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
     float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
@@ -733,15 +746,14 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g

     struct ggml_tensor * a = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(a, "a");
-    ggml_set_param(
+    ggml_set_param(a);

     struct ggml_tensor * b = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(b, "b");
-    ggml_set_param(
+    ggml_set_param(b);

     struct ggml_tensor * f = ggml_add(ctx_compute, ggml_mul(ctx_compute, x, a), b);
     ggml_set_name(f, "f");
-    ggml_set_param(ctx_static, f);

     ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
     const float a0 = 1.0f;
@@ -853,7 +865,7 @@ int main(void) {
         backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());

         ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
-            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false);
+            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);

         printf("Backend %zu/%zu: %s\n", i + 1, dev_count, ggml_backend_dev_name(devs[i]));
         printf(" Device description: %s\n", ggml_backend_dev_description(devs[i]));