@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/tools/mtmd/clip.h
@@ -0,0 +1,99 @@
+#pragma once
+
+#include "ggml.h"
+#include <stddef.h>
+#include <stdint.h>
+
+struct clip_ctx;
+
+struct clip_image_size {
+    int width;
+    int height;
+};
+
+struct clip_image_f32;
+struct clip_image_u8_batch;
+struct clip_image_f32_batch;
+
+struct clip_context_params {
+    bool use_gpu;
+    enum ggml_log_level verbosity;
+};
+
+struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params);
+
+void clip_free(struct clip_ctx * ctx);
+
+size_t clip_embd_nbytes(const struct clip_ctx * ctx);
+size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_w, int img_h);
+
+int32_t clip_get_image_size (const struct clip_ctx * ctx);
+int32_t clip_get_patch_size (const struct clip_ctx * ctx);
+int32_t clip_get_hidden_size(const struct clip_ctx * ctx);
+
+// TODO: should be enum, not string
+const char * clip_patch_merge_type(const struct clip_ctx * ctx);
+
+const int32_t * clip_image_grid(const struct clip_ctx * ctx);
+size_t get_clip_image_grid_size(const struct clip_ctx * ctx);
+
+int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * img);
+
+// for M-RoPE, this will be the number of token positions in X and Y directions
+// for other models, X will be the total number of tokens and Y will be 1
+int clip_n_output_tokens_x(const struct clip_ctx * ctx, struct clip_image_f32 * img);
+int clip_n_output_tokens_y(const struct clip_ctx * ctx, struct clip_image_f32 * img);
+
+// this should be equal to the embedding dimension of the text model
+int clip_n_mmproj_embd(const struct clip_ctx * ctx);
+
+int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip);
+void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size);
+struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip);
+
+struct clip_image_size      * clip_image_size_init(void);
+struct clip_image_u8        * clip_image_u8_init (void);
+struct clip_image_f32       * clip_image_f32_init(void);
+struct clip_image_f32_batch * clip_image_f32_batch_init(void); // only used by libllava
+
+// nx, ny are the output image dimensions
+unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny);
+
+void clip_image_size_free (struct clip_image_size * img_size);
+void clip_image_u8_free (struct clip_image_u8 * img);
+void clip_image_f32_free(struct clip_image_f32 * img);
+void clip_image_u8_batch_free (struct clip_image_u8_batch * batch);
+void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
+
+// use for accessing underlay data of clip_image_f32_batch
+size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch); // equivalent to batch->size()
+size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->nx
+size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny
+struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data
+
+/**
+ * Build image from pixels decoded by other libraries instead of stb_image.h for better performance.
+ * The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes
+ */
+void clip_build_img_from_pixels(const unsigned char * rgb_pixels, int nx, int ny, struct clip_image_u8 * img);
+
+bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
+
+/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
+bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
+
+/** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
+bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
+
+struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);
+
+bool clip_image_encode      (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec);
+bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
+
+int clip_is_minicpmv(const struct clip_ctx * ctx);
+bool clip_is_glm(const struct clip_ctx * ctx);
+bool clip_is_qwen2vl(const struct clip_ctx * ctx);
+bool clip_is_llava(const struct clip_ctx * ctx);
+bool clip_is_gemma3(const struct clip_ctx * ctx);
+
+bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
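For orientation, here is a minimal sketch (not part of the package) of how the clip.h API added above could be driven end to end: load an mmproj file, preprocess one image, and encode it into the projected embedding buffer. The file paths and thread count are made up for illustration, and error handling is trimmed; only functions declared in the header above are used.

#include "clip.h"

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    // Hypothetical paths and settings, for illustration only.
    struct clip_context_params cparams = { /*use_gpu*/ true, /*verbosity*/ GGML_LOG_LEVEL_INFO };
    struct clip_ctx * ctx = clip_init("mmproj-model-f16.gguf", cparams);
    if (!ctx) return 1;

    // Load the raw RGB image, then preprocess it into one or more f32 tiles.
    struct clip_image_u8 * img = clip_image_u8_init();
    if (!clip_image_load_from_file("input.jpg", img)) return 1;

    struct clip_image_f32_batch * batch = clip_image_f32_batch_init();
    if (!clip_image_preprocess(ctx, img, batch)) return 1;

    // Encode the first preprocessed tile into an embedding buffer sized for the whole image.
    float * embd = (float *) malloc(clip_embd_nbytes(ctx));
    struct clip_image_f32 * f32_img = clip_image_f32_get_img(batch, 0);
    if (!clip_image_encode(ctx, /*n_threads*/ 4, f32_img, embd)) return 1;

    printf("embedding dim per image token: %d\n", clip_n_mmproj_embd(ctx));

    free(embd);
    clip_image_f32_batch_free(batch);
    clip_image_u8_free(img);
    clip_free(ctx);
    return 0;
}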
package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp
@@ -0,0 +1,22 @@
+#include <cstdio>
+#include <string>
+
+int main(int argc, char** argv) {
+    std::string filename = "main";
+    if (argc >= 1) {
+        filename = argv[0];
+    }
+
+    // Get only the program name from the full path
+    size_t pos = filename.find_last_of("/\\");
+    if (pos != std::string::npos) {
+        filename = filename.substr(pos+1);
+    }
+
+    fprintf(stdout, "\n");
+    fprintf(stdout, "WARNING: The binary '%s' is deprecated.\n", filename.c_str());
+    fprintf(stdout, "Please use 'llama-mtmd-cli' instead.\n");
+    fprintf(stdout, "\n");
+
+    return EXIT_FAILURE;
+}
package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp
@@ -0,0 +1,370 @@
+#include "arg.h"
+#include "log.h"
+#include "common.h"
+#include "sampling.h"
+#include "llama.h"
+#include "ggml.h"
+#include "console.h"
+#include "chat.h"
+#include "mtmd.h"
+
+#include <vector>
+#include <limits.h>
+#include <cinttypes>
+
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
+#include <signal.h>
+#include <unistd.h>
+#elif defined (_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#include <signal.h>
+#endif
+
+// volatile, because of signal being an interrupt
+static volatile bool g_is_generating = false;
+static volatile bool g_is_interrupted = false;
+
+/**
+ * Please note that this is NOT a production-ready stuff.
+ * It is a playground for trying multimodal support in llama.cpp.
+ * For contributors: please keep this code simple and easy to understand.
+ */
+
+static void show_additional_info(int /*argc*/, char ** argv) {
+    LOG(
+        "Experimental CLI for multimodal\n\n"
+        "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> -p <prompt>\n\n"
+        "  -m and --mmproj are required\n"
+        "  -hf user/repo can replace both -m and --mmproj in most cases\n"
+        "  --image and -p are optional, if NOT provided, the CLI will run in chat mode\n"
+        "  to disable using GPU for mmproj model, add --no-mmproj-offload\n",
+        argv[0]
+    );
+}
+
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
+static void sigint_handler(int signo) {
+    if (signo == SIGINT) {
+        if (g_is_generating) {
+            g_is_generating = false;
+        } else {
+            console::cleanup();
+            if (g_is_interrupted) {
+                _exit(1);
+            }
+            g_is_interrupted = true;
+        }
+    }
+}
+#endif
+
+struct mtmd_cli_context {
+    mtmd::context_ptr ctx_vision;
+    common_init_result llama_init;
+
+    llama_model       * model;
+    llama_context     * lctx;
+    const llama_vocab * vocab;
+    llama_batch         batch;
+    int                 n_batch;
+
+    mtmd::bitmaps bitmaps;
+
+    // note: we know that gemma3 template is "linear", meaning each turn is completely separated to another
+    // so here we don't need to keep track of chat history
+    common_chat_templates_ptr tmpls;
+
+    // support for legacy templates (models not having EOT token)
+    llama_tokens antiprompt_tokens;
+
+    int n_threads    = 1;
+    llama_pos n_past = 0;
+
+    mtmd_cli_context(common_params & params) : llama_init(common_init_from_params(params)) {
+        model = llama_init.model.get();
+        lctx = llama_init.context.get();
+        vocab = llama_model_get_vocab(model);
+        n_threads = params.cpuparams.n_threads;
+        batch = llama_batch_init(params.n_batch, 0, 1);
+        n_batch = params.n_batch;
+
+        if (!model || !lctx) {
+            exit(1);
+        }
+
+        if (!llama_model_chat_template(model, nullptr) && params.chat_template.empty()) {
+            LOG_ERR("Model does not have chat template.\n");
+            LOG_ERR("  For old llava models, you may need to use '--chat-template vicuna'\n");
+            LOG_ERR("  For MobileVLM models, use '--chat-template deepseek'\n");
+            LOG_ERR("  For Mistral Small 3.1, use '--chat-template mistral-v7'\n");
+            exit(1);
+        }
+
+        tmpls = common_chat_templates_init(model, params.chat_template);
+        LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(tmpls.get(), params.use_jinja).c_str());
+
+        init_vision_context(params);
+
+        // load antiprompt tokens for legacy templates
+        if (params.chat_template == "vicuna") {
+            antiprompt_tokens = common_tokenize(lctx, "ASSISTANT:", false, true);
+        } else if (params.chat_template == "deepseek") {
+            antiprompt_tokens = common_tokenize(lctx, "###", false, true);
+        }
+    }
+
+    void init_vision_context(common_params & params) {
+        const char * clip_path = params.mmproj.path.c_str();
+        mtmd_context_params mparams = mtmd_context_params_default();
+        mparams.use_gpu = params.mmproj_use_gpu;
+        mparams.print_timings = true;
+        mparams.n_threads = params.cpuparams.n_threads;
+        mparams.verbosity = params.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO;
+        ctx_vision.reset(mtmd_init_from_file(clip_path, model, mparams));
+        if (!ctx_vision.get()) {
+            LOG_ERR("Failed to load vision model from %s\n", clip_path);
+            exit(1);
+        }
+    }
+
+    bool check_antiprompt(const llama_tokens & generated_tokens) {
+        if (antiprompt_tokens.empty() || generated_tokens.size() < antiprompt_tokens.size()) {
+            return false;
+        }
+        return std::equal(
+            generated_tokens.end() - antiprompt_tokens.size(),
+            generated_tokens.end(),
+            antiprompt_tokens.begin()
+        );
+    }
+
+    bool load_image(const std::string & fname) {
+        mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(fname.c_str()));
+        if (!bmp.ptr) {
+            return false;
+        }
+        bitmaps.entries.push_back(std::move(bmp));
+        return true;
+    }
+};
+
+static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int n_predict) {
+    llama_tokens generated_tokens;
+    for (int i = 0; i < n_predict; i++) {
+        if (i > n_predict || !g_is_generating || g_is_interrupted) {
+            LOG("\n");
+            break;
+        }
+
+        llama_token token_id = common_sampler_sample(smpl, ctx.lctx, -1);
+        generated_tokens.push_back(token_id);
+        common_sampler_accept(smpl, token_id, true);
+
+        if (llama_vocab_is_eog(ctx.vocab, token_id) || ctx.check_antiprompt(generated_tokens)) {
+            LOG("\n");
+            break; // end of generation
+        }
+
+        LOG("%s", common_token_to_piece(ctx.lctx, token_id).c_str());
+        fflush(stdout);
+
+        if (g_is_interrupted) {
+            LOG("\n");
+            break;
+        }
+
+        // eval the token
+        common_batch_clear(ctx.batch);
+        common_batch_add(ctx.batch, token_id, ctx.n_past++, {0}, true);
+        if (llama_decode(ctx.lctx, ctx.batch)) {
+            LOG_ERR("failed to decode token\n");
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, bool add_bos = false) {
+    common_chat_templates_inputs tmpl_inputs;
+    tmpl_inputs.messages = {msg};
+    tmpl_inputs.add_generation_prompt = true;
+    tmpl_inputs.use_jinja = false; // jinja is buggy here
+    auto formatted_chat = common_chat_templates_apply(ctx.tmpls.get(), tmpl_inputs);
+    LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.prompt.c_str());
+
+    mtmd_input_text text;
+    text.text          = formatted_chat.prompt.c_str();
+    text.add_special   = add_bos;
+    text.parse_special = true;
+
+    if (g_is_interrupted) return 0;
+
+    mtmd::input_chunks chunks(mtmd_input_chunks_init());
+    auto bitmaps_c_ptr = ctx.bitmaps.c_ptr();
+    int32_t res = mtmd_tokenize(ctx.ctx_vision.get(),
+                        chunks.ptr.get(), // output
+                        &text, // text
+                        bitmaps_c_ptr.data(),
+                        bitmaps_c_ptr.size());
+    if (res != 0) {
+        LOG_ERR("Unable to tokenize prompt, res = %d\n", res);
+        return 1;
+    }
+
+    ctx.bitmaps.entries.clear();
+
+    llama_pos new_n_past;
+    if (mtmd_helper_eval_chunks(ctx.ctx_vision.get(),
+                ctx.lctx, // lctx
+                chunks.ptr.get(), // chunks
+                ctx.n_past, // n_past
+                0, // seq_id
+                ctx.n_batch, // n_batch
+                true, // logits_last
+                &new_n_past)) {
+        LOG_ERR("Unable to eval prompt\n");
+        return 1;
+    }
+
+    ctx.n_past = new_n_past;
+
+    LOG("\n");
+
+    return 0;
+}
+
+int main(int argc, char ** argv) {
+    ggml_time_init();
+
+    common_params params;
+    params.sampling.temp = 0.2; // lower temp by default for better quality
+
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
+        return 1;
+    }
+
+    common_init();
+
+    if (params.mmproj.path.empty()) {
+        show_additional_info(argc, argv);
+        LOG_ERR("ERR: Missing --mmproj argument\n");
+        return 1;
+    }
+
+    mtmd_cli_context ctx(params);
+    LOG("%s: loading model: %s\n", __func__, params.model.path.c_str());
+
+    bool is_single_turn = !params.prompt.empty() && !params.image.empty();
+
+    struct common_sampler * smpl = common_sampler_init(ctx.model, params.sampling);
+    int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
+
+    // Ctrl+C handling
+    {
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
+        struct sigaction sigint_action;
+        sigint_action.sa_handler = sigint_handler;
+        sigemptyset (&sigint_action.sa_mask);
+        sigint_action.sa_flags = 0;
+        sigaction(SIGINT, &sigint_action, NULL);
+#elif defined (_WIN32)
+        auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
+            return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
+        };
+        SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
+#endif
+    }
+
+    if (g_is_interrupted) return 130;
+
+    if (is_single_turn) {
+        g_is_generating = true;
+        if (params.prompt.find("<__image__>") == std::string::npos) {
+            params.prompt += " <__image__>";
+        }
+        common_chat_msg msg;
+        msg.role = "user";
+        msg.content = params.prompt;
+        for (const auto & image : params.image) {
+            if (!ctx.load_image(image)) {
+                return 1; // error is already printed by libmtmd
+            }
+        }
+        if (eval_message(ctx, msg, true)) {
+            return 1;
+        }
+        if (!g_is_interrupted && generate_response(ctx, smpl, n_predict)) {
+            return 1;
+        }
+
+    } else {
+        LOG("\n Running in chat mode, available commands:");
+        LOG("\n   /image <path>    load an image");
+        LOG("\n   /clear           clear the chat history");
+        LOG("\n   /quit or /exit   exit the program");
+        LOG("\n");
+
+        bool is_first_msg = true;
+        std::string content;
+
+        while (!g_is_interrupted) {
+            g_is_generating = false;
+            LOG("\n> ");
+            console::set_display(console::user_input);
+            std::string line;
+            console::readline(line, false);
+            if (g_is_interrupted) break;
+            console::set_display(console::reset);
+            line = string_strip(line);
+            if (line.empty()) {
+                continue;
+            }
+            if (line == "/quit" || line == "/exit") {
+                break;
+            }
+            if (line == "/clear") {
+                ctx.n_past = 0;
+                llama_kv_self_seq_rm(ctx.lctx, 0, 1, -1); // keep BOS
+                LOG("Chat history cleared\n\n");
+                continue;
+            }
+            g_is_generating = true;
+            if (line == "/image" || line.find("/image ") == 0) {
+                if (line.size() < 8) {
+                    LOG_ERR("ERR: Missing image filename\n");
+                    continue;
+                }
+                std::string image = line.substr(7);
+                if (ctx.load_image(image)) {
+                    LOG("Image %s loaded\n", image.c_str());
+                    content += "<__image__>";
+                }
+                // else, error is already printed by libmtmd
+                continue;
+            } else {
+                content += line;
+            }
+            common_chat_msg msg;
+            msg.role = "user";
+            msg.content = content;
+            int ret = eval_message(ctx, msg, is_first_msg);
+            if (ret) {
+                return 1;
+            }
+            if (g_is_interrupted) break;
+            if (generate_response(ctx, smpl, n_predict)) {
+                return 1;
+            }
+            content.clear();
+            is_first_msg = false;
+        }
+    }
+    if (g_is_interrupted) LOG("\nInterrupted by user\n");
+    LOG("\n\n");
+    llama_perf_context_print(ctx.lctx);
+    return g_is_interrupted ? 130 : 0;
+}