@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/examples/llava/llava-cli.cpp
@@ -1,332 +0,0 @@
-#include "arg.h"
-#include "base64.hpp"
-#include "log.h"
-#include "common.h"
-#include "sampling.h"
-#include "clip.h"
-#include "llava.h"
-#include "llama.h"
-#include "ggml.h"
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <vector>
-
-static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past) {
-    int N = (int) tokens.size();
-    for (int i = 0; i < N; i += n_batch) {
-        int n_eval = (int) tokens.size() - i;
-        if (n_eval > n_batch) {
-            n_eval = n_batch;
-        }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
-            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
-            return false;
-        }
-        *n_past += n_eval;
-    }
-    return true;
-}
-
-static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
-    std::vector<llama_token> tokens;
-    tokens.push_back(id);
-    return eval_tokens(ctx_llama, tokens, 1, n_past);
-}
-
-static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
-    std::string str2 = str;
-    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
-    eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
-    return true;
-}
-
-static const char * sample(struct common_sampler * smpl,
-                           struct llama_context * ctx_llama,
-                           int * n_past) {
-    const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
-    common_sampler_accept(smpl, id, true);
-
-    const llama_model * model = llama_get_model(ctx_llama);
-    const llama_vocab * vocab = llama_model_get_vocab(model);
-
-    static std::string ret;
-    if (llama_vocab_is_eog(vocab, id)) {
-        ret = "</s>";
-    } else {
-        ret = common_token_to_piece(ctx_llama, id);
-    }
-    eval_id(ctx_llama, id, n_past);
-    return ret.c_str();
-}
-
-static const char* IMG_BASE64_TAG_BEGIN = "<img src=\"data:image/jpeg;base64,";
-static const char* IMG_BASE64_TAG_END = "\">";
-
-static void find_image_tag_in_prompt(const std::string& prompt, size_t& begin_out, size_t& end_out) {
-    begin_out = prompt.find(IMG_BASE64_TAG_BEGIN);
-    end_out = prompt.find(IMG_BASE64_TAG_END, (begin_out == std::string::npos) ? 0UL : begin_out);
-}
-
-static bool prompt_contains_image(const std::string& prompt) {
-    size_t begin, end;
-    find_image_tag_in_prompt(prompt, begin, end);
-    return (begin != std::string::npos);
-}
-
-// replaces the base64 image tag in the prompt with `replacement`
-static llava_image_embed * llava_image_embed_make_with_prompt_base64(struct clip_ctx * ctx_clip, int n_threads, const std::string& prompt) {
-    size_t img_base64_str_start, img_base64_str_end;
-    find_image_tag_in_prompt(prompt, img_base64_str_start, img_base64_str_end);
-    if (img_base64_str_start == std::string::npos || img_base64_str_end == std::string::npos) {
-        LOG_ERR("%s: invalid base64 image tag. must be %s<base64 byte string>%s\n", __func__, IMG_BASE64_TAG_BEGIN, IMG_BASE64_TAG_END);
-        return NULL;
-    }
-
-    auto base64_bytes_start = img_base64_str_start + strlen(IMG_BASE64_TAG_BEGIN);
-    auto base64_bytes_count = img_base64_str_end - base64_bytes_start;
-    auto base64_str = prompt.substr(base64_bytes_start, base64_bytes_count );
-
-    auto required_bytes = base64::required_encode_size(base64_str.size());
-    auto img_bytes = std::vector<unsigned char>(required_bytes);
-    base64::decode(base64_str.begin(), base64_str.end(), img_bytes.begin());
-
-    auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, img_bytes.data(), img_bytes.size());
-    if (!embed) {
-        LOG_ERR("%s: could not load image from base64 string.\n", __func__);
-        return NULL;
-    }
-
-    return embed;
-}
-
-static std::string remove_image_from_prompt(const std::string& prompt, const char * replacement = "") {
-    size_t begin, end;
-    find_image_tag_in_prompt(prompt, begin, end);
-    if (begin == std::string::npos || end == std::string::npos) {
-        return prompt;
-    }
-    auto pre = prompt.substr(0, begin);
-    auto post = prompt.substr(end + strlen(IMG_BASE64_TAG_END));
-    return pre + replacement + post;
-}
-
-struct llava_context {
-    struct clip_ctx * ctx_clip = NULL;
-    struct llama_context * ctx_llama = NULL;
-    struct llama_model * model = NULL;
-};
-
-static void print_usage(int, char ** argv) {
-    LOG("\n example usage:\n");
-    LOG("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
-    LOG("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
-}
-
-static struct llava_image_embed * load_image(llava_context * ctx_llava, common_params * params, const std::string & fname) {
-
-    // load and preprocess the image
-    llava_image_embed * embed = NULL;
-    auto prompt = params->prompt;
-    if (prompt_contains_image(prompt)) {
-        if (!params->image.empty()) {
-            LOG_INF("using base64 encoded image instead of command line image path\n");
-        }
-        embed = llava_image_embed_make_with_prompt_base64(ctx_llava->ctx_clip, params->cpuparams.n_threads, prompt);
-        if (!embed) {
-            LOG_ERR("%s: can't load image from prompt\n", __func__);
-            return NULL;
-        }
-        params->prompt = remove_image_from_prompt(prompt);
-    } else {
-        embed = llava_image_embed_make_with_filename(ctx_llava->ctx_clip, params->cpuparams.n_threads, fname.c_str());
-        if (!embed) {
-            fprintf(stderr, "%s: is %s really an image file?\n", __func__, fname.c_str());
-            return NULL;
-        }
-    }
-
-    return embed;
-}
-
-static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed, common_params * params, const std::string & prompt) {
-    int n_past = 0;
-
-    const int max_tgt_len = params->n_predict < 0 ? 256 : params->n_predict;
-
-    std::string system_prompt, user_prompt;
-    size_t image_pos = prompt.find("<image>");
-    if (image_pos != std::string::npos) {
-        // new templating mode: Provide the full prompt including system message and use <image> as a placeholder for the image
-        system_prompt = prompt.substr(0, image_pos);
-        user_prompt = prompt.substr(image_pos + std::string("<image>").length());
-        LOG_INF("system_prompt: %s\n", system_prompt.c_str());
-        if (params->verbose_prompt) {
-            auto tmp = common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
-            for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
-            }
-        }
-        LOG_INF("user_prompt: %s\n", user_prompt.c_str());
-        if (params->verbose_prompt) {
-            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
-            for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
-            }
-        }
-    } else {
-        // llava-1.5 native mode
-        system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
-        user_prompt = prompt + "\nASSISTANT:";
-        if (params->verbose_prompt) {
-            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
-            for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
-            }
-        }
-    }
-
-    eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, true);
-    llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past);
-    eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
-
-    // generate the response
-
-    LOG("\n");
-
-    struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sampling);
-    if (!smpl) {
-        LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
-        exit(1);
-    }
-
-    std::string response = "";
-    for (int i = 0; i < max_tgt_len; i++) {
-        const char * tmp = sample(smpl, ctx_llava->ctx_llama, &n_past);
-        response += tmp;
-        if (strcmp(tmp, "</s>") == 0) break;
-        if (strstr(tmp, "###")) break; // Yi-VL behavior
-        LOG("%s", tmp);
-        if (strstr(response.c_str(), "<|im_end|>")) break; // Yi-34B llava-1.6 - for some reason those decode not as the correct token (tokenizer works)
-        if (strstr(response.c_str(), "<|im_start|>")) break; // Yi-34B llava-1.6
-        if (strstr(response.c_str(), "USER:")) break; // mistral llava-1.6
-
-        fflush(stdout);
-    }
-
-    common_sampler_free(smpl);
-    LOG("\n");
-}
-
-static struct llama_model * llava_init(common_params * params) {
-    llama_backend_init();
-    llama_numa_init(params->numa);
-
-    llama_model_params model_params = common_model_params_to_llama(*params);
-
-    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
-    if (model == NULL) {
-        LOG_ERR("%s: unable to load model\n" , __func__);
-        return NULL;
-    }
-    return model;
-}
-
-static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
-    const char * clip_path = params->mmproj.c_str();
-
-    auto prompt = params->prompt;
-    if (prompt.empty()) {
-        prompt = "describe the image in detail.";
-    }
-
-    auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
-
-    llama_context_params ctx_params = common_context_params_to_llama(*params);
-    ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
-
-    llama_context * ctx_llama = llama_init_from_model(model, ctx_params);
-
-    if (ctx_llama == NULL) {
-        LOG_ERR("%s: failed to create the llama_context\n" , __func__);
-        return NULL;
-    }
-
-    auto * ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
-
-    ctx_llava->ctx_llama = ctx_llama;
-    ctx_llava->ctx_clip = ctx_clip;
-    ctx_llava->model = model;
-    return ctx_llava;
-}
-
-static void llava_free(struct llava_context * ctx_llava) {
-    if (ctx_llava->ctx_clip) {
-        clip_free(ctx_llava->ctx_clip);
-        ctx_llava->ctx_clip = NULL;
-    }
-
-    llama_free(ctx_llava->ctx_llama);
-    llama_model_free(ctx_llava->model);
-    llama_backend_free();
-}
-
-int main(int argc, char ** argv) {
-    ggml_time_init();
-
-    common_params params;
-
-    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
-        return 1;
-    }
-
-    common_init();
-
-    if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
-        print_usage(argc, argv);
-        return 1;
-    }
-
-    auto * model = llava_init(&params);
-    if (model == NULL) {
-        fprintf(stderr, "%s: error: failed to init llava model\n", __func__);
-        return 1;
-    }
-
-    if (prompt_contains_image(params.prompt)) {
-        auto * ctx_llava = llava_init_context(&params, model);
-
-        auto * image_embed = load_image(ctx_llava, &params, "");
-
-        // process the prompt
-        process_prompt(ctx_llava, image_embed, &params, params.prompt);
-
-        llama_perf_context_print(ctx_llava->ctx_llama);
-        llava_image_embed_free(image_embed);
-        ctx_llava->model = NULL;
-        llava_free(ctx_llava);
-    } else {
-        for (auto & image : params.image) {
-            auto * ctx_llava = llava_init_context(&params, model);
-
-            auto * image_embed = load_image(ctx_llava, &params, image);
-            if (!image_embed) {
-                LOG_ERR("%s: failed to load image %s. Terminating\n\n", __func__, image.c_str());
-                return 1;
-            }
-
-            // process the prompt
-            process_prompt(ctx_llava, image_embed, &params, params.prompt);
-
-            llama_perf_context_print(ctx_llava->ctx_llama);
-            llava_image_embed_free(image_embed);
-            ctx_llava->model = NULL;
-            llava_free(ctx_llava);
-        }
-    }
-
-    llama_model_free(model);
-
-    return 0;
-}