@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/examples/llava/minicpmv-cli.cpp
@@ -1,354 +0,0 @@
-#include "arg.h"
-#include "log.h"
-#include "common.h"
-#include "sampling.h"
-#include "clip.h"
-#include "llava.h"
-#include "llama.h"
-#include "ggml.h"
-
-#include <algorithm>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <vector>
-#include <iostream> // TODO: remove me
-
-struct llava_context {
-    struct clip_ctx * ctx_clip = NULL;
-    struct llama_context * ctx_llama = NULL;
-    struct llama_model * model = NULL;
-};
-
-static void show_additional_info(int /*argc*/, char ** argv) {
-    LOG("\nexample usage:\n\n%s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
-    LOG("\nnote: a lower temperature value like 0.1 is recommended for better quality.\n");
-}
-
-static struct llama_model * llava_init(common_params * params) {
-    llama_backend_init();
-    llama_numa_init(params->numa);
-
-    llama_model_params model_params = common_model_params_to_llama(*params);
-
-    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
-    if (model == NULL) {
-        LOG_ERR("%s: unable to load model\n" , __func__);
-        return NULL;
-    }
-    return model;
-}
-
-static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
-    auto prompt = params->prompt;
-    if (prompt.empty()) {
-        prompt = "describe the image in detail.";
-    }
-
-    llama_context_params ctx_params = common_context_params_to_llama(*params);
-    if (params->n_ctx < 2048) {
-        // warn user here, "Image processing requires at least 2048 context, setting context to 2048"
-        LOG_WRN("%s: Image processing requires at least 2048 context, setting context to 2048\n" , __func__);
-        ctx_params.n_ctx = 2048;
-    } else {
-        ctx_params.n_ctx = params->n_ctx;
-    }
-
-    llama_context * ctx_llama = llama_init_from_model(model, ctx_params);
-
-    if (ctx_llama == NULL) {
-        LOG_ERR("%s: failed to create the llama_context\n" , __func__);
-        return NULL;
-    }
-
-    auto * ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
-
-    ctx_llava->ctx_llama = ctx_llama;
-    ctx_llava->model = model;
-    return ctx_llava;
-}
-
-static void llava_free(struct llava_context * ctx_llava) {
-    if (ctx_llava->ctx_clip) {
-        clip_free(ctx_llava->ctx_clip);
-        ctx_llava->ctx_clip = NULL;
-    }
-
-    llama_free(ctx_llava->ctx_llama);
-    llama_model_free(ctx_llava->model);
-    llama_backend_free();
-}
-
-static struct clip_ctx * clip_init_context(common_params * params) {
-    const char * clip_path = params->mmproj.c_str();
-
-    auto prompt = params->prompt;
-    if (prompt.empty()) {
-        prompt = "describe the image in detail.";
-    }
-    struct clip_context_params clip_params = {
-        /* use_gpu */   params->n_gpu_layers != 0,
-        /* verbosity */ params->verbosity,
-    };
-    auto * ctx_clip = clip_init(clip_path, clip_params);
-    return ctx_clip;
-}
-
-static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past) {
-    int N = (int) tokens.size();
-    for (int i = 0; i < N; i += n_batch) {
-        int n_eval = (int) tokens.size() - i;
-        if (n_eval > n_batch) {
-            n_eval = n_batch;
-        }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
-            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
-            return false;
-        }
-        *n_past += n_eval;
-    }
-    return true;
-}
-
-static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
-    std::vector<llama_token> tokens;
-    tokens.push_back(id);
-    return eval_tokens(ctx_llama, tokens, 1, n_past);
-}
-
-static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
-    std::string str2 = str;
-    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
-    return eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
-}
-
-static void process_eval_image_embed(struct llava_context * ctx_llava, const struct llava_image_embed * embeds, int n_batch, int * n_past, int idx) {
-    float * image_embed = (float *)malloc(clip_embd_nbytes(ctx_llava->ctx_clip));
-    std::memcpy(image_embed, embeds->embed + idx * clip_n_patches(ctx_llava->ctx_clip) * clip_n_mmproj_embd(ctx_llava->ctx_clip), clip_embd_nbytes(ctx_llava->ctx_clip));
-
-    auto * slice_embed = (llava_image_embed*)malloc(sizeof(llava_image_embed));
-    slice_embed->embed = image_embed;
-    slice_embed->n_image_pos = clip_n_patches(ctx_llava->ctx_clip);
-    llava_eval_image_embed(ctx_llava->ctx_llama, slice_embed, n_batch, n_past);
-    llava_image_embed_free(slice_embed);
-}
-
-static void process_image(struct llava_context * ctx_llava, struct llava_image_embed * embeds, common_params * params, int &n_past) {
-    std::string system_prompt;
-    int idx = 0;
-    int num_image_embeds = embeds->n_image_pos / clip_n_patches(ctx_llava->ctx_clip);
-    int has_minicpmv_projector = clip_is_minicpmv(ctx_llava->ctx_clip);
-    if (has_minicpmv_projector == 2) {
-        system_prompt = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n";
-    }
-    else if (has_minicpmv_projector == 3) {
-        system_prompt = "<|im_start|>user\n";
-    }
-    else if (has_minicpmv_projector == 4) {
-        system_prompt = "<|im_start|>user\n";
-    }
-    LOG_INF("%s: image token past: %d\n", __func__, n_past);
-    eval_string(ctx_llava->ctx_llama, (system_prompt+"<image>").c_str(), params->n_batch, &n_past, false);
-    process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
-    eval_string(ctx_llava->ctx_llama, std::string("</image>").c_str(), params->n_batch, &n_past, false);
-    if (num_image_embeds > 1) {
-        if (has_minicpmv_projector == 2) {
-            size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip);
-            eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false);
-            for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) {
-                for (size_t j = 0; j < num_image_embeds_col; ++j) {
-                    eval_string(ctx_llava->ctx_llama, std::string("<image>").c_str(), params->n_batch, &n_past, false);
-                    process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
-                    eval_string(ctx_llava->ctx_llama, std::string("</image>").c_str(), params->n_batch, &n_past, false);
-                    if (j == num_image_embeds_col - 1) {
-                        eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false);
-                    }
-                }
-            }
-            eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false);
-        }
-        else if (has_minicpmv_projector == 3 || has_minicpmv_projector == 4) {
-            size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip);
-            for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) {
-                for (size_t j = 0; j < num_image_embeds_col; ++j) {
-                    eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false);
-                    process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
-                    eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false);
-                    if (j == num_image_embeds_col - 1) {
-                        eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false);
-                    }
-                }
-            }
-        }
-    }
-    LOG_INF("%s: image token past: %d\n", __func__, n_past);
-}
-
-static const char * sample(struct common_sampler * smpl,
-                           struct llama_context * ctx_llama,
-                           int * n_past) {
-    const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
-    common_sampler_accept(smpl, id, true);
-
-    const llama_model * model = llama_get_model(ctx_llama);
-    const llama_vocab * vocab = llama_model_get_vocab(model);
-
-    static std::string ret;
-    if (llama_vocab_is_eog(vocab, id)) {
-        ret = "</s>";
-    } else {
-        ret = common_token_to_piece(ctx_llama, id);
-    }
-    eval_id(ctx_llama, id, n_past);
-    return ret.c_str();
-}
-
-static struct llava_context * minicpmv_init(common_params * params, const std::string & fname, int &n_past){
-    auto * ctx_clip = clip_init_context(params);
-    auto * embeds = llava_image_embed_make_with_filename(ctx_clip, params->cpuparams.n_threads, fname.c_str());
-    if (!embeds) {
-        LOG_ERR("failed to load image %s. Terminating\n\n", fname.c_str());
-        return NULL;
-    }
-
-    // process the prompt
-    if (params->prompt.empty() && params->interactive == false) {
-        LOG_ERR("prompt should be given or interactive mode should be on");
-        return NULL;
-    }
-
-    auto * model = llava_init(params);
-    if (model == NULL) {
-        fprintf(stderr, "%s: error: failed to init minicpmv model\n", __func__);
-        return NULL;
-    }
-    const int64_t t_llava_init_start_us = ggml_time_us();
-    auto * ctx_llava = llava_init_context(params, model);
-    ctx_llava->ctx_clip = ctx_clip;
-    const int64_t t_llava_init_end_us = ggml_time_us();
-    float t_llava_init_ms = (t_llava_init_end_us - t_llava_init_start_us) / 1000.0;
-    LOG_INF("%s: llava init in %8.2f ms.\n", __func__, t_llava_init_ms);
-
-    const int64_t t_process_image_start_us = ggml_time_us();
-    process_image(ctx_llava, embeds, params, n_past);
-    const int64_t t_process_image_end_us = ggml_time_us();
-    float t_process_image_ms = (t_process_image_end_us - t_process_image_start_us) / 1000.0;
-    LOG_INF("%s: llama process image in %8.2f ms.\n", __func__, t_process_image_ms);
-
-    llava_image_embed_free(embeds);
-    return ctx_llava;
-}
-
-static struct common_sampler * llama_init(struct llava_context * ctx_llava, common_params * params, const std::string & prompt, int & n_past, bool is_first = false){
-    std::string user_prompt = prompt;
-    int has_minicpmv_projector = clip_is_minicpmv(ctx_llava->ctx_clip);
-    if (!is_first) {
-        if (has_minicpmv_projector == 2) {
-            user_prompt = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" + prompt;
-        }
-        else if (has_minicpmv_projector == 3) {
-            user_prompt = "<|im_start|>user\n" + prompt;
-        }
-        else if (has_minicpmv_projector == 4) {
-            user_prompt = "<|im_start|>user\n" + prompt;
-        }
-    }
-
-    eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
-    if (has_minicpmv_projector == 2) {
-        eval_string(ctx_llava->ctx_llama, "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", params->n_batch, &n_past, false);
-    }
-    else if (has_minicpmv_projector == 3) {
-        eval_string(ctx_llava->ctx_llama, "<|im_end|><|im_start|>assistant\n", params->n_batch, &n_past, false);
-    }
-    else if (has_minicpmv_projector == 4) {
-        eval_string(ctx_llava->ctx_llama, "<|im_end|><|im_start|>assistant\n", params->n_batch, &n_past, false);
-    }
-
-    // generate the response
-
-    LOG_INF("\n");
-
-    struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sampling);
-    return smpl;
-}
-
-static const char * llama_loop(struct llava_context * ctx_llava,struct common_sampler * smpl, int &n_past){
-
-    const char * tmp = sample(smpl, ctx_llava->ctx_llama, &n_past);
-    return tmp;
-}
-
-int main(int argc, char ** argv) {
-    ggml_time_init();
-
-    common_params params;
-
-    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
-        return 1;
-    }
-
-    common_init();
-
-    if (params.mmproj.empty() || (params.image.empty())) {
-        show_additional_info(argc, argv);
-        return 1;
-    }
-
-    for (auto & image : params.image) {
-        int n_past = 0;
-        auto * ctx_llava = minicpmv_init(&params, image, n_past);
-
-        if (!params.prompt.empty()) {
-            LOG("<user>%s\n", params.prompt.c_str());
-            LOG("<assistant>");
-            auto * smpl = llama_init(ctx_llava, &params, params.prompt, n_past, true);
-            const int max_tgt_len = params.n_predict < 0 ? 256 : params.n_predict;
-            std::string response;
-            bool have_tmp = false;
-            for (int i = 0; i < max_tgt_len; i++) {
-                const auto * tmp = llama_loop(ctx_llava, smpl, n_past);
-                response += tmp;
-                if (strcmp(tmp, "</s>") == 0){
-                    if (!have_tmp) {
-                        continue;
-                    }
-                    break;
-                }
-                if (strstr(tmp, "###")) break; // Yi-VL behavior
-                have_tmp = true;
-                printf("%s", tmp);
-                if (strstr(response.c_str(), "<user>")) break; // minicpm-v
-
-                fflush(stdout);
-            }
-            common_sampler_free(smpl);
-        }else {
-            while (true) {
-                LOG("<user>");
-                std::string prompt;
-                std::getline(std::cin, prompt);
-                LOG("<assistant>");
-                auto * smpl = llama_init(ctx_llava, &params, prompt, n_past, true);
-                const int max_tgt_len = params.n_predict < 0 ? 256 : params.n_predict;
-                std::string response;
-                for (int i = 0; i < max_tgt_len; i++) {
-                    const auto * tmp = llama_loop(ctx_llava, smpl, n_past);
-                    response += tmp;
-                    if (strcmp(tmp, "</s>") == 0) break;
-                    printf("%s", tmp);// mistral llava-1.6
-                    if (strstr(response.c_str(), "<user>")) break; // minicpm-v
-                    fflush(stdout);
-                }
-                common_sampler_free(smpl);
-            }
-        }
-        printf("\n");
-        llama_perf_context_print(ctx_llava->ctx_llama);
-
-        ctx_llava->model = NULL;
-        llava_free(ctx_llava);
-    }
-
-    return 0;
-}