@fugood/llama.node 0.3.2 → 0.3.4
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- package/CMakeLists.txt +7 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +17 -7
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +89 -27
- package/src/LlamaContext.h +2 -0
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +240 -168
- package/src/llama.cpp/.github/workflows/docker.yml +8 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +14 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -4
- package/src/llama.cpp/common/arg.cpp +986 -770
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +212 -351
- package/src/llama.cpp/common/common.h +204 -117
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +163 -121
- package/src/llama.cpp/common/sampling.h +41 -20
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +134 -161
- package/src/llama.cpp/examples/CMakeLists.txt +33 -14
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +19 -18
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +41 -87
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +263 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +83 -22
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
- package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +73 -114
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
- package/src/llama.cpp/examples/server/server.cpp +2073 -1339
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +354 -277
- package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/simple/simple.cpp +130 -94
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
- package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +159 -417
- package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +93 -52
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +4 -8
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +779 -194
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +55 -10
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +4317 -2979
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -38
- package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
- package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +62 -20
- package/src/llama.cpp/tests/test-sampling.cpp +163 -138
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/src/llama.cpp/examples/llava/llava-cli.cpp

@@ -20,7 +20,7 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval
+        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
             LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
             return false;
         }
@@ -37,21 +37,21 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 
 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp =
+    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
     eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
     return true;
 }
 
-static const char * sample(struct
+static const char * sample(struct common_sampler * smpl,
                            struct llama_context * ctx_llama,
                            int * n_past) {
-    const llama_token id =
-
+    const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
+    common_sampler_accept(smpl, id, true);
     static std::string ret;
     if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
         ret = "</s>";
     } else {
-        ret =
+        ret = common_token_to_piece(ctx_llama, id);
     }
     eval_id(ctx_llama, id, n_past);
     return ret.c_str();
@@ -120,7 +120,7 @@ static void print_usage(int, char ** argv) {
     LOG("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
 }
 
-static struct llava_image_embed * load_image(llava_context * ctx_llava,
+static struct llava_image_embed * load_image(llava_context * ctx_llava, common_params * params, const std::string & fname) {
 
     // load and preprocess the image
     llava_image_embed * embed = NULL;
@@ -146,7 +146,7 @@ static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_para
     return embed;
 }
 
-static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed,
+static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed, common_params * params, const std::string & prompt) {
     int n_past = 0;
 
     const int max_tgt_len = params->n_predict < 0 ? 256 : params->n_predict;
@@ -159,16 +159,16 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         user_prompt = prompt.substr(image_pos + std::string("<image>").length());
         LOG_INF("system_prompt: %s\n", system_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp =
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_INF("%6d -> '%s'\n", tmp[i],
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
         LOG_INF("user_prompt: %s\n", user_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp =
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_INF("%6d -> '%s'\n", tmp[i],
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
     } else {
@@ -176,9 +176,9 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
         user_prompt = prompt + "\nASSISTANT:";
         if (params->verbose_prompt) {
-            auto tmp =
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_INF("%6d -> '%s'\n", tmp[i],
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
     }
@@ -191,7 +191,7 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
 
     LOG("\n");
 
-    struct
+    struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sampling);
     if (!smpl) {
         LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
         exit(1);
@@ -211,15 +211,15 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         fflush(stdout);
     }
 
-
+    common_sampler_free(smpl);
     LOG("\n");
 }
 
-static struct llama_model * llava_init(
+static struct llama_model * llava_init(common_params * params) {
     llama_backend_init();
     llama_numa_init(params->numa);
 
-    llama_model_params model_params =
+    llama_model_params model_params = common_model_params_to_llama(*params);
 
     llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
@@ -229,7 +229,7 @@ static struct llama_model * llava_init(gpt_params * params) {
    return model;
 }
 
-static struct llava_context * llava_init_context(
+static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
    const char * clip_path = params->mmproj.c_str();
 
    auto prompt = params->prompt;
@@ -240,7 +240,7 @@ static struct llava_context * llava_init_context(gpt_params * params, llama_mode
    auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
 
 
-    llama_context_params ctx_params =
+    llama_context_params ctx_params = common_context_params_to_llama(*params);
    ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
 
    llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
@@ -272,13 +272,13 @@ static void llava_free(struct llava_context * ctx_llava) {
 int main(int argc, char ** argv) {
     ggml_time_init();
 
-
+    common_params params;
 
-    if (!
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
         return 1;
     }
 
-
+    common_init();
 
     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
         print_usage(argc, argv);

package/src/llama.cpp/examples/llava/llava.cpp

@@ -11,13 +11,17 @@
 #include <limits>
 #include <vector>
 
-#
-#define
-
-#define
-#define
-#
-#define
+#if defined(LLAVA_LOG_OFF)
+#   define LOG_INF(...)
+#   define LOG_WRN(...)
+#   define LOG_ERR(...)
+#   define LOG_DBG(...)
+#else // defined(LLAVA_LOG_OFF)
+#   define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#   define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#   define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#   define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#endif // defined(LLAVA_LOG_OFF)
 
 // RGB uint8 image
 struct clip_image_u8 {
@@ -255,25 +259,33 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
 
     const char * mm_patch_merge_type = clip_patch_merge_type(ctx_clip);
 
-    if (clip_is_minicpmv(ctx_clip)) {
+    if (clip_is_minicpmv(ctx_clip) || clip_is_qwen2vl(ctx_clip)) {
         std::vector<float *> image_embd_v;
         image_embd_v.resize(img_res_v.size);
         struct clip_image_size * load_image_size = clip_image_size_init();
+
         for (size_t i = 0; i < img_res_v.size; i++) {
             const int64_t t_img_enc_step_start_us = ggml_time_us();
-            image_embd_v[i] = (float *)malloc(
+            image_embd_v[i] = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny));
             int patch_size=14;
             load_image_size->width = img_res_v.data[i].nx;
             load_image_size->height = img_res_v.data[i].ny;
             clip_add_load_image_size(ctx_clip, load_image_size);
+
             bool encoded = false;
-
-            if (has_minicpmv_projector == 2) {
-                encoded = clip_image_encode(ctx_clip, n_threads, only_v2_5_reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
-            }
-            else if (has_minicpmv_projector == 3) {
+            if (clip_is_qwen2vl(ctx_clip)) {
                 encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
             }
+            else {
+                int has_minicpmv_projector = clip_is_minicpmv(ctx_clip);
+                if (has_minicpmv_projector == 2) {
+                    encoded = clip_image_encode(ctx_clip, n_threads, only_v2_5_reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
+                }
+                else if (has_minicpmv_projector == 3) {
+                    encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
+                }
+            }
+
             if (!encoded) {
                 LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
                 return false;
@@ -286,8 +298,11 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
 
         int n_img_pos_out = 0;
         for (size_t i = 0; i < image_embd_v.size(); i++) {
-            std::memcpy(
-
+            std::memcpy(
+                image_embd + n_img_pos_out * clip_n_mmproj_embd(ctx_clip),
+                image_embd_v[i],
+                clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny));
+            n_img_pos_out += clip_n_patches_by_img(ctx_clip, &img_res_v.data[i]);
         }
         *n_img_pos = n_img_pos_out;
         for (size_t i = 0; i < image_embd_v.size(); i++) {
@@ -383,7 +398,13 @@ bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, co
     if (clip_is_minicpmv(ctx_clip)) {
         num_max_patches = 10;
     }
-    float * image_embd
+    float * image_embd;
+    if (clip_is_qwen2vl(ctx_clip)) {
+        // qwen2vl don't split image into chunks, so `num_max_patches` is not needed.
+        image_embd = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, img->nx, img->ny));
+    } else {
+        image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*num_max_patches); // TODO: base on gridsize/llava model
+    }
     if (!image_embd) {
         LOG_ERR("Unable to allocate memory for image embeddings\n");
         return false;
@@ -401,6 +422,39 @@ bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, co
     return true;
 }
 
+struct llava_embd_batch {
+    std::vector<llama_pos>      pos;
+    std::vector<int32_t>        n_seq_id;
+    std::vector<llama_seq_id>   seq_id_0;
+    std::vector<llama_seq_id *> seq_ids;
+    std::vector<int8_t>         logits;
+    llama_batch batch;
+    llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
+        pos     .resize(n_tokens);
+        n_seq_id.resize(n_tokens);
+        seq_ids .resize(n_tokens + 1);
+        logits  .resize(n_tokens);
+        seq_id_0.resize(1);
+        seq_id_0[0] = seq_id;
+        seq_ids [n_tokens] = nullptr;
+        batch = {
+            /*n_tokens =*/ n_tokens,
+            /*tokens   =*/ nullptr,
+            /*embd     =*/ embd,
+            /*pos      =*/ pos.data(),
+            /*n_seq_id =*/ n_seq_id.data(),
+            /*seq_id   =*/ seq_ids.data(),
+            /*logits   =*/ logits.data(),
+        };
+        for (int i = 0; i < n_tokens; i++) {
+            batch.pos     [i] = pos_0 + i;
+            batch.n_seq_id[i] = 1;
+            batch.seq_id  [i] = seq_id_0.data();
+            batch.logits  [i] = false;
+        }
+    }
+};
+
 bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_embed * image_embed, int n_batch, int * n_past) {
     int n_embd = llama_n_embd(llama_get_model(ctx_llama));
 
@@ -409,8 +463,9 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-
-
+        float * embd = image_embed->embed+i*n_embd;
+        llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, *n_past, 0);
+        if (llama_decode(ctx_llama, llava_batch.batch)) {
             LOG_ERR("%s : failed to eval\n", __func__);
             return false;
         }
@@ -432,7 +487,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c
     bool image_embed_result = llava_image_embed_make_with_clip_img(ctx_clip, n_threads, img, &image_embed, &n_image_pos);
     if (!image_embed_result) {
         clip_image_u8_free(img);
-        LOG_ERR("%s:
+        LOG_ERR("%s: couldn't embed the image\n", __func__);
         return NULL;
     }
 
@@ -464,10 +519,16 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long
     errno = 0;
     size_t ret = fread(buffer, 1, fileSize, file); // Read the file into the buffer
     if (ferror(file)) {
-
+        LOG_ERR("read error: %s", strerror(errno));
+        free(buffer);
+        fclose(file);
+        return false;
     }
     if (ret != (size_t) fileSize) {
-
+        LOG_ERR("unexpectedly reached end of file");
+        free(buffer);
+        fclose(file);
+        return false;
     }
     fclose(file); // Close the file
 

package/src/llama.cpp/examples/llava/minicpmv-cli.cpp

@@ -25,11 +25,11 @@ static void show_additional_info(int /*argc*/, char ** argv) {
     LOG("\nnote: a lower temperature value like 0.1 is recommended for better quality.\n");
 }
 
-static struct llama_model * llava_init(
+static struct llama_model * llava_init(common_params * params) {
     llama_backend_init();
     llama_numa_init(params->numa);
 
-    llama_model_params model_params =
+    llama_model_params model_params = common_model_params_to_llama(*params);
 
     llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
@@ -39,13 +39,13 @@ static struct llama_model * llava_init(gpt_params * params) {
     return model;
 }
 
-static struct llava_context * llava_init_context(
+static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
     auto prompt = params->prompt;
     if (prompt.empty()) {
         prompt = "describe the image in detail.";
     }
 
-    llama_context_params ctx_params =
+    llama_context_params ctx_params = common_context_params_to_llama(*params);
     if (params->n_ctx < 2048) {
         // warn user here, "Image processing requires at least 2048 context, setting context to 2048"
         LOG_WRN("%s: Image processing requires at least 2048 context, setting context to 2048\n" , __func__);
@@ -79,7 +79,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     llama_backend_free();
 }
 
-static struct clip_ctx * clip_init_context(
+static struct clip_ctx * clip_init_context(common_params * params) {
     const char * clip_path = params->mmproj.c_str();
 
     auto prompt = params->prompt;
@@ -97,7 +97,7 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval
+        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
             LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
             return false;
         }
@@ -114,7 +114,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 
 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp =
+    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
     return eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
 }
 
@@ -129,7 +129,7 @@ static void process_eval_image_embed(struct llava_context * ctx_llava, const str
     llava_image_embed_free(slice_embed);
 }
 
-static void process_image(struct llava_context * ctx_llava, struct llava_image_embed * embeds,
+static void process_image(struct llava_context * ctx_llava, struct llava_image_embed * embeds, common_params * params, int &n_past) {
     std::string system_prompt;
     int idx = 0;
     int num_image_embeds = embeds->n_image_pos / clip_n_patches(ctx_llava->ctx_clip);
@@ -162,22 +162,22 @@ static void process_image(struct llava_context * ctx_llava, struct llava_image_e
     LOG_INF("%s: image token past: %d\n", __func__, n_past);
 }
 
-static const char * sample(struct
+static const char * sample(struct common_sampler * smpl,
                            struct llama_context * ctx_llama,
                            int * n_past) {
-    const llama_token id =
-
+    const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
+    common_sampler_accept(smpl, id, true);
     static std::string ret;
     if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
         ret = "</s>";
     } else {
-        ret =
+        ret = common_token_to_piece(ctx_llama, id);
     }
     eval_id(ctx_llama, id, n_past);
     return ret.c_str();
 }
 
-static struct llava_context * minicpmv_init(
+static struct llava_context * minicpmv_init(common_params * params, const std::string & fname, int &n_past){
     auto * ctx_clip = clip_init_context(params);
     auto * embeds = llava_image_embed_make_with_filename(ctx_clip, params->cpuparams.n_threads, fname.c_str());
     if (!embeds) {
@@ -213,7 +213,7 @@ static struct llava_context * minicpmv_init(gpt_params * params, const std::stri
     return ctx_llava;
 }
 
-static struct
+static struct common_sampler * llama_init(struct llava_context * ctx_llava, common_params * params, const std::string & prompt, int & n_past, bool is_first = false){
     std::string user_prompt = prompt;
     int has_minicpmv_projector = clip_is_minicpmv(ctx_llava->ctx_clip);
     if (!is_first) {
@@ -237,11 +237,11 @@ static struct gpt_sampler * llama_init(struct llava_context * ctx_llava, gpt_par
 
     LOG_INF("\n");
 
-    struct
+    struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sampling);
     return smpl;
 }
 
-static const char * llama_loop(struct llava_context * ctx_llava,struct
+static const char * llama_loop(struct llava_context * ctx_llava,struct common_sampler * smpl, int &n_past){
 
     const char * tmp = sample(smpl, ctx_llava->ctx_llama, &n_past);
     return tmp;
@@ -250,13 +250,13 @@ static const char * llama_loop(struct llava_context * ctx_llava,struct gpt_sampl
 int main(int argc, char ** argv) {
     ggml_time_init();
 
-
+    common_params params;
 
-    if (!
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
         return 1;
     }
 
-
+    common_init();
 
     if (params.mmproj.empty() || (params.image.empty())) {
         show_additional_info(argc, argv);
@@ -290,7 +290,7 @@ int main(int argc, char ** argv) {
 
             fflush(stdout);
         }
-
+        common_sampler_free(smpl);
     }else {
         while (true) {
             LOG("<user>");
@@ -309,7 +309,7 @@ int main(int argc, char ** argv) {
             if (strstr(response.c_str(), "<user>")) break; // minicpm-v
             fflush(stdout);
         }
-
+        common_sampler_free(smpl);
     }
     }
     printf("\n");