@fugood/llama.node 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +7 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +17 -7
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +89 -27
- package/src/LlamaContext.h +2 -0
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +240 -168
- package/src/llama.cpp/.github/workflows/docker.yml +8 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +14 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -4
- package/src/llama.cpp/common/arg.cpp +986 -770
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +212 -351
- package/src/llama.cpp/common/common.h +204 -117
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +163 -121
- package/src/llama.cpp/common/sampling.h +41 -20
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +134 -161
- package/src/llama.cpp/examples/CMakeLists.txt +33 -14
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +19 -18
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +41 -87
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +263 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +83 -22
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
- package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +73 -114
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
- package/src/llama.cpp/examples/server/server.cpp +2073 -1339
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +354 -277
- package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/simple/simple.cpp +130 -94
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
- package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +159 -417
- package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +93 -52
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +4 -8
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +779 -194
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +55 -10
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +4317 -2979
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -38
- package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
- package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +62 -20
- package/src/llama.cpp/tests/test-sampling.cpp +163 -138
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0

package/src/llama.cpp/examples/llava/clip.cpp:

@@ -4,24 +4,29 @@
 // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
 #include "clip.h"
 #include "ggml.h"
+#include "ggml-cpu.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 
-#ifdef GGML_USE_CUDA
-#include "ggml-cuda.h"
-#endif
-
-#ifdef GGML_USE_METAL
-#include "ggml-metal.h"
-#endif
-
-#ifdef GGML_USE_CANN
-#include "ggml-cann.h"
-#endif
-
-#ifdef GGML_USE_VULKAN
-#include "ggml-vulkan.h"
-#endif
+//#ifdef GGML_USE_CUDA
+//#include "ggml-cuda.h"
+//#endif
+//
+//#ifdef GGML_USE_SYCL
+//#include "ggml-sycl.h"
+//#endif
+//
+//#ifdef GGML_USE_METAL
+//#include "ggml-metal.h"
+//#endif
+//
+//#ifdef GGML_USE_CANN
+//#include "ggml-cann.h"
+//#endif
+//
+//#ifdef GGML_USE_VULKAN
+//#include "ggml-vulkan.h"
+//#endif
 
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
@@ -39,10 +44,17 @@
 #include <cinttypes>
 #include <limits>
 
-#
-#define
-#define
-#define
+#if defined(LLAVA_LOG_OFF)
+#   define LOG_INF(...)
+#   define LOG_WRN(...)
+#   define LOG_ERR(...)
+#   define LOG_DBG(...)
+#else // defined(LLAVA_LOG_OFF)
+#   define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#   define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#   define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#   define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#endif // defined(LLAVA_LOG_OFF)
 
 //#define CLIP_DEBUG_FUNCTIONS
 
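A minimal standalone sketch of how the new guard behaves when a build defines LLAVA_LOG_OFF (the macro name comes from the hunk above; how a particular build system sets the definition is not shown in this diff):

// sketch only: mirrors the guard above outside of clip.cpp
#include <cstdio>

#define LLAVA_LOG_OFF   // remove this line to get the fprintf-based variants

#if defined(LLAVA_LOG_OFF)
#   define LOG_INF(...)
#   define LOG_ERR(...)
#else
#   define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#   define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#endif

int main() {
    LOG_INF("loaded %d tensors\n", 42); // expands to nothing when LLAVA_LOG_OFF is defined
    LOG_ERR("also compiled away\n");
    return 0;
}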
@@ -90,7 +102,9 @@ static std::string format(const char * fmt, ...) {
 #define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector"
 #define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector"
 #define KEY_MINICPMV_VERSION "clip.minicpmv_version"
+#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger"
 #define KEY_USE_GELU "clip.use_gelu"
+#define KEY_USE_SILU "clip.use_silu"
 #define KEY_N_EMBD "clip.%s.embedding_length"
 #define KEY_N_FF "clip.%s.feed_forward_length"
 #define KEY_N_BLOCK "clip.%s.block_count"
@@ -117,7 +131,8 @@ static std::string format(const char * fmt, ...) {
 #define TN_TOKEN_EMBD "%s.token_embd.weight"
 #define TN_POS_EMBD "%s.position_embd.weight"
 #define TN_CLASS_EMBD "v.class_embd"
-#define TN_PATCH_EMBD "v.patch_embd.weight"
+#define TN_PATCH_EMBD "v.patch_embd.weight" // not rename tensor with ".0" postfix for backwrad compat
+#define TN_PATCH_EMBD_1 "v.patch_embd.weight.1"
 #define TN_PATCH_BIAS "v.patch_embd.bias"
 #define TN_ATTN_K "%s.blk.%d.attn_k.%s"
 #define TN_ATTN_Q "%s.blk.%d.attn_q.%s"
@@ -151,6 +166,7 @@ enum projector_type {
     PROJECTOR_TYPE_LDP,
     PROJECTOR_TYPE_LDPV2,
     PROJECTOR_TYPE_RESAMPLER,
+    PROJECTOR_TYPE_MERGER,
     PROJECTOR_TYPE_UNKNOWN,
 };
 
@@ -159,6 +175,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
     { PROJECTOR_TYPE_LDP, "ldp" },
     { PROJECTOR_TYPE_LDPV2, "ldpv2"},
     { PROJECTOR_TYPE_RESAMPLER, "resampler"},
+    { PROJECTOR_TYPE_MERGER, "qwen2vl_merger"},
 };
 
 
@@ -451,7 +468,8 @@ struct clip_vision_model {
 
     // embeddings
     struct ggml_tensor * class_embedding;
-    struct ggml_tensor *
+    struct ggml_tensor * patch_embeddings_0;
+    struct ggml_tensor * patch_embeddings_1;  // second Conv2D kernel when we decouple Conv3D along temproal dimension (Qwen2VL)
     struct ggml_tensor * patch_bias;
     struct ggml_tensor * position_embeddings;
 
@@ -541,6 +559,7 @@ struct clip_ctx {
     bool has_vision_encoder = false;
     bool has_llava_projector = false;
     bool has_minicpmv_projector = false;
+    bool has_qwen2vl_merger = false;
    int minicpmv_version = 2;
 
     struct clip_vision_model vision_model;
@@ -549,6 +568,7 @@ struct clip_ctx {
     float image_mean[3];
     float image_std[3];
     bool use_gelu = false;
+    bool use_silu = false;
     int32_t ftype = 1;
 
     bool has_class_embedding = true;
@@ -594,14 +614,26 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             image_size_height = imgs->data->ny;
         }
     }
+    else if (ctx->has_qwen2vl_merger) {
+        // use the image's native resolution when image is avaible
+        if (is_inf) {
+        // if (imgs->data->nx && imgs->data->ny) {
+            image_size_width  = imgs->data->nx;
+            image_size_height = imgs->data->ny;
+        }
+    }
     const int patch_size = hparams.patch_size;
     const int num_patches = ((image_size_width / patch_size) * (image_size_height / patch_size));
+    const int patches_w = image_size_width / patch_size;
+    const int patches_h = image_size_height / patch_size;
     const int num_positions = num_patches + (ctx->has_class_embedding ? 1 : 0);
+    const int num_position_ids = ctx->has_qwen2vl_merger ? num_positions * 4 : num_positions;
     const int hidden_size = hparams.hidden_size;
     const int n_head = hparams.n_head;
     const int d_head = hidden_size / n_head;
     int n_layer = hparams.n_layer;
     const float eps = hparams.eps;
+    int mrope_sections[4] = {d_head/4, d_head/4, d_head/4, d_head/4};
 
     const int batch_size = imgs->size;
 
@@ -622,10 +654,30 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
     ggml_set_name(inp_raw, "inp_raw");
     ggml_set_input(inp_raw);
 
-    struct ggml_tensor * inp = ggml_conv_2d(ctx0, model.
+    struct ggml_tensor * inp = ggml_conv_2d(ctx0, model.patch_embeddings_0, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
 
-    inp = ggml_reshape_3d(ctx0, inp, num_patches, hidden_size, batch_size);
-    inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3));
+    if (ctx->has_qwen2vl_merger) {
+        GGML_ASSERT(image_size_width % (patch_size * 2) == 0);
+        GGML_ASSERT(image_size_height % (patch_size * 2) == 0);
+
+        auto inp_1 = ggml_conv_2d(ctx0, model.patch_embeddings_1, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
+        inp = ggml_add(ctx0, inp, inp_1);
+        inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 2, 0, 3));  // [w, h, c, b] -> [c, w, h, b]
+        inp = ggml_reshape_4d(
+            ctx0, inp,
+            hidden_size * 2, patches_w / 2, patches_h, batch_size);
+        inp = ggml_reshape_4d(
+            ctx0, inp,
+            hidden_size * 2, patches_w / 2, 2, batch_size * (patches_h / 2));
+        inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 0, 2, 1, 3));
+        inp = ggml_reshape_3d(
+            ctx0, inp,
+            hidden_size, patches_w * patches_h, batch_size);
+    }
+    else {
+        inp = ggml_reshape_3d(ctx0, inp, num_patches, hidden_size, batch_size);
+        inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3));
+    }
 
     if (ctx->has_patch_bias) {
         // inp = ggml_add(ctx0, inp, ggml_repeat(ctx0, model.patch_bias, inp));
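For orientation: the added reshape/permute chain lays the convolution output out so that each 2x2 block of neighbouring patches ends up in four consecutive rows, which the PROJECTOR_TYPE_MERGER branch further down folds into a single row of width hidden_size * 4. A minimal shape walk-through with illustrative numbers (hidden size 1280, 14-pixel patches, a 224x224 input; these values are assumptions, not taken from this diff):

#include <cstdio>

int main() {
    // Illustrative values only; the real ones come from the model's GGUF hparams.
    const int hidden_size = 1280;
    const int patch_size  = 14;
    const int image_w = 224, image_h = 224;

    const int patches_w   = image_w / patch_size;   // 16
    const int patches_h   = image_h / patch_size;   // 16
    const int num_patches = patches_w * patches_h;  // 256

    // conv output (permuted)       : [hidden, patches_w, patches_h, batch]
    // reshape #1                   : [hidden*2, patches_w/2, patches_h, batch]
    // reshape #2                   : [hidden*2, patches_w/2, 2, batch*patches_h/2]
    // permute + reshape #3 (final) : [hidden, patches_w*patches_h, batch]
    printf("patch grid              : %dx%d (%d patches)\n", patches_w, patches_h, num_patches);
    printf("rows seen by the merger : %d of width %d\n", num_patches / 4, hidden_size * 4);
    return 0;
}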
@@ -647,12 +699,14 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         }
     }
 
-    struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32,
+    struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_position_ids);
     ggml_set_name(positions, "positions");
     ggml_set_input(positions);
 
-    embeddings =
-        ggml_add(ctx0, embeddings, ggml_get_rows(ctx0, model.position_embeddings, positions));
+    if (!ctx->has_qwen2vl_merger) { // qwen2vl use rope position embedding
+        embeddings =
+            ggml_add(ctx0, embeddings, ggml_get_rows(ctx0, model.position_embeddings, positions));
+    }
 
     if (ctx->has_minicpmv_projector) {
         int pos_w = image_size_width/patch_size;
@@ -676,7 +730,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
|
|
676
730
|
}
|
|
677
731
|
|
|
678
732
|
// loop over layers
|
|
679
|
-
if (ctx->has_minicpmv_projector) {
|
|
733
|
+
if (ctx->has_minicpmv_projector || ctx->has_qwen2vl_merger) {
|
|
734
|
+
// TODO: figure out why we doing thing in this way ???
|
|
680
735
|
n_layer += 1;
|
|
681
736
|
}
|
|
682
737
|
for (int il = 0; il < n_layer - 1; il++) {
|
|
@@ -698,8 +753,13 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
|
|
698
753
|
struct ggml_tensor * Q =
|
|
699
754
|
ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].q_w, cur), model.layers[il].q_b);
|
|
700
755
|
|
|
701
|
-
Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
|
|
702
756
|
Q = ggml_reshape_4d(ctx0, Q, d_head, n_head, num_positions, batch_size);
|
|
757
|
+
if (ctx->has_qwen2vl_merger) {
|
|
758
|
+
Q = ggml_rope_multi(
|
|
759
|
+
ctx0, Q, positions, nullptr,
|
|
760
|
+
d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1);
|
|
761
|
+
}
|
|
762
|
+
Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
|
|
703
763
|
Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3));
|
|
704
764
|
Q = ggml_reshape_3d(ctx0, Q, d_head, num_positions, n_head * batch_size);
|
|
705
765
|
|
|
@@ -707,6 +767,11 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
|
|
707
767
|
ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].k_w, cur), model.layers[il].k_b);
|
|
708
768
|
|
|
709
769
|
K = ggml_reshape_4d(ctx0, K, d_head, n_head, num_positions, batch_size);
|
|
770
|
+
if (ctx->has_qwen2vl_merger) {
|
|
771
|
+
K = ggml_rope_multi(
|
|
772
|
+
ctx0, K, positions, nullptr,
|
|
773
|
+
d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1);
|
|
774
|
+
}
|
|
710
775
|
K = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3));
|
|
711
776
|
K = ggml_reshape_3d(ctx0, K, d_head, num_positions, n_head * batch_size);
|
|
712
777
|
|
|
@@ -746,6 +811,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
 
         if (ctx->use_gelu) {
             cur = ggml_gelu_inplace(ctx0, cur);
+        } else if (ctx->use_silu) {
+            cur = ggml_silu_inplace(ctx0, cur);
         } else {
             cur = ggml_gelu_quick_inplace(ctx0, cur);
         }
@@ -757,6 +824,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         cur = ggml_add(ctx0, embeddings, cur);
 
         embeddings = cur;
+
     }
 
     // post-layernorm
@@ -828,7 +896,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             mlp_3 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_3, 1, 0, 2, 3));
             mlp_3 = ggml_reshape_4d(ctx0, mlp_3, n_patch, n_patch, mlp_3->ne[1], mlp_3->ne[2]);
             // stride = 1, padding = 1, bias is nullptr
-            block_1 =
+            block_1 = ggml_conv_2d_dw(ctx0, model.mm_model_block_1_block_0_0_w, mlp_3, 1, 1, 1, 1, 1, 1);
 
             // layer norm
             // // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
@@ -876,7 +944,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             // block_2
             {
                 // stride = 2
-                block_1 =
+                block_1 = ggml_conv_2d_dw(ctx0, model.mm_model_block_2_block_0_0_w, block_1, 2, 2, 1, 1, 1, 1);
 
                 // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
                 // layer norm
@@ -937,7 +1005,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             // mlp_2 ne [24, 24, 2048, 1]
             mlp_2 = ggml_pool_2d(ctx0, mlp_2, GGML_OP_POOL_AVG, 2, 2, 2, 2, 0, 0);
             // weight ne = [3, 3, 2048, 1]
-            struct ggml_tensor * peg_0 =
+            struct ggml_tensor * peg_0 = ggml_conv_2d_dw(ctx0, model.mm_model_peg_0_w, mlp_2, 1, 1, 1, 1, 1, 1);
             peg_0 = ggml_cont(ctx0, ggml_permute(ctx0, peg_0, 1, 2, 0, 3));
             peg_0 = ggml_add(ctx0, peg_0, model.mm_model_peg_0_b);
             mlp_2 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_2, 1, 2, 0, 3));
@@ -1018,6 +1086,19 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             GGML_ASSERT(false);
         }
     }
+    else if (ctx->proj_type == PROJECTOR_TYPE_MERGER) {
+        embeddings = ggml_reshape_3d(ctx0, embeddings, hidden_size * 4, num_positions / 4, batch_size);
+
+        embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
+        embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
+
+        // GELU activation
+        embeddings = ggml_gelu(ctx0, embeddings);
+
+        // Second linear layer
+        embeddings = ggml_mul_mat(ctx0, model.mm_1_w, embeddings);
+        embeddings = ggml_add(ctx0, embeddings, model.mm_1_b);
+    }
 
     // build the graph
     ggml_build_forward_expand(gf, embeddings);
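The merger projector above is a plain two-layer MLP applied to each group of four merged patch embeddings: linear, GELU, linear. A scalar sketch of that computation with toy sizes (the widths are illustrative assumptions; in the model they come from the mm_0_* / mm_1_* tensors, and clip_n_mmproj_embd() reports mm_1_b->ne[0] as the output width):

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const int n_in  = 4 * 8;  // 4 merged patches, toy embedding width 8
    const int n_mid = n_in;   // hidden width, assumed equal to the input here
    const int n_out = 16;     // toy language-model embedding width

    std::vector<float> x(n_in, 0.1f), h(n_mid), y(n_out);
    std::vector<float> w0(n_mid * n_in, 0.01f), b0(n_mid, 0.0f);
    std::vector<float> w1(n_out * n_mid, 0.01f), b1(n_out, 0.0f);

    auto gelu = [](float v) { // tanh approximation, as used by ggml_gelu
        return 0.5f * v * (1.0f + std::tanh(0.7978845608f * (v + 0.044715f * v * v * v)));
    };

    for (int i = 0; i < n_mid; ++i) {          // first linear + GELU
        float acc = b0[i];
        for (int j = 0; j < n_in; ++j) acc += w0[i * n_in + j] * x[j];
        h[i] = gelu(acc);
    }
    for (int i = 0; i < n_out; ++i) {          // second linear
        float acc = b1[i];
        for (int j = 0; j < n_mid; ++j) acc += w1[i * n_mid + j] * h[j];
        y[i] = acc;
    }
    printf("merged token width in -> out: %d -> %d\n", n_in, n_out);
    return 0;
}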
@@ -1141,25 +1222,30 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }
 
-#ifdef GGML_USE_CUDA
-    new_clip->backend = ggml_backend_cuda_init(0);
-    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_METAL
-    new_clip->backend = ggml_backend_metal_init();
-    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_CANN
-    new_clip->backend = ggml_backend_cann_init(0);
-    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_clip->backend = ggml_backend_vk_init(0);
-    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-#endif
+//#ifdef GGML_USE_CUDA
+//    new_clip->backend = ggml_backend_cuda_init(0);
+//    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
+//#endif
+//
+//#ifdef GGML_USE_METAL
+//    new_clip->backend = ggml_backend_metal_init();
+//    LOG_INF("%s: CLIP using Metal backend\n", __func__);
+//#endif
+//
+//#ifdef GGML_USE_CANN
+//    new_clip->backend = ggml_backend_cann_init(0);
+//    LOG_INF("%s: CLIP using CANN backend\n", __func__);
+//#endif
+//
+//#ifdef GGML_USE_VULKAN
+//    new_clip->backend = ggml_backend_vk_init(0);
+//    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
+//#endif
+//
+//#ifdef GGML_USE_SYCL
+//    new_clip->backend = ggml_backend_sycl_init(0);
+//    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
+//#endif
 
     if (!new_clip->backend) {
         new_clip->backend = ggml_backend_cpu_init();
@@ -1189,6 +1275,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         new_clip->minicpmv_version = gguf_get_val_i32(ctx, idx);
     }
 
+    idx = gguf_find_key(ctx, KEY_HAS_QWEN2VL_MERGER);
+    if (idx != -1) {
+        new_clip->has_qwen2vl_merger = gguf_get_val_bool(ctx, idx);
+    }
     // GGML_ASSERT(new_clip->has_llava_projector); // see monatis/clip.cpp for image and/or text encoding for semantic search
 
     GGML_ASSERT(new_clip->has_vision_encoder);
@@ -1197,6 +1287,13 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     idx = get_key_idx(ctx, KEY_USE_GELU);
     new_clip->use_gelu = gguf_get_val_bool(ctx, idx);
 
+    try {
+        idx = get_key_idx(ctx, KEY_USE_SILU);
+        new_clip->use_silu = gguf_get_val_bool(ctx, idx);
+    } catch (std::runtime_error & /*e*/) {
+        new_clip->use_silu = false;
+    }
+
     if (verbosity >= 1) {
         LOG_INF("%s: text_encoder: %d\n", __func__, new_clip->has_text_encoder);
         LOG_INF("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder);
@@ -1372,11 +1469,16 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     }
 
     try {
-        vision_model.
+        vision_model.patch_embeddings_0 = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD);
         vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v"));
     } catch(const std::exception& /*e*/) {
         LOG_ERR("%s: failed to load vision model tensors\n", __func__);
     }
+    try {
+        vision_model.patch_embeddings_1 = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD_1);
+    } catch(const std::exception& /*e*/) {
+        new_clip->has_qwen2vl_merger = false;
+    }
 
     // LLaVA projection
     if (new_clip->proj_type == PROJECTOR_TYPE_MLP || new_clip->proj_type == PROJECTOR_TYPE_MLP_NORM) {
@@ -1464,6 +1566,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         vision_model.mm_model_ln_post_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "post", "weight"));
         vision_model.mm_model_ln_post_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "post", "bias"));
     }
+    else if (new_clip->proj_type == PROJECTOR_TYPE_MERGER) {
+        vision_model.mm_0_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "weight"));
+        vision_model.mm_0_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "bias"));
+        vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight"));
+        vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias"));
+    }
     else {
         std::string proj_type = PROJECTOR_TYPE_NAMES[new_clip->proj_type];
         throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str()));
@@ -1502,6 +1610,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         new_clip->compute_alloc = ggml_gallocr_new(ggml_backend_get_default_buffer_type(new_clip->backend));
         clip_image_f32_batch batch;
         batch.size = 1;
+        batch.data = nullptr;
         ggml_cgraph * gf = clip_image_build_graph(new_clip, &batch, nullptr, false);
         ggml_gallocr_reserve(new_clip->compute_alloc, gf);
         size_t compute_memory_buffer_size = ggml_gallocr_get_buffer_size(new_clip->compute_alloc, 0);
@@ -1515,6 +1624,10 @@ void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size
     ctx_clip->load_image_size = load_image_size;
 }
 
+struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip) {
+    return ctx_clip->load_image_size;
+}
+
 struct clip_image_size * clip_image_size_init() {
     struct clip_image_size * load_image_size = new struct clip_image_size();
     load_image_size->width = 448;
@@ -1967,6 +2080,23 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
         }
         return true;
     }
+    else if (ctx->has_qwen2vl_merger) {
+        clip_image_u8 * resized = clip_image_u8_init();
+        auto patch_size = clip_patch_size(ctx) * 2;
+        int nx = ceil((float)img->nx / patch_size) * patch_size;
+        int ny = ceil((float)img->ny / patch_size) * patch_size;
+        bicubic_resize(*img, *resized, nx, ny);
+
+        res_imgs->data = new clip_image_f32[1];
+        // clip_image_f32 * res = clip_image_f32_init();
+        normalize_image_u8_to_f32(resized, res_imgs->data, ctx->image_mean, ctx->image_std);
+        // res_imgs->data[0] = *res;
+        res_imgs->size = 1;
+
+        // clip_image_f32_free(res);
+        clip_image_u8_free(resized);
+        return true;
+    }
 
     bool pad_to_square = true;
     if (!ctx->has_vision_encoder) {
@@ -2156,6 +2286,13 @@ size_t clip_embd_nbytes(const struct clip_ctx * ctx) {
     return clip_n_patches(ctx) * clip_n_mmproj_embd(ctx) * sizeof(float);
 }
 
+size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w) {
+    clip_image_f32 img;
+    img.nx = img_w;
+    img.ny = img_h;
+    return clip_n_patches_by_img(ctx, &img) * clip_n_mmproj_embd(ctx) * sizeof(float);
+}
+
 int32_t clip_image_size(const struct clip_ctx * ctx) {
     return ctx->vision_model.hparams.image_size;
 }
@@ -2177,6 +2314,13 @@ const int32_t * clip_image_grid(const struct clip_ctx * ctx) {
 }
 
 int clip_n_patches(const struct clip_ctx * ctx) {
+    clip_image_f32 img;
+    img.nx = ctx->vision_model.hparams.image_size;
+    img.ny = ctx->vision_model.hparams.image_size;
+    return clip_n_patches_by_img(ctx, &img);
+}
+
+int clip_n_patches_by_img(const struct clip_ctx * ctx, struct clip_image_f32 * img) {
     const auto & params = ctx->vision_model.hparams;
 
     int n_patches = (params.image_size / params.patch_size) * (params.image_size / params.patch_size);
@@ -2190,6 +2334,11 @@ int clip_n_patches(const struct clip_ctx * ctx) {
         else if (ctx->minicpmv_version == 3) {
             n_patches = 64;
         }
+    } else if (ctx->proj_type == PROJECTOR_TYPE_MERGER) {
+        int patch_size = params.patch_size * 2;
+        int x_patch = img->nx / patch_size + (int)(img->nx % patch_size > 0);
+        int y_patch = img->ny / patch_size + (int)(img->ny % patch_size > 0);
+        n_patches = x_patch * y_patch;
     }
 
     return n_patches;
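Worked example of the merger token count, taken together with the preprocessing round-up added earlier: with a 14-pixel patch size (illustrative; the real value comes from hparams) the effective merge cell is 28 pixels, so a 640x480 input is first resized to 644x504 and then yields 23 * 18 = 414 merged tokens. A minimal sketch of the same arithmetic:

#include <cmath>
#include <cstdio>

int main() {
    const int patch_size = 14;             // illustrative; comes from the model hparams
    const int cell       = patch_size * 2; // the qwen2vl merger works on 2x2 patch blocks
    const int img_w = 640, img_h = 480;

    // preprocessing: round the image up to a multiple of the merge cell (bicubic resize target)
    const int nx = (int)ceilf((float)img_w / cell) * cell;   // 644
    const int ny = (int)ceilf((float)img_h / cell) * cell;   // 504

    // equivalent of clip_n_patches_by_img() for PROJECTOR_TYPE_MERGER
    const int x_patch = nx / cell + (nx % cell > 0);         // 23
    const int y_patch = ny / cell + (ny % cell > 0);         // 18
    printf("resized to %dx%d -> %d tokens\n", nx, ny, x_patch * y_patch);
    return 0;
}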
@@ -2318,7 +2467,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
     const int image_size = hparams.image_size;
     int image_size_width  = image_size;
     int image_size_height = image_size;
-    if (ctx->has_minicpmv_projector) {
+    if (ctx->has_minicpmv_projector | ctx->has_qwen2vl_merger) {
         image_size_width  = imgs->data[0].nx;
         image_size_height = imgs->data[0].ny;
     }
@@ -2338,7 +2487,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
     for (size_t i = 0; i < imgs->size; i++) {
         const int nx = imgs->data[i].nx;
         const int ny = imgs->data[i].ny;
-        if (!ctx->has_minicpmv_projector) {
+        if (!(ctx->has_minicpmv_projector | ctx->has_qwen2vl_merger)) {
             GGML_ASSERT(nx == image_size && ny == image_size);
         }
 
@@ -2396,9 +2545,9 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         auto pos_embed_t = get_2d_sincos_pos_embed(embed_dim, std::make_pair(pos_w, pos_h));
 
         float * pos_embed_data = (float *)malloc(ggml_nbytes(pos_embed));
-        for(int i=0;i<pos_w * pos_h
-        for(int j=0;j<embed_dim
-        pos_embed_data[i*embed_dim+j]=pos_embed_t[i][j];
+        for(int i=0;i < pos_w * pos_h; ++i){
+            for(int j=0; j < embed_dim; ++j){
+                pos_embed_data[i * embed_dim + j] = pos_embed_t[i][j];
             }
         }
 
@@ -2418,7 +2567,34 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         }
     }
 
-    {
+    if (ctx->has_qwen2vl_merger) {
+        struct ggml_tensor * positions = ggml_graph_get_tensor(gf, "positions");
+
+        const int pw = image_size_width / patch_size;
+        const int ph = image_size_height / patch_size;
+        int* positions_data = (int*)malloc(ggml_nbytes(positions));
+
+        int ptr = 0;
+        for (int y = 0; y < ph; y+=2)
+        {
+            for (int x = 0; x < pw; x+=2)
+            {
+                for (int dy = 0; dy < 2; dy++) {
+                    for (int dx = 0; dx < 2; dx++) {
+                        positions_data[ptr] = y + dy;
+                        positions_data[num_patches + ptr] = x + dx;
+                        positions_data[num_patches * 2 + ptr] = y + dy;
+                        positions_data[num_patches * 3 + ptr] = x + dx;
+                        ptr++;
+                    }
+                }
+            }
+        }
+
+        ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions));
+        free(positions_data);
+    }
+    else {
         struct ggml_tensor * positions = ggml_graph_get_tensor(gf, "positions");
 
         int* positions_data = (int*)malloc(ggml_nbytes(positions));
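The loop above writes four position planes of length num_patches into the "positions" tensor (num_position_ids is 4 * num_positions in the graph builder): planes 0 and 2 carry the patch row, planes 1 and 3 the patch column, and patches are visited in 2x2 blocks so the four entries of one merge cell are consecutive. A standalone sketch that prints the resulting layout for a hypothetical 4x4 patch grid:

#include <cstdio>
#include <vector>

int main() {
    const int pw = 4, ph = 4;                 // hypothetical patch grid
    const int num_patches = pw * ph;
    std::vector<int> pos(4 * num_patches);

    int ptr = 0;
    for (int y = 0; y < ph; y += 2) {
        for (int x = 0; x < pw; x += 2) {
            for (int dy = 0; dy < 2; dy++) {
                for (int dx = 0; dx < 2; dx++) {
                    pos[ptr]                   = y + dy; // plane 0: row
                    pos[num_patches     + ptr] = x + dx; // plane 1: column
                    pos[num_patches * 2 + ptr] = y + dy; // plane 2: row again
                    pos[num_patches * 3 + ptr] = x + dx; // plane 3: column again
                    ptr++;
                }
            }
        }
    }
    for (int i = 0; i < num_patches; i++) {
        printf("token %2d -> (row %d, col %d)\n", i, pos[i], pos[num_patches + i]);
    }
    return 0;
}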
@@ -2427,16 +2603,16 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         }
         ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions));
         free(positions_data);
-    }
 
-    {
-        struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
-        int* patches_data = (int*)malloc(ggml_nbytes(patches));
-        for (int i = 0; i < num_patches; i++) {
-            patches_data[i] = i + 1;
+        {
+            struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
+            int* patches_data = (int*)malloc(ggml_nbytes(patches));
+            for (int i = 0; i < num_patches; i++) {
+                patches_data[i] = i + 1;
+            }
+            ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
+            free(patches_data);
         }
-        ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
-        free(patches_data);
     }
 }
 
@@ -2609,6 +2785,9 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
             return 3584;
         }
     }
+    if (ctx->proj_type == PROJECTOR_TYPE_MERGER) {
+        return ctx->vision_model.mm_1_b->ne[0];
+    }
 
     std::string proj_type = PROJECTOR_TYPE_NAMES[ctx->proj_type];
     throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str()));
@@ -2620,3 +2799,21 @@ int clip_is_minicpmv(const struct clip_ctx * ctx) {
     }
     return 0;
 }
+
+bool clip_is_qwen2vl(const struct clip_ctx * ctx) {
+    return ctx->has_qwen2vl_merger;
+}
+
+
+bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) {
+    clip_image_f32 clip_img;
+    clip_img.buf.resize(h * w * 3);
+    for (int i = 0; i < h*w*3; i++)
+    {
+        clip_img.buf[i] = img[i];
+    }
+    clip_img.nx = w;
+    clip_img.ny = h;
+    clip_image_encode(ctx, n_threads, &clip_img, vec);
+    return true;
+}
package/src/llama.cpp/examples/llava/clip.h:

@@ -45,6 +45,7 @@ CLIP_API struct clip_ctx * clip_model_load_cpu(const char * fname, int verbosity
 CLIP_API void clip_free(struct clip_ctx * ctx);
 
 CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx);
+CLIP_API size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w);
 
 CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx);
 CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx);
@@ -55,11 +56,13 @@ CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx);
 
 CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx);
 
-CLIP_API int clip_n_patches
-CLIP_API int
+CLIP_API int clip_n_patches        (const struct clip_ctx * ctx);
+CLIP_API int clip_n_patches_by_img (const struct clip_ctx * ctx, struct clip_image_f32 * img);
+CLIP_API int clip_n_mmproj_embd    (const struct clip_ctx * ctx);
 
 CLIP_API int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip);
 CLIP_API void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size);
+CLIP_API struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip);
 
 CLIP_API struct clip_image_size * clip_image_size_init();
 CLIP_API struct clip_image_u8 * clip_image_u8_init ();
@@ -86,6 +89,9 @@ CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, cons
 CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype);
 
 CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx);
+CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
+
+CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
 
 #ifdef __cplusplus
 }