@fugood/llama.node 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/src/llama.cpp/examples/llava/clip.h

@@ -18,14 +18,17 @@
 # define CLIP_API
 #endif

-struct clip_ctx;
-
 #ifdef __cplusplus
 extern "C" {
 #endif

 struct clip_ctx;

+struct clip_image_size {
+    int width;
+    int height;
+};
+
 struct clip_image_u8_batch {
     struct clip_image_u8 * data;
     size_t size;
@@ -55,6 +58,10 @@ CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx);
 CLIP_API int clip_n_patches (const struct clip_ctx * ctx);
 CLIP_API int clip_n_mmproj_embd(const struct clip_ctx * ctx);

+CLIP_API int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip);
+CLIP_API void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size);
+
+CLIP_API struct clip_image_size * clip_image_size_init();
 CLIP_API struct clip_image_u8 * clip_image_u8_init ();
 CLIP_API struct clip_image_f32 * clip_image_f32_init();

@@ -78,6 +85,8 @@ CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, cons

 CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype);

+CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx);
+
 #ifdef __cplusplus
 }
 #endif
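The new clip.h entry points above (clip_image_size_init, clip_add_load_image_size, clip_uhd_num_image_embeds_col, clip_is_minicpmv) relate to the MiniCPM-V support added in this release (see minicpmv-cli.cpp in the file list). The snippet below is a minimal sketch, not taken from the package, of how a caller might register the source image size on a clip_ctx before encoding; the helper name register_image_size is an illustrative assumption, and only the clip.h declarations shown above are relied on.

// Illustrative sketch only: wires together the clip.h additions shown above.
// Assumes ctx_clip was obtained elsewhere via clip_model_load(); error handling omitted.
#include "clip.h"

static void register_image_size(struct clip_ctx * ctx_clip, int width, int height) {
    if (clip_is_minicpmv(ctx_clip)) {
        // Record the original image resolution on the CLIP context so later
        // embedding steps can take it into account.
        struct clip_image_size * size = clip_image_size_init();
        size->width  = width;
        size->height = height;
        clip_add_load_image_size(ctx_clip, size);
    }
}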
package/src/llama.cpp/examples/llava/llava-cli.cpp

@@ -1,14 +1,16 @@
-#include "
+#include "arg.h"
+#include "base64.hpp"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
-
-#include "base64.hpp"
+#include "ggml.h"

 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 #include <vector>

 static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past) {
@@ -18,8 +20,8 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval
-
+        if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
+            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
             return false;
         }
         *n_past += n_eval;
@@ -35,21 +37,21 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {

 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp =
+    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
     eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
     return true;
 }

-static const char * sample(struct
+static const char * sample(struct common_sampler * smpl,
                            struct llama_context * ctx_llama,
                            int * n_past) {
-    const llama_token id =
-
+    const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
+    common_sampler_accept(smpl, id, true);
     static std::string ret;
     if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
         ret = "</s>";
     } else {
-        ret =
+        ret = common_token_to_piece(ctx_llama, id);
     }
     eval_id(ctx_llama, id, n_past);
     return ret.c_str();
@@ -74,7 +76,7 @@ static llava_image_embed * llava_image_embed_make_with_prompt_base64(struct clip
     size_t img_base64_str_start, img_base64_str_end;
     find_image_tag_in_prompt(prompt, img_base64_str_start, img_base64_str_end);
     if (img_base64_str_start == std::string::npos || img_base64_str_end == std::string::npos) {
-
+        LOG_ERR("%s: invalid base64 image tag. must be %s<base64 byte string>%s\n", __func__, IMG_BASE64_TAG_BEGIN, IMG_BASE64_TAG_END);
         return NULL;
     }

@@ -88,7 +90,7 @@ static llava_image_embed * llava_image_embed_make_with_prompt_base64(struct clip

     auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, img_bytes.data(), img_bytes.size());
     if (!embed) {
-
+        LOG_ERR("%s: could not load image from base64 string.\n", __func__);
         return NULL;
     }

@@ -112,31 +114,29 @@ struct llava_context {
     struct llama_model * model = NULL;
 };

-static void print_usage(int
-
-
-
-    LOG_TEE("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
-    LOG_TEE("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
+static void print_usage(int, char ** argv) {
+    LOG("\n example usage:\n");
+    LOG("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
+    LOG("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
 }

-static struct llava_image_embed * load_image(llava_context * ctx_llava,
+static struct llava_image_embed * load_image(llava_context * ctx_llava, common_params * params, const std::string & fname) {

     // load and preprocess the image
     llava_image_embed * embed = NULL;
     auto prompt = params->prompt;
     if (prompt_contains_image(prompt)) {
         if (!params->image.empty()) {
-
+            LOG_INF("using base64 encoded image instead of command line image path\n");
         }
-        embed = llava_image_embed_make_with_prompt_base64(ctx_llava->ctx_clip, params->n_threads, prompt);
+        embed = llava_image_embed_make_with_prompt_base64(ctx_llava->ctx_clip, params->cpuparams.n_threads, prompt);
         if (!embed) {
-
+            LOG_ERR("%s: can't load image from prompt\n", __func__);
             return NULL;
         }
         params->prompt = remove_image_from_prompt(prompt);
     } else {
-        embed = llava_image_embed_make_with_filename(ctx_llava->ctx_clip, params->n_threads, fname.c_str());
+        embed = llava_image_embed_make_with_filename(ctx_llava->ctx_clip, params->cpuparams.n_threads, fname.c_str());
         if (!embed) {
             fprintf(stderr, "%s: is %s really an image file?\n", __func__, fname.c_str());
             return NULL;
@@ -146,7 +146,7 @@ static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_para
     return embed;
 }

-static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed,
+static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed, common_params * params, const std::string & prompt) {
     int n_past = 0;

     const int max_tgt_len = params->n_predict < 0 ? 256 : params->n_predict;
@@ -157,18 +157,18 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         // new templating mode: Provide the full prompt including system message and use <image> as a placeholder for the image
         system_prompt = prompt.substr(0, image_pos);
         user_prompt = prompt.substr(image_pos + std::string("<image>").length());
-
+        LOG_INF("system_prompt: %s\n", system_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp =
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
-
+        LOG_INF("user_prompt: %s\n", user_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp =
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
     } else {
@@ -176,9 +176,9 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
         user_prompt = prompt + "\nASSISTANT:";
         if (params->verbose_prompt) {
-            auto tmp =
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-
+                LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
     }
@@ -189,21 +189,21 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_

     // generate the response

-
+    LOG("\n");

-    struct
-    if (!
-
+    struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sparams);
+    if (!smpl) {
+        LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
         exit(1);
     }

     std::string response = "";
     for (int i = 0; i < max_tgt_len; i++) {
-        const char * tmp = sample(
+        const char * tmp = sample(smpl, ctx_llava->ctx_llama, &n_past);
         response += tmp;
         if (strcmp(tmp, "</s>") == 0) break;
         if (strstr(tmp, "###")) break; // Yi-VL behavior
-
+        LOG("%s", tmp);
         if (strstr(response.c_str(), "<|im_end|>")) break; // Yi-34B llava-1.6 - for some reason those decode not as the correct token (tokenizer works)
         if (strstr(response.c_str(), "<|im_start|>")) break; // Yi-34B llava-1.6
         if (strstr(response.c_str(), "USER:")) break; // mistral llava-1.6
@@ -211,25 +211,25 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         fflush(stdout);
     }

-
-
+    common_sampler_free(smpl);
+    LOG("\n");
 }

-static struct llama_model * llava_init(
+static struct llama_model * llava_init(common_params * params) {
     llama_backend_init();
     llama_numa_init(params->numa);

-    llama_model_params model_params =
+    llama_model_params model_params = common_model_params_to_llama(*params);

     llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
-
+        LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
     }
     return model;
 }

-static struct llava_context * llava_init_context(
+static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
     const char * clip_path = params->mmproj.c_str();

     auto prompt = params->prompt;
@@ -240,17 +240,17 @@ static struct llava_context * llava_init_context(gpt_params * params, llama_mode
     auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);


-    llama_context_params ctx_params =
+    llama_context_params ctx_params = common_context_params_to_llama(*params);
     ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings

     llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);

     if (ctx_llama == NULL) {
-
+        LOG_ERR("%s: failed to create the llama_context\n" , __func__);
         return NULL;
     }

-    auto ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
+    auto * ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));

     ctx_llava->ctx_llama = ctx_llama;
     ctx_llava->ctx_clip = ctx_clip;
@@ -269,65 +269,54 @@ static void llava_free(struct llava_context * ctx_llava) {
     llama_backend_free();
 }

-static void llama_log_callback_logTee(ggml_log_level level, const char * text, void * user_data) {
-    (void) level;
-    (void) user_data;
-    LOG_TEE("%s", text);
-}
-
 int main(int argc, char ** argv) {
     ggml_time_init();

-
+    common_params params;

-    if (!
-        print_usage(argc, argv, params);
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
         return 1;
     }

-
-    log_set_target(log_filename_generator("llava", "log"));
-    LOG_TEE("Log start\n");
-    log_dump_cmdline(argc, argv);
-    llama_log_set(llama_log_callback_logTee, nullptr);
-#endif // LOG_DISABLE_LOGS
+    common_init();

     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
-        print_usage(argc, argv
+        print_usage(argc, argv);
         return 1;
     }
-
+
+    auto * model = llava_init(&params);
     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to init llava model\n", __func__);
         return 1;
     }

     if (prompt_contains_image(params.prompt)) {
-        auto ctx_llava = llava_init_context(&params, model);
+        auto * ctx_llava = llava_init_context(&params, model);

-        auto image_embed = load_image(ctx_llava, &params, "");
+        auto * image_embed = load_image(ctx_llava, &params, "");

         // process the prompt
         process_prompt(ctx_llava, image_embed, &params, params.prompt);

-
+        llama_perf_context_print(ctx_llava->ctx_llama);
         llava_image_embed_free(image_embed);
         ctx_llava->model = NULL;
         llava_free(ctx_llava);
     } else {
         for (auto & image : params.image) {
-            auto ctx_llava = llava_init_context(&params, model);
+            auto * ctx_llava = llava_init_context(&params, model);

-            auto image_embed = load_image(ctx_llava, &params, image);
+            auto * image_embed = load_image(ctx_llava, &params, image);
             if (!image_embed) {
-
+                LOG_ERR("%s: failed to load image %s. Terminating\n\n", __func__, image.c_str());
                 return 1;
             }

             // process the prompt
             process_prompt(ctx_llava, image_embed, &params, params.prompt);

-
+            llama_perf_context_print(ctx_llava->ctx_llama);
             llava_image_embed_free(image_embed);
             ctx_llava->model = NULL;
             llava_free(ctx_llava);