@fugood/llama.node 0.3.16 → 0.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +5 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +8 -0
- package/src/LlamaCompletionWorker.h +1 -0
- package/src/LlamaContext.cpp +3 -2
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +124 -0
- package/src/llama.cpp/.github/workflows/build.yml +70 -27
- package/src/llama.cpp/.github/workflows/docker.yml +6 -6
- package/src/llama.cpp/.github/workflows/server.yml +7 -11
- package/src/llama.cpp/CMakeLists.txt +23 -1
- package/src/llama.cpp/common/CMakeLists.txt +6 -3
- package/src/llama.cpp/common/arg.cpp +809 -105
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +1 -1
- package/src/llama.cpp/common/common.cpp +31 -521
- package/src/llama.cpp/common/common.h +17 -36
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +30 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +15 -7
- package/src/llama.cpp/common/minja/minja.hpp +119 -93
- package/src/llama.cpp/common/sampling.cpp +3 -0
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +0 -9
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +7 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +210 -8
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +39 -24
- package/src/llama.cpp/examples/llava/clip-impl.h +345 -0
- package/src/llama.cpp/examples/llava/clip.cpp +2152 -1803
- package/src/llama.cpp/examples/llava/clip.h +39 -22
- package/src/llama.cpp/examples/llava/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/examples/llava/llava.cpp +64 -52
- package/src/llama.cpp/examples/llava/mtmd-cli.cpp +344 -0
- package/src/llama.cpp/examples/llava/mtmd.cpp +708 -0
- package/src/llama.cpp/examples/llava/mtmd.h +168 -0
- package/src/llama.cpp/examples/llava/{qwen2vl-cli.cpp → qwen2vl-test.cpp} +83 -31
- package/src/llama.cpp/examples/main/main.cpp +16 -5
- package/src/llama.cpp/examples/parallel/parallel.cpp +3 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +17 -3
- package/src/llama.cpp/examples/quantize/quantize.cpp +115 -2
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +4 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +163 -8
- package/src/llama.cpp/examples/run/CMakeLists.txt +12 -1
- package/src/llama.cpp/examples/run/run.cpp +14 -28
- package/src/llama.cpp/examples/server/httplib.h +313 -247
- package/src/llama.cpp/examples/server/server.cpp +238 -139
- package/src/llama.cpp/examples/server/utils.hpp +51 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +6 -9
- package/src/llama.cpp/ggml/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +66 -99
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +48 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -192
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +754 -404
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1003 -13519
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +2 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +3 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +533 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8809 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +258 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +70 -3
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -260
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +293 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +96 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +2 -292
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +967 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +204 -280
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +646 -114
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +17 -8
- package/src/llama.cpp/ggml/src/ggml.c +141 -245
- package/src/llama.cpp/ggml/src/gguf.cpp +1 -0
- package/src/llama.cpp/include/llama.h +30 -11
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +2 -0
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/src/CMakeLists.txt +3 -2
- package/src/llama.cpp/src/llama-adapter.cpp +37 -1
- package/src/llama.cpp/src/llama-arch.cpp +160 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-chat.cpp +82 -17
- package/src/llama.cpp/src/llama-chat.h +6 -2
- package/src/llama.cpp/src/llama-context.cpp +108 -92
- package/src/llama.cpp/src/llama-context.h +1 -2
- package/src/llama.cpp/src/llama-graph.cpp +189 -119
- package/src/llama.cpp/src/llama-graph.h +26 -6
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +70 -123
- package/src/llama.cpp/src/llama-kv-cache.h +41 -115
- package/src/llama.cpp/src/llama-memory.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +10 -5
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model.cpp +1760 -534
- package/src/llama.cpp/src/llama-model.h +13 -1
- package/src/llama.cpp/src/llama-quant.cpp +29 -8
- package/src/llama.cpp/src/llama-sampling.cpp +7 -1
- package/src/llama.cpp/src/llama-vocab.cpp +44 -6
- package/src/llama.cpp/src/llama.cpp +1 -1
- package/src/llama.cpp/tests/CMakeLists.txt +43 -30
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +82 -43
- package/src/llama.cpp/tests/test-chat-template.cpp +34 -13
- package/src/llama.cpp/tests/test-chat.cpp +12 -2
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
|
@@ -36,6 +36,46 @@ static uint64_t get_time_ns() {
|
|
|
36
36
|
return std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
+
static bool tensor_buft_override_equal(const llama_model_tensor_buft_override& a, const llama_model_tensor_buft_override& b) { // true when a and b have matching patterns (same pointer or equal text) and the same buft
|
|
40
|
+
if (a.pattern != b.pattern) { // pointers differ, so compare the strings they point to
|
|
41
|
+
// C-string comparison; either pointer may be null, and a null pattern never equals a non-null one
|
|
42
|
+
if (a.pattern == nullptr || b.pattern == nullptr) {
|
|
43
|
+
return false;
|
|
44
|
+
}
|
|
45
|
+
if (strcmp(a.pattern, b.pattern) != 0) { // both pointers are non-null at this point
|
|
46
|
+
return false;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
if (a.buft != b.buft) { // patterns match; the buft members are compared directly with !=
|
|
50
|
+
return false;
|
|
51
|
+
}
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
static bool vec_tensor_buft_override_equal(const std::vector<llama_model_tensor_buft_override>& a, const std::vector<llama_model_tensor_buft_override>& b) { // true when both lists have the same length and every pair of entries at the same index compares equal
|
|
56
|
+
if (a.size() != b.size()) { // lists of different length are never equal
|
|
57
|
+
return false;
|
|
58
|
+
}
|
|
59
|
+
for (size_t i = 0; i < a.size(); i++) { // sizes match, so i is a valid index into both a and b
|
|
60
|
+
if (!tensor_buft_override_equal(a[i], b[i])) { // stop at the first mismatching entry
|
|
61
|
+
return false;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
return true;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
static bool vec_vec_tensor_buft_override_equal(const std::vector<std::vector<llama_model_tensor_buft_override>>& a, const std::vector<std::vector<llama_model_tensor_buft_override>>& b) { // true when both outer lists have the same length and every pair of inner lists at the same index compares equal
|
|
68
|
+
if (a.size() != b.size()) { // a different number of inner lists means not equal
|
|
69
|
+
return false;
|
|
70
|
+
}
|
|
71
|
+
for (size_t i = 0; i < a.size(); i++) { // sizes match, so i is a valid index into both a and b
|
|
72
|
+
if (!vec_tensor_buft_override_equal(a[i], b[i])) { // inner lists are compared with the list-level helper
|
|
73
|
+
return false;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
return true;
|
|
77
|
+
}
|
|
78
|
+
|
|
39
79
|
template <class T> static std::string join(const std::vector<T> & values, const std::string & delim) {
|
|
40
80
|
std::ostringstream str;
|
|
41
81
|
for (size_t i = 0; i < values.size(); i++) {
|
|
@@ -160,6 +200,7 @@ struct cmd_params {
|
|
|
160
200
|
std::vector<int> n_prompt;
|
|
161
201
|
std::vector<int> n_gen;
|
|
162
202
|
std::vector<std::pair<int, int>> n_pg;
|
|
203
|
+
std::vector<int> n_depth;
|
|
163
204
|
std::vector<int> n_batch;
|
|
164
205
|
std::vector<int> n_ubatch;
|
|
165
206
|
std::vector<ggml_type> type_k;
|
|
@@ -175,6 +216,7 @@ struct cmd_params {
|
|
|
175
216
|
std::vector<bool> no_kv_offload;
|
|
176
217
|
std::vector<bool> flash_attn;
|
|
177
218
|
std::vector<std::vector<float>> tensor_split;
|
|
219
|
+
std::vector<std::vector<llama_model_tensor_buft_override>> tensor_buft_overrides;
|
|
178
220
|
std::vector<bool> use_mmap;
|
|
179
221
|
std::vector<bool> embeddings;
|
|
180
222
|
ggml_numa_strategy numa;
|
|
@@ -192,6 +234,7 @@ static const cmd_params cmd_params_defaults = {
|
|
|
192
234
|
/* n_prompt */ { 512 },
|
|
193
235
|
/* n_gen */ { 128 },
|
|
194
236
|
/* n_pg */ {},
|
|
237
|
+
/* n_depth */ { 0 },
|
|
195
238
|
/* n_batch */ { 2048 },
|
|
196
239
|
/* n_ubatch */ { 512 },
|
|
197
240
|
/* type_k */ { GGML_TYPE_F16 },
|
|
@@ -207,6 +250,7 @@ static const cmd_params cmd_params_defaults = {
|
|
|
207
250
|
/* no_kv_offload */ { false },
|
|
208
251
|
/* flash_attn */ { false },
|
|
209
252
|
/* tensor_split */ { std::vector<float>(llama_max_devices(), 0.0f) },
|
|
253
|
+
/* tensor_buft_overrides*/ { std::vector<llama_model_tensor_buft_override>{{nullptr,nullptr}} },
|
|
210
254
|
/* use_mmap */ { true },
|
|
211
255
|
/* embeddings */ { false },
|
|
212
256
|
/* numa */ GGML_NUMA_STRATEGY_DISABLED,
|
|
@@ -230,6 +274,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
|
|
230
274
|
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
|
|
231
275
|
printf(" -pg <pp,tg> (default: %s)\n",
|
|
232
276
|
join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
|
|
277
|
+
printf(" -d, --n-depth <n> (default: %s)\n", join(cmd_params_defaults.n_depth, ",").c_str());
|
|
233
278
|
printf(" -b, --batch-size <n> (default: %s)\n",
|
|
234
279
|
join(cmd_params_defaults.n_batch, ",").c_str());
|
|
235
280
|
printf(" -ub, --ubatch-size <n> (default: %s)\n",
|
|
@@ -265,6 +310,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
|
|
265
310
|
printf(" -embd, --embeddings <0|1> (default: %s)\n",
|
|
266
311
|
join(cmd_params_defaults.embeddings, ",").c_str());
|
|
267
312
|
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
|
|
313
|
+
printf(" -ot --override-tensors <tensor name pattern>=<buffer type>;... (default: disabled)\n");
|
|
268
314
|
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
|
269
315
|
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
|
|
270
316
|
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
|
|
@@ -366,6 +412,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|
|
366
412
|
break;
|
|
367
413
|
}
|
|
368
414
|
params.n_pg.push_back({ std::stoi(p[0]), std::stoi(p[1]) });
|
|
415
|
+
} else if (arg == "-d" || arg == "--n-depth") {
|
|
416
|
+
if (++i >= argc) {
|
|
417
|
+
invalid_param = true;
|
|
418
|
+
break;
|
|
419
|
+
}
|
|
420
|
+
auto p = string_split<int>(argv[i], split_delim);
|
|
421
|
+
params.n_depth.insert(params.n_depth.end(), p.begin(), p.end());
|
|
369
422
|
} else if (arg == "-b" || arg == "--batch-size") {
|
|
370
423
|
if (++i >= argc) {
|
|
371
424
|
invalid_param = true;
|
|
@@ -557,6 +610,87 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|
|
557
610
|
}
|
|
558
611
|
params.tensor_split.push_back(tensor_split);
|
|
559
612
|
}
|
|
613
|
+
} else if (arg == "-ot" || arg == "--override-tensor") {
|
|
614
|
+
if (++i >= argc) {
|
|
615
|
+
invalid_param = true;
|
|
616
|
+
break;
|
|
617
|
+
}
|
|
618
|
+
auto value = argv[i];
|
|
619
|
+
/* static */ std::map<std::string, ggml_backend_buffer_type_t> buft_list;
|
|
620
|
+
if (buft_list.empty()) {
|
|
621
|
+
// enumerate all the devices and add their buffer types to the list
|
|
622
|
+
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
|
623
|
+
auto * dev = ggml_backend_dev_get(i);
|
|
624
|
+
auto * buft = ggml_backend_dev_buffer_type(dev);
|
|
625
|
+
if (buft) {
|
|
626
|
+
buft_list[ggml_backend_buft_name(buft)] = buft;
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
auto override_group_span_len = std::strcspn(value, ",");
|
|
631
|
+
bool last_group = false;
|
|
632
|
+
do {
|
|
633
|
+
if (override_group_span_len == 0) {
|
|
634
|
+
// Adds an empty override-tensors for an empty span
|
|
635
|
+
params.tensor_buft_overrides.push_back({{}});
|
|
636
|
+
if (value[override_group_span_len] == '\0') {
|
|
637
|
+
value = &value[override_group_span_len];
|
|
638
|
+
last_group = true;
|
|
639
|
+
} else {
|
|
640
|
+
value = &value[override_group_span_len + 1];
|
|
641
|
+
override_group_span_len = std::strcspn(value, ",");
|
|
642
|
+
}
|
|
643
|
+
continue;
|
|
644
|
+
}
|
|
645
|
+
// Stamps null terminators into the argv
|
|
646
|
+
// value for this option to avoid the
|
|
647
|
+
// memory leak present in the implementation
|
|
648
|
+
// over in arg.cpp. Acceptable because we
|
|
649
|
+
// only parse these args once in this program.
|
|
650
|
+
auto override_group = value;
|
|
651
|
+
if (value[override_group_span_len] == '\0') {
|
|
652
|
+
value = &value[override_group_span_len];
|
|
653
|
+
last_group = true;
|
|
654
|
+
} else {
|
|
655
|
+
value[override_group_span_len] = '\0';
|
|
656
|
+
value = &value[override_group_span_len + 1];
|
|
657
|
+
}
|
|
658
|
+
std::vector<llama_model_tensor_buft_override> group_tensor_buft_overrides{};
|
|
659
|
+
auto override_span_len = std::strcspn(override_group, ";");
|
|
660
|
+
while (override_span_len > 0) {
|
|
661
|
+
auto override = override_group;
|
|
662
|
+
if (override_group[override_span_len] != '\0') {
|
|
663
|
+
override_group[override_span_len] = '\0';
|
|
664
|
+
override_group = &override_group[override_span_len + 1];
|
|
665
|
+
} else {
|
|
666
|
+
override_group = &override_group[override_span_len];
|
|
667
|
+
}
|
|
668
|
+
auto tensor_name_span_len = std::strcspn(override, "=");
|
|
669
|
+
if (tensor_name_span_len >= override_span_len) {
|
|
670
|
+
invalid_param = true;
|
|
671
|
+
break;
|
|
672
|
+
}
|
|
673
|
+
override[tensor_name_span_len] = '\0';
|
|
674
|
+
auto tensor_name = override;
|
|
675
|
+
auto buffer_type = &override[tensor_name_span_len + 1];
|
|
676
|
+
if (buft_list.find(buffer_type) == buft_list.end()) {
|
|
677
|
+
printf("Available buffer types:\n");
|
|
678
|
+
for (const auto & it : buft_list) {
|
|
679
|
+
printf(" %s\n", ggml_backend_buft_name(it.second));
|
|
680
|
+
}
|
|
681
|
+
invalid_param = true;
|
|
682
|
+
break;
|
|
683
|
+
}
|
|
684
|
+
group_tensor_buft_overrides.push_back({tensor_name, buft_list.at(buffer_type)});
|
|
685
|
+
override_span_len = std::strcspn(override_group, ";");
|
|
686
|
+
}
|
|
687
|
+
if (invalid_param) {
|
|
688
|
+
break;
|
|
689
|
+
}
|
|
690
|
+
group_tensor_buft_overrides.push_back({nullptr,nullptr});
|
|
691
|
+
params.tensor_buft_overrides.push_back(group_tensor_buft_overrides);
|
|
692
|
+
override_group_span_len = std::strcspn(value, ",");
|
|
693
|
+
} while (!last_group);
|
|
560
694
|
} else if (arg == "-r" || arg == "--repetitions") {
|
|
561
695
|
if (++i >= argc) {
|
|
562
696
|
invalid_param = true;
|
|
@@ -615,6 +749,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|
|
615
749
|
if (params.n_pg.empty()) {
|
|
616
750
|
params.n_pg = cmd_params_defaults.n_pg;
|
|
617
751
|
}
|
|
752
|
+
if (params.n_depth.empty()) {
|
|
753
|
+
params.n_depth = cmd_params_defaults.n_depth;
|
|
754
|
+
}
|
|
618
755
|
if (params.n_batch.empty()) {
|
|
619
756
|
params.n_batch = cmd_params_defaults.n_batch;
|
|
620
757
|
}
|
|
@@ -648,6 +785,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|
|
648
785
|
if (params.tensor_split.empty()) {
|
|
649
786
|
params.tensor_split = cmd_params_defaults.tensor_split;
|
|
650
787
|
}
|
|
788
|
+
if (params.tensor_buft_overrides.empty()) {
|
|
789
|
+
params.tensor_buft_overrides = cmd_params_defaults.tensor_buft_overrides;
|
|
790
|
+
}
|
|
651
791
|
if (params.use_mmap.empty()) {
|
|
652
792
|
params.use_mmap = cmd_params_defaults.use_mmap;
|
|
653
793
|
}
|
|
@@ -674,6 +814,7 @@ struct cmd_params_instance {
|
|
|
674
814
|
std::string model;
|
|
675
815
|
int n_prompt;
|
|
676
816
|
int n_gen;
|
|
817
|
+
int n_depth;
|
|
677
818
|
int n_batch;
|
|
678
819
|
int n_ubatch;
|
|
679
820
|
ggml_type type_k;
|
|
@@ -689,6 +830,7 @@ struct cmd_params_instance {
|
|
|
689
830
|
bool no_kv_offload;
|
|
690
831
|
bool flash_attn;
|
|
691
832
|
std::vector<float> tensor_split;
|
|
833
|
+
std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
|
|
692
834
|
bool use_mmap;
|
|
693
835
|
bool embeddings;
|
|
694
836
|
|
|
@@ -733,19 +875,26 @@ struct cmd_params_instance {
|
|
|
733
875
|
mparams.tensor_split = tensor_split.data();
|
|
734
876
|
mparams.use_mmap = use_mmap;
|
|
735
877
|
|
|
878
|
+
if (tensor_buft_overrides.empty()) {
|
|
879
|
+
mparams.tensor_buft_overrides = nullptr;
|
|
880
|
+
} else {
|
|
881
|
+
GGML_ASSERT(tensor_buft_overrides.back().pattern == nullptr && "Tensor buffer overrides not terminated with empty pattern");
|
|
882
|
+
mparams.tensor_buft_overrides = tensor_buft_overrides.data();
|
|
883
|
+
}
|
|
884
|
+
|
|
736
885
|
return mparams;
|
|
737
886
|
}
|
|
738
887
|
|
|
739
888
|
bool equal_mparams(const cmd_params_instance & other) const {
|
|
740
889
|
return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str &&
|
|
741
890
|
split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap &&
|
|
742
|
-
tensor_split == other.tensor_split;
|
|
891
|
+
tensor_split == other.tensor_split && vec_tensor_buft_override_equal(tensor_buft_overrides, other.tensor_buft_overrides);
|
|
743
892
|
}
|
|
744
893
|
|
|
745
894
|
llama_context_params to_llama_cparams() const {
|
|
746
895
|
llama_context_params cparams = llama_context_default_params();
|
|
747
896
|
|
|
748
|
-
cparams.n_ctx = n_prompt + n_gen;
|
|
897
|
+
cparams.n_ctx = n_prompt + n_gen + n_depth;
|
|
749
898
|
cparams.n_batch = n_batch;
|
|
750
899
|
cparams.n_ubatch = n_ubatch;
|
|
751
900
|
cparams.type_k = type_k;
|
|
@@ -769,6 +918,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|
|
769
918
|
for (const auto & sm : params.split_mode)
|
|
770
919
|
for (const auto & mg : params.main_gpu)
|
|
771
920
|
for (const auto & ts : params.tensor_split)
|
|
921
|
+
for (const auto & ot : params.tensor_buft_overrides)
|
|
772
922
|
for (const auto & mmp : params.use_mmap)
|
|
773
923
|
for (const auto & embd : params.embeddings)
|
|
774
924
|
for (const auto & nb : params.n_batch)
|
|
@@ -780,6 +930,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|
|
780
930
|
for (const auto & nt : params.n_threads)
|
|
781
931
|
for (const auto & cm : params.cpu_mask)
|
|
782
932
|
for (const auto & cs : params.cpu_strict)
|
|
933
|
+
for (const auto & nd : params.n_depth)
|
|
783
934
|
for (const auto & pl : params.poll) {
|
|
784
935
|
for (const auto & n_prompt : params.n_prompt) {
|
|
785
936
|
if (n_prompt == 0) {
|
|
@@ -789,6 +940,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|
|
789
940
|
/* .model = */ m,
|
|
790
941
|
/* .n_prompt = */ n_prompt,
|
|
791
942
|
/* .n_gen = */ 0,
|
|
943
|
+
/* .n_depth = */ nd,
|
|
792
944
|
/* .n_batch = */ nb,
|
|
793
945
|
/* .n_ubatch = */ nub,
|
|
794
946
|
/* .type_k = */ tk,
|
|
@@ -804,6 +956,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|
|
804
956
|
/* .no_kv_offload= */ nkvo,
|
|
805
957
|
/* .flash_attn = */ fa,
|
|
806
958
|
/* .tensor_split = */ ts,
|
|
959
|
+
/* .tensor_buft_overrides = */ ot,
|
|
807
960
|
/* .use_mmap = */ mmp,
|
|
808
961
|
/* .embeddings = */ embd,
|
|
809
962
|
};
|
|
@@ -818,6 +971,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|
|
818
971
|
/* .model = */ m,
|
|
819
972
|
/* .n_prompt = */ 0,
|
|
820
973
|
/* .n_gen = */ n_gen,
|
|
974
|
+
/* .n_depth = */ nd,
|
|
821
975
|
/* .n_batch = */ nb,
|
|
822
976
|
/* .n_ubatch = */ nub,
|
|
823
977
|
/* .type_k = */ tk,
|
|
@@ -833,6 +987,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|
|
833
987
|
/* .no_kv_offload= */ nkvo,
|
|
834
988
|
/* .flash_attn = */ fa,
|
|
835
989
|
/* .tensor_split = */ ts,
|
|
990
|
+
/* .tensor_buft_overrides = */ ot,
|
|
836
991
|
/* .use_mmap = */ mmp,
|
|
837
992
|
/* .embeddings = */ embd,
|
|
838
993
|
};
|
|
@@ -847,6 +1002,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|
|
847
1002
|
/* .model = */ m,
|
|
848
1003
|
/* .n_prompt = */ n_pg.first,
|
|
849
1004
|
/* .n_gen = */ n_pg.second,
|
|
1005
|
+
/* .n_depth = */ nd,
|
|
850
1006
|
/* .n_batch = */ nb,
|
|
851
1007
|
/* .n_ubatch = */ nub,
|
|
852
1008
|
/* .type_k = */ tk,
|
|
@@ -862,6 +1018,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|
|
862
1018
|
/* .no_kv_offload= */ nkvo,
|
|
863
1019
|
/* .flash_attn = */ fa,
|
|
864
1020
|
/* .tensor_split = */ ts,
|
|
1021
|
+
/* .tensor_buft_overrides = */ ot,
|
|
865
1022
|
/* .use_mmap = */ mmp,
|
|
866
1023
|
/* .embeddings = */ embd,
|
|
867
1024
|
};
|
|
@@ -896,10 +1053,12 @@ struct test {
|
|
|
896
1053
|
bool no_kv_offload;
|
|
897
1054
|
bool flash_attn;
|
|
898
1055
|
std::vector<float> tensor_split;
|
|
1056
|
+
std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
|
|
899
1057
|
bool use_mmap;
|
|
900
1058
|
bool embeddings;
|
|
901
1059
|
int n_prompt;
|
|
902
1060
|
int n_gen;
|
|
1061
|
+
int n_depth;
|
|
903
1062
|
std::string test_time;
|
|
904
1063
|
std::vector<uint64_t> samples_ns;
|
|
905
1064
|
|
|
@@ -927,10 +1086,12 @@ struct test {
|
|
|
927
1086
|
no_kv_offload = inst.no_kv_offload;
|
|
928
1087
|
flash_attn = inst.flash_attn;
|
|
929
1088
|
tensor_split = inst.tensor_split;
|
|
1089
|
+
tensor_buft_overrides = inst.tensor_buft_overrides;
|
|
930
1090
|
use_mmap = inst.use_mmap;
|
|
931
1091
|
embeddings = inst.embeddings;
|
|
932
1092
|
n_prompt = inst.n_prompt;
|
|
933
1093
|
n_gen = inst.n_gen;
|
|
1094
|
+
n_depth = inst.n_depth;
|
|
934
1095
|
// RFC 3339 date-time format
|
|
935
1096
|
time_t t = time(NULL);
|
|
936
1097
|
std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t));
|
|
@@ -972,9 +1133,9 @@ struct test {
|
|
|
972
1133
|
"build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename",
|
|
973
1134
|
"model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads",
|
|
974
1135
|
"cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers",
|
|
975
|
-
"split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "
|
|
976
|
-
"embeddings", "n_prompt",
|
|
977
|
-
"
|
|
1136
|
+
"split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "tensor_buft_overrides",
|
|
1137
|
+
"use_mmap", "embeddings", "n_prompt", "n_gen", "n_depth", "test_time",
|
|
1138
|
+
"avg_ns", "stddev_ns", "avg_ts", "stddev_ts",
|
|
978
1139
|
};
|
|
979
1140
|
return fields;
|
|
980
1141
|
}
|
|
@@ -984,8 +1145,8 @@ struct test {
|
|
|
984
1145
|
static field_type get_field_type(const std::string & field) {
|
|
985
1146
|
if (field == "build_number" || field == "n_batch" || field == "n_ubatch" || field == "n_threads" ||
|
|
986
1147
|
field == "poll" || field == "model_size" || field == "model_n_params" || field == "n_gpu_layers" ||
|
|
987
|
-
field == "main_gpu" || field == "n_prompt" || field == "n_gen" || field == "avg_ns" ||
|
|
988
|
-
field == "stddev_ns") {
|
|
1148
|
+
field == "main_gpu" || field == "n_prompt" || field == "n_gen" || field == "n_depth" ||
|
|
1149
|
+
field == "avg_ns" || field == "stddev_ns") {
|
|
989
1150
|
return INT;
|
|
990
1151
|
}
|
|
991
1152
|
if (field == "f16_kv" || field == "no_kv_offload" || field == "cpu_strict" || field == "flash_attn" ||
|
|
@@ -1000,6 +1161,7 @@ struct test {
|
|
|
1000
1161
|
|
|
1001
1162
|
std::vector<std::string> get_values() const {
|
|
1002
1163
|
std::string tensor_split_str;
|
|
1164
|
+
std::string tensor_buft_overrides_str;
|
|
1003
1165
|
int max_nonzero = 0;
|
|
1004
1166
|
for (size_t i = 0; i < llama_max_devices(); i++) {
|
|
1005
1167
|
if (tensor_split[i] > 0) {
|
|
@@ -1014,6 +1176,26 @@ struct test {
|
|
|
1014
1176
|
tensor_split_str += "/";
|
|
1015
1177
|
}
|
|
1016
1178
|
}
|
|
1179
|
+
if (tensor_buft_overrides.size() == 1) {
|
|
1180
|
+
// Last element of tensor_buft_overrides is always a null pattern
|
|
1181
|
+
// so if it is only one element long, it must be a null pattern.
|
|
1182
|
+
GGML_ASSERT(tensor_buft_overrides[0].pattern == nullptr);
|
|
1183
|
+
tensor_buft_overrides_str += "none";
|
|
1184
|
+
} else {
|
|
1185
|
+
for (size_t i = 0; i < tensor_buft_overrides.size()-1; i++) {
|
|
1186
|
+
// Last element of tensor_buft_overrides is always a null pattern
|
|
1187
|
+
if (tensor_buft_overrides[i].pattern == nullptr) {
|
|
1188
|
+
tensor_buft_overrides_str += "none";
|
|
1189
|
+
} else {
|
|
1190
|
+
tensor_buft_overrides_str += tensor_buft_overrides[i].pattern;
|
|
1191
|
+
tensor_buft_overrides_str += "=";
|
|
1192
|
+
tensor_buft_overrides_str += ggml_backend_buft_name(tensor_buft_overrides[i].buft);
|
|
1193
|
+
}
|
|
1194
|
+
if (i + 2 < tensor_buft_overrides.size()) {
|
|
1195
|
+
tensor_buft_overrides_str += ";";
|
|
1196
|
+
}
|
|
1197
|
+
}
|
|
1198
|
+
}
|
|
1017
1199
|
std::vector<std::string> values = { build_commit,
|
|
1018
1200
|
std::to_string(build_number),
|
|
1019
1201
|
cpu_info,
|
|
@@ -1037,10 +1219,12 @@ struct test {
|
|
|
1037
1219
|
std::to_string(no_kv_offload),
|
|
1038
1220
|
std::to_string(flash_attn),
|
|
1039
1221
|
tensor_split_str,
|
|
1222
|
+
tensor_buft_overrides_str,
|
|
1040
1223
|
std::to_string(use_mmap),
|
|
1041
1224
|
std::to_string(embeddings),
|
|
1042
1225
|
std::to_string(n_prompt),
|
|
1043
1226
|
std::to_string(n_gen),
|
|
1227
|
+
std::to_string(n_depth),
|
|
1044
1228
|
test_time,
|
|
1045
1229
|
std::to_string(avg_ns()),
|
|
1046
1230
|
std::to_string(stdev_ns()),
|
|
@@ -1218,7 +1402,7 @@ struct markdown_printer : public printer {
|
|
|
1218
1402
|
return 4;
|
|
1219
1403
|
}
|
|
1220
1404
|
if (field == "test") {
|
|
1221
|
-
return
|
|
1405
|
+
return 15;
|
|
1222
1406
|
}
|
|
1223
1407
|
|
|
1224
1408
|
int width = std::max((int) field.length(), 10);
|
|
@@ -1254,6 +1438,9 @@ struct markdown_printer : public printer {
|
|
|
1254
1438
|
if (field == "tensor_split") {
|
|
1255
1439
|
return "ts";
|
|
1256
1440
|
}
|
|
1441
|
+
if (field == "tensor_buft_overrides") {
|
|
1442
|
+
return "ot";
|
|
1443
|
+
}
|
|
1257
1444
|
return field;
|
|
1258
1445
|
}
|
|
1259
1446
|
|
|
@@ -1307,6 +1494,9 @@ struct markdown_printer : public printer {
|
|
|
1307
1494
|
if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) {
|
|
1308
1495
|
fields.emplace_back("tensor_split");
|
|
1309
1496
|
}
|
|
1497
|
+
if (params.tensor_buft_overrides.size() > 1 || !vec_vec_tensor_buft_override_equal(params.tensor_buft_overrides, cmd_params_defaults.tensor_buft_overrides)) {
|
|
1498
|
+
fields.emplace_back("tensor_buft_overrides");
|
|
1499
|
+
}
|
|
1310
1500
|
if (params.use_mmap.size() > 1 || params.use_mmap != cmd_params_defaults.use_mmap) {
|
|
1311
1501
|
fields.emplace_back("use_mmap");
|
|
1312
1502
|
}
|
|
@@ -1362,6 +1552,10 @@ struct markdown_printer : public printer {
|
|
|
1362
1552
|
} else {
|
|
1363
1553
|
snprintf(buf, sizeof(buf), "pp%d+tg%d", t.n_prompt, t.n_gen);
|
|
1364
1554
|
}
|
|
1555
|
+
if (t.n_depth > 0) {
|
|
1556
|
+
int len = strlen(buf);
|
|
1557
|
+
snprintf(buf + len, sizeof(buf) - len, " @ d%d", t.n_depth);
|
|
1558
|
+
}
|
|
1365
1559
|
value = buf;
|
|
1366
1560
|
} else if (field == "t/s") {
|
|
1367
1561
|
snprintf(buf, sizeof(buf), "%.2f ± %.2f", t.avg_ts(), t.stdev_ts());
|
|
@@ -1620,6 +1814,14 @@ int main(int argc, char ** argv) {
|
|
|
1620
1814
|
for (int i = 0; i < params.reps; i++) {
|
|
1621
1815
|
llama_kv_self_clear(ctx);
|
|
1622
1816
|
|
|
1817
|
+
if (t.n_depth > 0) {
|
|
1818
|
+
if (params.progress) {
|
|
1819
|
+
fprintf(stderr, "llama-bench: benchmark %d/%zu: depth run %d/%d\n", params_idx, params_count,
|
|
1820
|
+
i + 1, params.reps);
|
|
1821
|
+
}
|
|
1822
|
+
test_prompt(ctx, t.n_depth, t.n_batch, t.n_threads);
|
|
1823
|
+
}
|
|
1824
|
+
|
|
1623
1825
|
uint64_t t_start = get_time_ns();
|
|
1624
1826
|
|
|
1625
1827
|
if (t.n_prompt > 0) {
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# llava (legacy)
|
|
2
|
+
|
|
1
3
|
add_library(llava OBJECT
|
|
2
4
|
llava.cpp
|
|
3
5
|
llava.h
|
|
@@ -22,40 +24,53 @@ if (BUILD_SHARED_LIBS)
|
|
|
22
24
|
install(TARGETS llava_shared LIBRARY)
|
|
23
25
|
endif()
|
|
24
26
|
|
|
27
|
+
# mtmd
|
|
28
|
+
|
|
29
|
+
add_library(mtmd OBJECT
|
|
30
|
+
mtmd.cpp
|
|
31
|
+
mtmd.h
|
|
32
|
+
clip.cpp
|
|
33
|
+
clip.h
|
|
34
|
+
clip-impl.h
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
target_link_libraries(mtmd PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
|
38
|
+
|
|
39
|
+
target_include_directories(mtmd PUBLIC .)
|
|
40
|
+
target_include_directories(mtmd PRIVATE ../..)
|
|
41
|
+
target_include_directories(mtmd PRIVATE ../../common) # for stb_image.h
|
|
42
|
+
|
|
43
|
+
target_compile_features(mtmd PRIVATE cxx_std_17)
|
|
44
|
+
|
|
45
|
+
add_library(mtmd_static STATIC $<TARGET_OBJECTS:mtmd>)
|
|
46
|
+
if (BUILD_SHARED_LIBS)
|
|
47
|
+
set_target_properties(mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
48
|
+
target_compile_definitions(mtmd PRIVATE LLAMA_SHARED LLAMA_BUILD)
|
|
49
|
+
add_library(mtmd_shared SHARED $<TARGET_OBJECTS:mtmd>)
|
|
50
|
+
target_link_libraries(mtmd_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
|
51
|
+
install(TARGETS mtmd_shared LIBRARY)
|
|
52
|
+
endif()
|
|
53
|
+
|
|
25
54
|
if (NOT MSVC)
|
|
26
55
|
target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
|
|
56
|
+
target_compile_options(mtmd PRIVATE -Wno-cast-qual) # stb_image.h
|
|
27
57
|
endif()
|
|
28
58
|
|
|
29
59
|
if(TARGET BUILD_INFO)
|
|
30
60
|
add_dependencies(llava BUILD_INFO)
|
|
61
|
+
add_dependencies(mtmd BUILD_INFO)
|
|
31
62
|
endif()
|
|
32
63
|
|
|
33
|
-
|
|
34
|
-
add_executable(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
|
|
38
|
-
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
39
|
-
|
|
40
|
-
set(TARGET llama-minicpmv-cli)
|
|
41
|
-
add_executable(${TARGET} minicpmv-cli.cpp)
|
|
42
|
-
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-minicpmv-cli)
|
|
43
|
-
install(TARGETS ${TARGET} RUNTIME)
|
|
44
|
-
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
|
|
45
|
-
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
46
|
-
|
|
47
|
-
set(TARGET llama-qwen2vl-cli)
|
|
48
|
-
add_executable(${TARGET} qwen2vl-cli.cpp)
|
|
49
|
-
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-qwen2vl-cli)
|
|
50
|
-
install(TARGETS ${TARGET} RUNTIME)
|
|
51
|
-
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
|
|
52
|
-
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
64
|
+
add_executable(llama-llava-cli deprecation-warning.cpp)
|
|
65
|
+
add_executable(llama-gemma3-cli deprecation-warning.cpp)
|
|
66
|
+
add_executable(llama-minicpmv-cli deprecation-warning.cpp)
|
|
67
|
+
add_executable(llama-qwen2vl-cli deprecation-warning.cpp)
|
|
53
68
|
|
|
54
|
-
set(TARGET llama-
|
|
55
|
-
add_executable(${TARGET}
|
|
56
|
-
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-
|
|
69
|
+
set(TARGET llama-mtmd-cli)
|
|
70
|
+
add_executable(${TARGET} mtmd-cli.cpp)
|
|
71
|
+
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli)
|
|
57
72
|
install(TARGETS ${TARGET} RUNTIME)
|
|
58
|
-
target_link_libraries(${TARGET} PRIVATE common
|
|
73
|
+
target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
|
|
59
74
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
60
75
|
|
|
61
76
|
set(TARGET llama-llava-clip-quantize-cli)
|