@fugood/llama.node 0.3.17 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +39 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +366 -19
- package/src/LlamaCompletionWorker.h +30 -10
- package/src/LlamaContext.cpp +213 -5
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
- package/src/llama.cpp/.github/workflows/build.yml +41 -762
- package/src/llama.cpp/.github/workflows/docker.yml +5 -2
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +12 -12
- package/src/llama.cpp/CMakeLists.txt +5 -17
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +31 -3
- package/src/llama.cpp/common/arg.cpp +48 -29
- package/src/llama.cpp/common/chat.cpp +128 -106
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +37 -1
- package/src/llama.cpp/common/common.h +18 -9
- package/src/llama.cpp/common/llguidance.cpp +1 -0
- package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/src/llama.cpp/common/minja/minja.hpp +69 -36
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +57 -50
- package/src/llama.cpp/examples/CMakeLists.txt +2 -23
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml.h +10 -7
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
- package/src/llama.cpp/ggml/src/ggml.c +29 -20
- package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/src/llama.cpp/include/llama.h +52 -11
- package/src/llama.cpp/requirements/requirements-all.txt +3 -3
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +6 -0
- package/src/llama.cpp/src/llama-arch.cpp +3 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +17 -7
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +389 -501
- package/src/llama.cpp/src/llama-context.h +44 -32
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +20 -38
- package/src/llama.cpp/src/llama-graph.h +12 -8
- package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
- package/src/llama.cpp/src/llama-kv-cache.h +271 -85
- package/src/llama.cpp/src/llama-memory.h +11 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +316 -69
- package/src/llama.cpp/src/llama-model.h +8 -1
- package/src/llama.cpp/src/llama-quant.cpp +15 -13
- package/src/llama.cpp/src/llama-sampling.cpp +18 -6
- package/src/llama.cpp/src/llama-vocab.cpp +42 -4
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +14 -0
- package/src/llama.cpp/tests/CMakeLists.txt +10 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
- package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
- package/src/llama.cpp/tests/test-chat.cpp +3 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
- package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
- package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
- package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
- package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.h +0 -135
- package/src/llama.cpp/examples/llava/llava.cpp +0 -586
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/mtmd.h +0 -168
- package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
package/src/LlamaCompletionWorker.cpp

@@ -1,6 +1,80 @@
 #include "LlamaCompletionWorker.h"
 #include "LlamaContext.h"
 
+// Computes FNV-1a hash of the data
+static std::string fnv_hash(const uint8_t * data, size_t len) {
+  const uint64_t fnv_prime = 0x100000001b3ULL;
+  uint64_t hash = 0xcbf29ce484222325ULL;
+
+  for (size_t i = 0; i < len; ++i) {
+    hash ^= data[i];
+    hash *= fnv_prime;
+  }
+  return std::to_string(hash);
+}
+
+static const std::string base64_chars =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789+/";
+
+// Base64 decoding function
+static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
+  std::vector<uint8_t> decoded;
+  int in_len = encoded_string.size();
+  int i = 0;
+  int j = 0;
+  int in_ = 0;
+  unsigned char char_array_4[4], char_array_3[3];
+
+  while (in_len-- && (encoded_string[in_] != '=')) {
+    if (isspace(encoded_string[in_])) {
+      in_++;
+      continue;
+    }
+
+    if (encoded_string[in_] == '=' || base64_chars.find(encoded_string[in_]) == std::string::npos) {
+      break;
+    }
+
+    char_array_4[i++] = encoded_string[in_]; in_++;
+    if (i == 4) {
+      for (i = 0; i < 4; i++) {
+        char_array_4[i] = base64_chars.find(char_array_4[i]);
+      }
+
+      char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+      for (i = 0; i < 3; i++) {
+        decoded.push_back(char_array_3[i]);
+      }
+      i = 0;
+    }
+  }
+
+  if (i) {
+    for (j = i; j < 4; j++) {
+      char_array_4[j] = 0;
+    }
+
+    for (j = 0; j < 4; j++) {
+      char_array_4[j] = base64_chars.find(char_array_4[j]);
+    }
+
+    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+    char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+    for (j = 0; j < i - 1; j++) {
+      decoded.push_back(char_array_3[j]);
+    }
+  }
+
+  return decoded;
+}
+
 size_t common_part(const std::vector<llama_token> &a,
                    const std::vector<llama_token> &b) {
   size_t i = 0;
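The fnv_hash helper above gives each decoded bitmap a stable id so mtmd can recognize a re-sent image and reuse its KV-cache entries. A self-contained sketch of the same FNV-1a construction (the pixel buffers below are invented for illustration):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Same FNV-1a construction as fnv_hash() in the diff above.
    static std::string fnv1a(const uint8_t *data, size_t len) {
      uint64_t hash = 0xcbf29ce484222325ULL;   // FNV offset basis
      for (size_t i = 0; i < len; ++i) {
        hash ^= data[i];
        hash *= 0x100000001b3ULL;              // FNV prime
      }
      return std::to_string(hash);
    }

    int main() {
      // Two hypothetical RGB buffers; identical pixels produce identical
      // ids, so a re-sent image can hit the multimodal KV cache.
      uint8_t img_a[] = {255, 0, 0, 255, 0, 0};
      uint8_t img_b[] = {255, 0, 0, 255, 0, 0};
      printf("%s\n%s\n", fnv1a(img_a, sizeof img_a).c_str(),
                         fnv1a(img_b, sizeof img_b).c_str());
    }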
@@ -10,6 +84,230 @@ size_t common_part(const std::vector<llama_token> &a,
   return i;
 }
 
+// Process images and add them to the tokenized input
+llama_pos processImage(
+    const mtmd_context* mtmd_ctx,
+    llama_context* ctx,
+    LlamaSessionPtr sess,
+    const std::vector<std::string>& image_paths,
+    const common_params& params,
+    std::vector<llama_token>& text_tokens
+) {
+  if (mtmd_ctx == nullptr) {
+    return false;
+  }
+
+  // Multimodal path
+  std::string full_prompt = params.prompt;
+  // Add image marker if it doesn't already exist
+  if (full_prompt.find("<__image__>") == std::string::npos) {
+    full_prompt += " <__image__>";
+  }
+
+  // Prepare bitmaps array for all images
+  mtmd::bitmaps bitmaps;
+
+  // Load all images
+  for (const auto& image_path : image_paths) {
+    fprintf(stdout, "[DEBUG] Loading image: %s\n",
+            image_path.substr(0, 50).c_str()); // Only log part of path for base64
+
+    // Check if it's a base64 image
+    if (image_path.compare(0, 11, "data:image/") == 0) {
+
+      // Parse base64 data
+      std::vector<std::string> parts;
+      size_t comma_pos = image_path.find(',');
+      if (comma_pos == std::string::npos) {
+        bitmaps.entries.clear();
+        return false;
+      }
+
+      std::string header = image_path.substr(0, comma_pos);
+      std::string base64_data = image_path.substr(comma_pos + 1);
+
+      if (header.find("base64") == std::string::npos) {
+        bitmaps.entries.clear();
+        return false;
+      }
+
+      // Decode base64
+      try {
+        // Decode base64 to binary
+        std::vector<uint8_t> image_data = base64_decode(base64_data);
+
+        // Load bitmap from memory buffer using direct initialization
+        mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(image_data.data(), image_data.size()));
+        if (!bmp.ptr) {
+          bitmaps.entries.clear();
+          return false;
+        }
+
+        // Calculate bitmap hash (for KV caching)
+        std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
+        bmp.set_id(hash.c_str());
+        bitmaps.entries.push_back(std::move(bmp));
+      } catch (const std::exception& e) {
+        bitmaps.entries.clear();
+        return false;
+      }
+    } else if (image_path.compare(0, 7, "http://") == 0 || image_path.compare(0, 8, "https://") == 0) {
+      // HTTP URLs are not supported yet
+      bitmaps.entries.clear();
+      return false;
+    } else {
+      // Check if file exists
+      FILE* file = fopen(image_path.c_str(), "rb");
+      if (file == nullptr) {
+        bitmaps.entries.clear();
+        return false;
+      }
+
+      // Get file size
+      fseek(file, 0, SEEK_END);
+      long file_size = ftell(file);
+      fseek(file, 0, SEEK_SET);
+      fclose(file);
+
+      // Create bitmap directly
+      mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(image_path.c_str()));
+      if (!bmp.ptr) {
+        bitmaps.entries.clear();
+        return false;
+      }
+
+      // Calculate bitmap hash (for KV caching)
+      std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
+      bmp.set_id(hash.c_str());
+      bitmaps.entries.push_back(std::move(bmp));
+    }
+  }
+
+  mtmd_input_chunks* chunks = mtmd_input_chunks_init();
+  if (chunks == nullptr) {
+    bitmaps.entries.clear();
+    return false;
+  }
+
+  // Create input text
+  mtmd_input_text input_text;
+  input_text.text = full_prompt.c_str(); // Use the full prompt with image marker
+  input_text.add_special = true;   // Add BOS token if this is the first message
+  input_text.parse_special = true; // Parse special tokens like <__image__>
+
+  // Tokenize the text and images
+  fprintf(stdout, "[DEBUG] Tokenizing text and %zu images\n", bitmaps.entries.size());
+  auto bitmaps_c_ptr = bitmaps.c_ptr();
+
+  // Cast away const for mtmd_tokenize
+  int32_t res = mtmd_tokenize(
+      const_cast<mtmd_context*>(mtmd_ctx),
+      chunks,
+      &input_text,
+      bitmaps_c_ptr.data(),
+      bitmaps_c_ptr.size()
+  );
+
+  if (res != 0) {
+    mtmd_input_chunks_free(chunks);
+    bitmaps.entries.clear();
+    return false;
+  }
+
+  // Log chunk information
+  size_t num_chunks = mtmd_input_chunks_size(chunks);
+  fprintf(stdout, "[DEBUG] Tokenization successful: num_chunks=%zu\n", num_chunks);
+
+  // Clear text_tokens before adding new tokens
+  text_tokens.clear();
+
+  // Create a vector to store all tokens (both text and image)
+  std::vector<llama_token> all_tokens;
+
+  // Track the total number of tokens (both text and image)
+  size_t total_token_count = 0;
+
+  // chunk pos
+  std::vector<size_t> chunk_pos;
+  for (size_t i = 0; i < num_chunks; i++) {
+    chunk_pos.push_back(total_token_count);
+
+    const mtmd_input_chunk* chunk = mtmd_input_chunks_get(chunks, i);
+    mtmd_input_chunk_type chunk_type = mtmd_input_chunk_get_type(chunk);
+
+    if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+      size_t n_tokens;
+      const llama_token* tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+
+      // Add text tokens
+      text_tokens.insert(text_tokens.end(), tokens, tokens + n_tokens);
+      all_tokens.insert(all_tokens.end(), tokens, tokens + n_tokens);
+      total_token_count += n_tokens;
+    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+      const mtmd_image_tokens* img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+      size_t n_tokens = mtmd_image_tokens_get_n_tokens(img_tokens);
+      size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
+
+      for (size_t j = 0; j < n_pos; j++) {
+        all_tokens.push_back(LLAMA_TOKEN_NULL);
+      }
+      total_token_count += n_pos;
+    }
+  }
+
+  llama_pos n_past = common_part(*sess->tokens_ptr(), all_tokens);
+
+  llama_pos new_n_past = n_past;
+
+  for (size_t i = 0; i < chunk_pos.size(); i++) {
+    fprintf(stdout, "[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu\n", i, n_past, chunk_pos[i]);
+
+    // Process chunk only if it's after the current n_past
+    if (chunk_pos[i] >= new_n_past) {
+      bool chunk_logits_last = (i == num_chunks - 1);
+      auto chunk = mtmd_input_chunks_get(chunks, i);
+
+      // Cast away const for mtmd_helper_eval_chunk_single
+      int32_t res = mtmd_helper_eval_chunk_single(
+          const_cast<mtmd_context*>(mtmd_ctx),
+          ctx,
+          chunk,
+          n_past,
+          0,
+          params.n_batch, // batch size
+          chunk_logits_last,
+          &new_n_past
+      );
+
+      if (res != 0) {
+        mtmd_input_chunks_free(chunks);
+        bitmaps.entries.clear();
+        return false;
+      }
+      n_past = new_n_past;
+    }
+  }
+
+  if (n_past == total_token_count) {
+    // we have to evaluate at least 1 token to generate logits.
+    n_past--;
+  }
+
+  // Update sampling context to process token sequences
+  for (auto & token : all_tokens) {
+    if (token == LLAMA_TOKEN_NULL) {
+      continue;
+    }
+  }
+  // Set the tokens
+  sess->set_tokens(std::move(all_tokens));
+
+  // Clean up image resources
+  mtmd_input_chunks_free(chunks);
+  bitmaps.entries.clear();
+  return n_past;
+}
+
 size_t findStoppingStrings(const std::string &text,
                            const size_t last_token_size,
                            const std::vector<std::string> &stop_words) {
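processImage records where each chunk starts (chunk_pos) so that chunks already covered by the cached prefix are skipped and evaluation resumes mid-prompt. A standalone simulation of that bookkeeping, with invented chunk sizes:

    #include <cstdio>
    #include <vector>

    int main() {
      // Hypothetical chunk layout: 5 text tokens, a 64-position image, 3 text tokens.
      std::vector<size_t> chunk_sizes = {5, 64, 3};

      std::vector<size_t> chunk_pos;   // starting position of each chunk
      size_t total = 0;
      for (size_t sz : chunk_sizes) {
        chunk_pos.push_back(total);
        total += sz;
      }

      size_t n_past = 69;              // suppose 69 positions are already in the KV cache
      for (size_t i = 0; i < chunk_pos.size(); i++) {
        // Mirrors processImage(): a chunk is re-evaluated only if it starts
        // at or past the cached prefix.
        bool eval = chunk_pos[i] >= n_past;
        printf("chunk %zu starts at %zu -> %s\n", i, chunk_pos[i],
               eval ? "evaluate" : "cached");
      }
    }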
@@ -36,9 +334,11 @@ LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
     Napi::Function callback, common_params params,
     std::vector<std::string> stop_words,
-    int32_t chat_format
+    int32_t chat_format,
+    std::vector<std::string> image_paths)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
-      _params(params), _stop_words(stop_words), _chat_format(chat_format)
+      _params(params), _stop_words(stop_words), _chat_format(chat_format),
+      _image_paths(image_paths) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
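The constructor gains an image_paths parameter; the companion header (see the .h hunks below) declares it with a default of {}, so pre-0.4.x call sites keep compiling. A minimal illustration of that defaulted-parameter pattern (makeWorker is a hypothetical stand-in, not package API):

    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for the worker constructor's parameter list.
    static void makeWorker(int chat_format,
                           std::vector<std::string> image_paths = {}) {
      printf("chat_format=%d, images=%zu\n", chat_format, image_paths.size());
    }

    int main() {
      makeWorker(1);                       // old call site still valid
      makeWorker(1, {"/tmp/cat.png"});     // new multimodal call site
    }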
@@ -70,18 +370,59 @@ void LlamaCompletionWorker::Execute() {
   LlamaCppSampling sampling{common_sampler_init(model, _params.sampling),
                             common_sampler_free};
 
-  std::vector<llama_token> prompt_tokens
-
-
-  if (
-
-
-
+  std::vector<llama_token> prompt_tokens;
+
+  // Process images if any are provided
+  if (!_image_paths.empty()) {
+    const auto* mtmd_ctx = _sess->get_mtmd_ctx();
+
+    if (mtmd_ctx != nullptr) {
+      // Process the images and get the tokens
+      n_cur = processImage(
+          mtmd_ctx,
+          ctx,
+          _sess,
+          _image_paths,
+          _params,
+          prompt_tokens
+      );
+
+      if (n_cur <= 0) {
+        SetError("Failed to process images");
+        _sess->get_mutex().unlock();
+        return;
+      }
+
+      fprintf(stdout, "[DEBUG] Image processing successful, n_cur=%zu, tokens=%zu\n",
+              n_cur, _sess->tokens_ptr()->size());
+
+      n_input = _sess->tokens_ptr()->size();
+      if (n_cur == n_input) {
+        --n_cur;
+      }
+      n_input -= n_cur;
+      llama_kv_self_seq_rm(ctx, 0, n_cur, -1);
+    } else {
+      SetError("Multimodal context not initialized");
+      _sess->get_mutex().unlock();
+      return;
+    }
+  } else {
+    // Text-only path
+    prompt_tokens = ::common_tokenize(ctx, _params.prompt, add_bos);
+    n_input = prompt_tokens.size();
+
+    if (_sess->tokens_ptr()->size() > 0) {
+      n_cur = common_part(*(_sess->tokens_ptr()), prompt_tokens);
+      if (n_cur == n_input) {
+        --n_cur;
+      }
+      n_input -= n_cur;
+      llama_kv_self_seq_rm(ctx, 0, n_cur, -1);
     }
-
-
+    // Set the tokens
+    _sess->set_tokens(std::move(prompt_tokens));
   }
-  _sess->set_tokens(std::move(prompt_tokens));
 
   const int max_len = _params.n_predict < 0 ? 0 : _params.n_predict;
   _sess->tokens_ptr()->reserve(_sess->tokens_ptr()->size() + max_len);
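Both branches above reuse the longest shared prefix between the cached session tokens and the new prompt, and deliberately step back one token when everything matches so llama_decode still produces fresh logits. A standalone sketch of that arithmetic (the token values are arbitrary):

    #include <cstdio>
    #include <vector>

    static size_t common_part(const std::vector<int> &a, const std::vector<int> &b) {
      size_t i = 0;
      while (i < a.size() && i < b.size() && a[i] == b[i]) i++;
      return i;
    }

    int main() {
      std::vector<int> cached = {1, 7, 7, 3, 9};
      std::vector<int> prompt = {1, 7, 7, 3, 9};   // identical prompt re-sent

      size_t n_input = prompt.size();
      size_t n_cur = common_part(cached, prompt);  // 5: everything is cached
      if (n_cur == n_input) {
        --n_cur;  // re-decode the last token so sampling has fresh logits
      }
      n_input -= n_cur;
      // The worker would now call llama_kv_self_seq_rm(ctx, 0, n_cur, -1)
      // and decode the remaining n_input token(s).
      printf("reuse %zu cached positions, decode %zu token(s)\n", n_cur, n_input);
    }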
@@ -99,8 +440,8 @@ void LlamaCompletionWorker::Execute() {
       const int n_left = n_cur - n_keep - 1;
       const int n_discard = n_left / 2;
 
-
-
+      llama_kv_self_seq_rm(ctx, 0, n_keep + 1, n_keep + n_discard + 1);
+      llama_kv_self_seq_add(ctx, 0, n_keep + 1 + n_discard, n_cur, -n_discard);
 
       // shift the tokens
       embd->insert(embd->begin() + n_keep + 1,
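The two llama_kv_self_* calls implement the usual llama.cpp context shift: keep the first n_keep tokens, drop half of the remaining ones, slide the survivors down. A worked example with made-up numbers:

    #include <cstdio>

    int main() {
      int n_cur  = 100;  // positions currently in the KV cache (hypothetical)
      int n_keep = 10;   // prompt prefix that must survive the shift

      int n_left    = n_cur - n_keep - 1;  // 89 shiftable positions
      int n_discard = n_left / 2;          // 44 positions get dropped

      // Equivalent of llama_kv_self_seq_rm(ctx, 0, n_keep + 1, n_keep + n_discard + 1):
      printf("remove positions [%d, %d)\n", n_keep + 1, n_keep + n_discard + 1);
      // Equivalent of llama_kv_self_seq_add(ctx, 0, n_keep + 1 + n_discard, n_cur, -n_discard):
      printf("shift positions [%d, %d) left by %d\n",
             n_keep + 1 + n_discard, n_cur, n_discard);
    }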
@@ -110,12 +451,18 @@ void LlamaCompletionWorker::Execute() {
       n_cur -= n_discard;
       _result.truncated = true;
     }
-
-
-    if
-
-
+
+    // For multimodal input, n_past might already be set
+    // Only decode text tokens if we have any input left
+    if (n_input > 0) {
+      int ret = llama_decode(
+          ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
+      if (ret < 0) {
+        SetError("Failed to decode token, code: " + std::to_string(ret));
+        break;
+      }
     }
+
     // sample the next token
     const llama_token new_token_id =
         common_sampler_sample(sampling.get(), ctx, -1);
package/src/LlamaCompletionWorker.h

@@ -1,5 +1,11 @@
+#pragma once
+
 #include "common.hpp"
+#include <atomic>
 #include <functional>
+#include <napi.h>
+#include "tools/mtmd/mtmd.h"
+#include "tools/mtmd/clip.h"
 
 struct CompletionResult {
   std::string text = "";
@@ -14,28 +20,42 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
                         Napi::Function callback, common_params params,
-                        std::vector<std::string> stop_words
-                        int32_t chat_format
+                        std::vector<std::string> stop_words,
+                        int32_t chat_format,
+                        std::vector<std::string> image_paths = {});
 
   ~LlamaCompletionWorker();
 
-
+  Napi::Promise GetPromise() { return Napi::Promise::Deferred::Promise(); }
+
+  void OnComplete(std::function<void()> cb) {
+    _onComplete = cb;
+  }
 
-
+  void SetStop() {
+    _stop = true;
+  }
 
 protected:
-  void Execute();
-  void OnOK();
-  void OnError(const Napi::Error &err);
+  void Execute() override;
+  void OnOK() override;
+  void OnError(const Napi::Error &err) override;
 
 private:
   LlamaSessionPtr _sess;
   common_params _params;
   std::vector<std::string> _stop_words;
   int32_t _chat_format;
-
+  std::vector<std::string> _image_paths;
+  std::function<void()> _onComplete;
   bool _has_callback = false;
   bool _stop = false;
-
-
+  Napi::ThreadSafeFunction _tsfn;
+  struct {
+    size_t tokens_evaluated = 0;
+    size_t tokens_predicted = 0;
+    bool truncated = false;
+    bool context_full = false;
+    std::string text;
+  } _result;
 };
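The private _result struct above batches everything produced off the JavaScript thread; only OnOK and the ThreadSafeFunction cross back into JS. A plain-C++ sketch of that accumulate-then-deliver pattern, with std::thread standing in for Napi::AsyncWorker:

    #include <cstdio>
    #include <functional>
    #include <string>
    #include <thread>

    struct Result {            // mirrors the anonymous _result struct above
      size_t tokens_predicted = 0;
      bool truncated = false;
      std::string text;
    };

    int main() {
      Result result;
      std::function<void(const Result&)> on_ok = [](const Result &r) {
        // In the addon this hand-off happens via Napi::ThreadSafeFunction.
        printf("predicted %zu tokens: %s\n", r.tokens_predicted, r.text.c_str());
      };

      std::thread worker([&] {
        // Execute(): do the heavy work off the main thread,
        // writing only into the worker-owned result object.
        result.text = "hello";
        result.tokens_predicted = 1;
      });
      worker.join();   // OnOK() then runs back on the main thread
      on_ok(result);
    }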