@fugood/llama.node 0.4.6 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +22 -4
- package/lib/index.js +42 -18
- package/lib/index.ts +57 -23
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +22 -381
- package/src/LlamaCompletionWorker.h +2 -4
- package/src/LlamaContext.cpp +40 -100
- package/src/LlamaContext.h +1 -0
- package/src/TokenizeWorker.cpp +33 -4
- package/src/TokenizeWorker.h +2 -5
- package/src/common.hpp +389 -0
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
package/src/TokenizeWorker.cpp
CHANGED
|
@@ -2,12 +2,23 @@
|
|
|
2
2
|
#include "LlamaContext.h"
|
|
3
3
|
|
|
4
4
|
// Builds an async tokenize job bound to the calling environment.
//
// `text` is the prompt to tokenize and `media_paths` lists the media entries
// (file paths or data: URIs) that accompany it. Both are taken by value and
// moved into the worker so that large payloads (e.g. base64 media) are not
// copied a second time.
TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
                               LlamaSessionPtr &sess, std::string text,
                               std::vector<std::string> media_paths)
    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
      _text(std::move(text)), _media_paths(std::move(media_paths)) {}
|
|
7
7
|
|
|
8
8
|
void TokenizeWorker::Execute() {
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
auto mtmd_ctx = _sess->get_mtmd_ctx();
|
|
10
|
+
if (!_media_paths.empty()) {
|
|
11
|
+
try {
|
|
12
|
+
_result = tokenizeWithMedia(mtmd_ctx, _text, _media_paths);
|
|
13
|
+
mtmd_input_chunks_free(_result.chunks);
|
|
14
|
+
} catch (const std::exception &e) {
|
|
15
|
+
SetError(e.what());
|
|
16
|
+
}
|
|
17
|
+
} else {
|
|
18
|
+
const auto tokens = common_tokenize(_sess->context(), _text, false);
|
|
19
|
+
_result.tokens = tokens;
|
|
20
|
+
_result.has_media = false;
|
|
21
|
+
}
|
|
11
22
|
}
|
|
12
23
|
|
|
13
24
|
void TokenizeWorker::OnOK() {
|
|
@@ -18,6 +29,24 @@ void TokenizeWorker::OnOK() {
|
|
|
18
29
|
memcpy(tokens.Data(), _result.tokens.data(),
|
|
19
30
|
_result.tokens.size() * sizeof(llama_token));
|
|
20
31
|
result.Set("tokens", tokens);
|
|
32
|
+
result.Set("has_media", _result.has_media);
|
|
33
|
+
if (_result.has_media) {
|
|
34
|
+
auto bitmap_hashes = Napi::Array::New(Napi::AsyncWorker::Env(), _result.bitmap_hashes.size());
|
|
35
|
+
for (size_t i = 0; i < _result.bitmap_hashes.size(); i++) {
|
|
36
|
+
bitmap_hashes.Set(i, _result.bitmap_hashes[i]);
|
|
37
|
+
}
|
|
38
|
+
result.Set("bitmap_hashes", bitmap_hashes);
|
|
39
|
+
auto chunk_pos = Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos.size());
|
|
40
|
+
for (size_t i = 0; i < _result.chunk_pos.size(); i++) {
|
|
41
|
+
chunk_pos.Set(i, _result.chunk_pos[i]);
|
|
42
|
+
}
|
|
43
|
+
result.Set("chunk_pos", chunk_pos);
|
|
44
|
+
auto chunk_pos_media = Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos_media.size());
|
|
45
|
+
for (size_t i = 0; i < _result.chunk_pos_media.size(); i++) {
|
|
46
|
+
chunk_pos_media.Set(i, _result.chunk_pos_media[i]);
|
|
47
|
+
}
|
|
48
|
+
result.Set("chunk_pos_media", chunk_pos_media);
|
|
49
|
+
}
|
|
21
50
|
Napi::Promise::Deferred::Resolve(result);
|
|
22
51
|
}
|
|
23
52
|
|
package/src/TokenizeWorker.h
CHANGED
|
@@ -1,15 +1,11 @@
|
|
|
1
1
|
#include "common.hpp"
|
|
2
2
|
#include <vector>
|
|
3
3
|
|
|
4
|
-
struct TokenizeResult {
|
|
5
|
-
std::vector<llama_token> tokens;
|
|
6
|
-
};
|
|
7
|
-
|
|
8
4
|
class TokenizeWorker : public Napi::AsyncWorker,
|
|
9
5
|
public Napi::Promise::Deferred {
|
|
10
6
|
public:
|
|
11
7
|
TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
|
|
12
|
-
std::string text);
|
|
8
|
+
std::string text, std::vector<std::string> media_paths);
|
|
13
9
|
|
|
14
10
|
protected:
|
|
15
11
|
void Execute();
|
|
@@ -19,5 +15,6 @@ protected:
|
|
|
19
15
|
private:
|
|
20
16
|
LlamaSessionPtr _sess;
|
|
21
17
|
std::string _text;
|
|
18
|
+
std::vector<std::string> _media_paths;
|
|
22
19
|
TokenizeResult _result;
|
|
23
20
|
};
|
package/src/common.hpp
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
#include "common/common.h"
|
|
4
4
|
#include "common/sampling.h"
|
|
5
|
+
#include "tools/mtmd/mtmd.h"
|
|
6
|
+
#include "tools/mtmd/clip.h"
|
|
5
7
|
#include "chat.h"
|
|
6
8
|
#include "llama.h"
|
|
7
9
|
#include "tools/mtmd/mtmd.h"
|
|
@@ -120,3 +122,390 @@ private:
|
|
|
120
122
|
};
|
|
121
123
|
|
|
122
124
|
typedef std::shared_ptr<LlamaSession> LlamaSessionPtr;
|
|
125
|
+
|
|
126
|
+
static size_t common_tokens_part(const std::vector<llama_token> &a,
|
|
127
|
+
const std::vector<llama_token> &b) {
|
|
128
|
+
size_t i = 0;
|
|
129
|
+
while (i < a.size() && i < b.size() && a[i] == b[i]) {
|
|
130
|
+
i++;
|
|
131
|
+
}
|
|
132
|
+
return i;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// Hashes `len` bytes starting at `data` with 64-bit FNV-1a (xor the byte in,
// then multiply by the prime) and returns the result as a decimal string.
static std::string fnv_hash(const uint8_t * data, size_t len) {
  constexpr uint64_t kOffsetBasis = 0xcbf29ce484222325ULL;
  constexpr uint64_t kPrime = 0x100000001b3ULL;

  uint64_t digest = kOffsetBasis;
  const uint8_t * const end = data + len;
  for (const uint8_t * cursor = data; cursor != end; ++cursor) {
    digest = (digest ^ *cursor) * kPrime;
  }
  return std::to_string(digest);
}
|
|
146
|
+
|
|
147
|
+
// Standard base64 alphabet; a character's index is its 6-bit value.
static const std::string base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

// Decodes standard-alphabet base64 text into raw bytes.
//
// Whitespace is skipped. Decoding stops at the first '=' padding character or
// at the first character outside the alphabet; whatever was decoded up to that
// point is returned (best effort, no exception is raised for malformed input).
// A trailing group of 2 or 3 characters yields 1 or 2 bytes respectively; a
// lone trailing character carries no complete byte and is dropped.
static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
  std::vector<uint8_t> decoded;
  decoded.reserve(encoded_string.size() / 4 * 3);

  unsigned char quad[4] = {0, 0, 0, 0};
  size_t filled = 0;

  for (const char ch : encoded_string) {
    if (ch == '=') {
      break;
    }
    // isspace() requires a value representable as unsigned char; passing a
    // negative plain char (any byte >= 0x80) is undefined behaviour.
    if (isspace(static_cast<unsigned char>(ch))) {
      continue;
    }

    const size_t value = base64_chars.find(ch);
    if (value == std::string::npos) {
      break;
    }

    quad[filled++] = static_cast<unsigned char>(value);
    if (filled == 4) {
      decoded.push_back(static_cast<uint8_t>((quad[0] << 2) | (quad[1] >> 4)));
      decoded.push_back(static_cast<uint8_t>(((quad[1] & 0xf) << 4) | (quad[2] >> 2)));
      decoded.push_back(static_cast<uint8_t>(((quad[2] & 0x3) << 6) | quad[3]));
      filled = 0;
    }
  }

  if (filled > 1) {
    // Zero the unused slots so the partial group decodes deterministically.
    for (size_t k = filled; k < 4; k++) {
      quad[k] = 0;
    }

    const uint8_t tail[2] = {
        static_cast<uint8_t>((quad[0] << 2) | (quad[1] >> 4)),
        static_cast<uint8_t>(((quad[1] & 0xf) << 4) | (quad[2] >> 2)),
    };
    for (size_t k = 0; k + 1 < filled; k++) {
      decoded.push_back(tail[k]);
    }
  }

  return decoded;
}
|
|
208
|
+
|
|
209
|
+
// Output of a tokenization request, with or without media.
struct TokenizeResult {
  // Flattened token stream. tokenizeWithMedia() appends the text tokens of
  // each text chunk and one LLAMA_TOKEN_NULL placeholder per position of each
  // image/audio chunk.
  std::vector<llama_token> tokens;

  // True when at least one media entry was supplied.
  bool has_media = false;
  // One FNV-1a hash string per loaded media bitmap, in input order.
  std::vector<std::string> bitmap_hashes;
  // Offset into `tokens` at which each chunk starts.
  std::vector<size_t> chunk_pos; // both text and media
  // Offset into `tokens` at which each image/audio chunk starts.
  std::vector<size_t> chunk_pos_media; // media only
  // Raw chunk list obtained from mtmd_input_chunks_init(). The struct does not
  // release it; the callers visible in this code free it with
  // mtmd_input_chunks_free().
  mtmd_input_chunks* chunks = nullptr;
};
|
|
218
|
+
|
|
219
|
+
// Tokenizes `prompt` together with the media referenced by `media_paths`.
//
// Each entry of `media_paths` is either a "data:image/..." / "data:audio/..."
// URI carrying base64 data, or a regular file path. http(s) URLs are rejected.
// Every loaded bitmap is tagged with the FNV-1a hash of its full data buffer,
// which is also recorded in the result.
//
// Returns a TokenizeResult whose `chunks` pointer is owned by the caller and
// must be released with mtmd_input_chunks_free().
//
// Throws std::runtime_error when the multimodal context is null, a media entry
// cannot be parsed or loaded, or mtmd_tokenize() reports failure.
static TokenizeResult tokenizeWithMedia(
  const mtmd_context* mtmd_ctx,
  const std::string &prompt,
  const std::vector<std::string> &media_paths
) {
  if (mtmd_ctx == nullptr) {
    throw std::runtime_error("Multimodal context is not initialized");
  }

  TokenizeResult result;
  result.has_media = !media_paths.empty();

  // Locals are destroyed during stack unwinding, so no manual clearing is
  // needed before any of the throws below.
  mtmd::bitmaps bitmaps;

  // Load all media paths
  for (const auto& media_path : media_paths) {
    fprintf(stdout, "[DEBUG] Loading media: %s\n",
            media_path.substr(0, 50).c_str()); // Only log part of path for base64

    if (media_path.compare(0, 11, "data:image/") == 0 || media_path.compare(0, 11, "data:audio/") == 0) {
      // Inline base64 media: "<header>,<base64 payload>"
      const size_t comma_pos = media_path.find(',');
      if (comma_pos == std::string::npos) {
        throw std::runtime_error("Invalid base64 media format, missing comma separator");
      }

      const std::string header = media_path.substr(0, comma_pos);
      if (header.find("base64") == std::string::npos) {
        throw std::runtime_error("Invalid base64 media");
      }

      try {
        std::vector<uint8_t> media_data = base64_decode(media_path.substr(comma_pos + 1));

        // Load bitmap from the decoded memory buffer
        mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(), media_data.size()));
        if (!bmp.ptr) {
          throw std::runtime_error("Failed to load base64 media");
        }

        // Calculate bitmap hash (for KV caching)
        std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
        bmp.set_id(hash.c_str());
        bitmaps.entries.push_back(std::move(bmp));
        result.bitmap_hashes.push_back(hash);
      } catch (const std::runtime_error&) {
        // Keep the specific message instead of masking it below.
        throw;
      } catch (const std::exception&) {
        throw std::runtime_error("Failed to decode base64 media");
      }
    } else if (media_path.compare(0, 7, "http://") == 0 || media_path.compare(0, 8, "https://") == 0) {
      // HTTP URLs are not supported yet
      throw std::runtime_error("HTTP/HTTPS URLs are not supported yet");
    } else {
      // Regular file path: probe it first so a missing or unreadable file
      // gets a distinct error message.
      FILE* file = fopen(media_path.c_str(), "rb");
      if (file == nullptr) {
        throw std::runtime_error("File does not exist or cannot be opened");
      }
      fclose(file);

      mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
      if (!bmp.ptr) {
        throw std::runtime_error("Failed to load media");
      }

      // Calculate bitmap hash (for KV caching). Hash the whole buffer, as the
      // base64 branch does, rather than assuming a 3-bytes-per-pixel layout.
      std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
      bmp.set_id(hash.c_str());
      bitmaps.entries.push_back(std::move(bmp));
      result.bitmap_hashes.push_back(hash);
    }
  }

  result.chunks = mtmd_input_chunks_init();
  if (result.chunks == nullptr) {
    throw std::runtime_error("Failed to initialize input chunks");
  }

  // Create input text
  mtmd_input_text input_text;
  input_text.text = prompt.c_str(); // Use the full prompt with media marker
  input_text.add_special = true;    // Add BOS token if this is the first message
  input_text.parse_special = true;  // Parse special tokens like <__media__>

  // Tokenize the text and media
  fprintf(stdout, "[DEBUG] Tokenizing text and %zu media\n", bitmaps.entries.size());
  auto bitmaps_c_ptr = bitmaps.c_ptr();

  // mtmd_tokenize takes a non-const context, hence the const_cast.
  const int32_t res = mtmd_tokenize(
    const_cast<mtmd_context*>(mtmd_ctx),
    result.chunks,
    &input_text,
    bitmaps_c_ptr.data(),
    bitmaps_c_ptr.size()
  );

  if (res != 0) {
    mtmd_input_chunks_free(result.chunks);
    throw std::runtime_error("Failed to tokenize text and media");
  }

  const size_t num_chunks = mtmd_input_chunks_size(result.chunks);
  fprintf(stdout, "[DEBUG] Tokenization successful: num_chunks=%zu\n", num_chunks);

  // Running count of tokens/positions emitted so far (both text and media)
  size_t total_token_count = 0;

  result.chunk_pos.reserve(num_chunks);
  for (size_t i = 0; i < num_chunks; i++) {
    result.chunk_pos.push_back(total_token_count);

    const mtmd_input_chunk* chunk = mtmd_input_chunks_get(result.chunks, i);
    const mtmd_input_chunk_type chunk_type = mtmd_input_chunk_get_type(chunk);

    if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
      size_t n_tokens;
      const llama_token* tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);

      result.tokens.insert(result.tokens.end(), tokens, tokens + n_tokens);
      total_token_count += n_tokens;
    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
      result.chunk_pos_media.push_back(total_token_count);

      const size_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
      const size_t n_pos = mtmd_input_chunk_get_n_pos(chunk);
      fprintf(stdout, "[DEBUG] Chunk %zu: type=%s, n_tokens=%zu, n_pos=%zu\n",
              i, chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "IMAGE" : "AUDIO", n_tokens, n_pos);

      // Media has no text tokens; reserve one placeholder per position.
      for (size_t j = 0; j < n_pos; j++) {
        result.tokens.push_back(LLAMA_TOKEN_NULL);
      }
      total_token_count += n_pos;
    }
  }

  return result;
}
|
|
382
|
+
|
|
383
|
+
// Process media and add them to the tokenized input
|
|
384
|
+
static llama_pos processMediaPrompt(
|
|
385
|
+
llama_context* ctx,
|
|
386
|
+
const mtmd_context* mtmd_ctx,
|
|
387
|
+
LlamaSessionPtr sess,
|
|
388
|
+
const common_params& params,
|
|
389
|
+
const std::vector<std::string>& media_paths
|
|
390
|
+
) {
|
|
391
|
+
if (mtmd_ctx == nullptr) {
|
|
392
|
+
throw std::runtime_error("Multimodal context is not initialized");
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
// Multimodal path
|
|
396
|
+
std::string full_prompt = params.prompt;
|
|
397
|
+
auto default_media_marker = mtmd_default_marker();
|
|
398
|
+
// Add media marker if it doesn't already exist
|
|
399
|
+
if (full_prompt.find(default_media_marker) == std::string::npos) {
|
|
400
|
+
full_prompt += " ";
|
|
401
|
+
full_prompt += default_media_marker;
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
auto result = tokenizeWithMedia(mtmd_ctx, full_prompt, media_paths);
|
|
405
|
+
|
|
406
|
+
auto all_tokens = result.tokens;
|
|
407
|
+
auto chunks = result.chunks;
|
|
408
|
+
auto chunk_pos = result.chunk_pos;
|
|
409
|
+
auto chunk_pos_media = result.chunk_pos_media;
|
|
410
|
+
auto bitmap_hashes = result.bitmap_hashes;
|
|
411
|
+
|
|
412
|
+
llama_pos n_past = common_tokens_part(*sess->tokens_ptr(), all_tokens);
|
|
413
|
+
|
|
414
|
+
llama_pos new_n_past = n_past;
|
|
415
|
+
|
|
416
|
+
// Adjust n_past to position of the text chunk
|
|
417
|
+
// TODO: Edit the text chunk to remove the tokens before n_past to speed up
|
|
418
|
+
// need to update the mtmd api
|
|
419
|
+
auto adjusted_n_past = -1;
|
|
420
|
+
for (size_t i = 0; i < chunk_pos.size(); i++) {
|
|
421
|
+
if (n_past < chunk_pos[i]) {
|
|
422
|
+
break;
|
|
423
|
+
}
|
|
424
|
+
bool is_end = i + 1 == chunk_pos.size();
|
|
425
|
+
if (
|
|
426
|
+
chunk_pos[i] < n_past &&
|
|
427
|
+
(!is_end && chunk_pos[i + 1] > n_past)
|
|
428
|
+
// is_end & n_past < total_token_count:
|
|
429
|
+
// don't need to adjust and it will skip eval_chunk_single, let nextToken() to finish the job
|
|
430
|
+
) {
|
|
431
|
+
adjusted_n_past = chunk_pos[i];
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
if (adjusted_n_past != -1) {
|
|
435
|
+
n_past = adjusted_n_past;
|
|
436
|
+
new_n_past = n_past;
|
|
437
|
+
fprintf(stdout, "[DEBUG] Adjusted n_past to %d\n", n_past);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
// Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
|
|
441
|
+
auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
|
|
442
|
+
if (mtmd_bitmap_past_hashes->size() > 0) {
|
|
443
|
+
for (size_t i = 0; i < bitmap_hashes.size(); i++) {
|
|
444
|
+
auto pos = chunk_pos_media[i];
|
|
445
|
+
if (n_past < pos) {
|
|
446
|
+
break;
|
|
447
|
+
}
|
|
448
|
+
if (i >= mtmd_bitmap_past_hashes->size()) {
|
|
449
|
+
break;
|
|
450
|
+
}
|
|
451
|
+
if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
|
|
452
|
+
n_past = chunk_pos_media[i];
|
|
453
|
+
new_n_past = n_past;
|
|
454
|
+
break;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
// Clear all KV cache entries after position n_past
|
|
460
|
+
llama_kv_self_seq_rm(ctx, 0, n_past, -1);
|
|
461
|
+
|
|
462
|
+
size_t num_chunks = mtmd_input_chunks_size(chunks);
|
|
463
|
+
|
|
464
|
+
for (size_t i = 0; i < chunk_pos.size(); i++) {
|
|
465
|
+
fprintf(stdout, "[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu\n", i, n_past, chunk_pos[i]);
|
|
466
|
+
|
|
467
|
+
// Process chunk only if it's after the current n_past
|
|
468
|
+
if (chunk_pos[i] >= new_n_past) {
|
|
469
|
+
bool chunk_logits_last = (i == num_chunks - 1);
|
|
470
|
+
auto chunk = mtmd_input_chunks_get(chunks, i);
|
|
471
|
+
|
|
472
|
+
// Cast away const for mtmd_helper_eval_chunk_single
|
|
473
|
+
int32_t res = mtmd_helper_eval_chunk_single(
|
|
474
|
+
const_cast<mtmd_context*>(mtmd_ctx),
|
|
475
|
+
ctx,
|
|
476
|
+
chunk,
|
|
477
|
+
n_past,
|
|
478
|
+
0,
|
|
479
|
+
params.n_batch, // batch size
|
|
480
|
+
chunk_logits_last,
|
|
481
|
+
&new_n_past
|
|
482
|
+
);
|
|
483
|
+
|
|
484
|
+
if (res != 0) {
|
|
485
|
+
mtmd_input_chunks_free(chunks);
|
|
486
|
+
throw std::runtime_error("Failed to process chunk");
|
|
487
|
+
}
|
|
488
|
+
n_past = new_n_past;
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
if (n_past == all_tokens.size() && n_past > 0 && all_tokens[n_past - 1] != LLAMA_TOKEN_NULL) {
|
|
493
|
+
// we have to evaluate at least 1 token to generate logits.
|
|
494
|
+
n_past--;
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
// Update sampling context to process token sequences
|
|
498
|
+
for (auto & token : all_tokens) {
|
|
499
|
+
if (token == LLAMA_TOKEN_NULL) {
|
|
500
|
+
continue;
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
// Set the tokens
|
|
504
|
+
sess->set_tokens(std::move(all_tokens));
|
|
505
|
+
|
|
506
|
+
sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);
|
|
507
|
+
|
|
508
|
+
// Clean up media resources
|
|
509
|
+
mtmd_input_chunks_free(chunks);
|
|
510
|
+
return n_past;
|
|
511
|
+
}
|
|
@@ -351,7 +351,7 @@ jobs:
|
|
|
351
351
|
|
|
352
352
|
ubuntu-22-cmake-musa:
|
|
353
353
|
runs-on: ubuntu-22.04
|
|
354
|
-
container: mthreads/musa:
|
|
354
|
+
container: mthreads/musa:rc4.0.1-mudnn-devel-ubuntu22.04
|
|
355
355
|
|
|
356
356
|
steps:
|
|
357
357
|
- name: Clone
|
|
@@ -899,7 +899,7 @@ jobs:
|
|
|
899
899
|
shell: bash
|
|
900
900
|
|
|
901
901
|
env:
|
|
902
|
-
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/
|
|
902
|
+
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
|
|
903
903
|
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
|
|
904
904
|
ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
|
|
905
905
|
steps:
|