@fugood/llama.node 0.4.7 → 0.5.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +20 -6
- package/lib/index.js +41 -17
- package/lib/index.ts +50 -23
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +9 -9
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +37 -18
- package/src/LlamaContext.h +1 -0
- package/src/TokenizeWorker.cpp +16 -12
- package/src/TokenizeWorker.h +2 -2
- package/src/common.hpp +54 -50
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
package/src/common.hpp
CHANGED
|
@@ -209,82 +209,83 @@ static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
|
|
|
209
209
|
struct TokenizeResult {
|
|
210
210
|
std::vector<llama_token> tokens;
|
|
211
211
|
|
|
212
|
-
bool
|
|
212
|
+
bool has_media = false;
|
|
213
213
|
std::vector<std::string> bitmap_hashes;
|
|
214
|
-
std::vector<size_t> chunk_pos; // both text and
|
|
215
|
-
std::vector<size_t>
|
|
214
|
+
std::vector<size_t> chunk_pos; // both text and media
|
|
215
|
+
std::vector<size_t> chunk_pos_media; // media only
|
|
216
216
|
mtmd_input_chunks* chunks = nullptr;
|
|
217
217
|
};
|
|
218
218
|
|
|
219
|
-
static TokenizeResult
|
|
219
|
+
static TokenizeResult tokenizeWithMedia(
|
|
220
220
|
const mtmd_context* mtmd_ctx,
|
|
221
221
|
const std::string &prompt,
|
|
222
|
-
const std::vector<std::string> &
|
|
222
|
+
const std::vector<std::string> &media_paths
|
|
223
223
|
) {
|
|
224
224
|
if (mtmd_ctx == nullptr) {
|
|
225
225
|
throw std::runtime_error("Multimodal context is not initialized");
|
|
226
226
|
}
|
|
227
227
|
|
|
228
228
|
TokenizeResult result;
|
|
229
|
-
result.
|
|
229
|
+
result.has_media = !media_paths.empty();
|
|
230
230
|
|
|
231
231
|
mtmd::bitmaps bitmaps;
|
|
232
232
|
|
|
233
|
-
// Load all
|
|
234
|
-
for (const auto&
|
|
235
|
-
fprintf(stdout, "[DEBUG] Loading
|
|
236
|
-
|
|
233
|
+
// Load all media paths
|
|
234
|
+
for (const auto& media_path : media_paths) {
|
|
235
|
+
fprintf(stdout, "[DEBUG] Loading media: %s\n",
|
|
236
|
+
media_path.substr(0, 50).c_str()); // Only log part of path for base64
|
|
237
237
|
|
|
238
|
-
// Check if it's a base64
|
|
239
|
-
if (
|
|
238
|
+
// Check if it's a base64 media
|
|
239
|
+
if (media_path.compare(0, 11, "data:image/") == 0 || media_path.compare(0, 11, "data:audio/") == 0) {
|
|
240
240
|
|
|
241
241
|
// Parse base64 data
|
|
242
242
|
std::vector<std::string> parts;
|
|
243
|
-
size_t comma_pos =
|
|
243
|
+
size_t comma_pos = media_path.find(',');
|
|
244
244
|
if (comma_pos == std::string::npos) {
|
|
245
245
|
result.bitmap_hashes.clear();
|
|
246
|
-
throw std::runtime_error("Invalid base64
|
|
246
|
+
throw std::runtime_error("Invalid base64 media format, missing comma separator");
|
|
247
247
|
}
|
|
248
248
|
|
|
249
|
-
std::string header =
|
|
250
|
-
std::string base64_data =
|
|
249
|
+
std::string header = media_path.substr(0, comma_pos);
|
|
250
|
+
std::string base64_data = media_path.substr(comma_pos + 1);
|
|
251
251
|
|
|
252
252
|
if (header.find("base64") == std::string::npos) {
|
|
253
253
|
result.bitmap_hashes.clear();
|
|
254
|
-
throw std::runtime_error("Invalid base64
|
|
254
|
+
throw std::runtime_error("Invalid base64 media");
|
|
255
255
|
}
|
|
256
256
|
|
|
257
257
|
// Decode base64
|
|
258
258
|
try {
|
|
259
259
|
// Decode base64 to binary
|
|
260
|
-
std::vector<uint8_t>
|
|
260
|
+
std::vector<uint8_t> media_data = base64_decode(base64_data);
|
|
261
261
|
|
|
262
262
|
// Load bitmap from memory buffer using direct initialization
|
|
263
|
-
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(
|
|
263
|
+
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(), media_data.size()));
|
|
264
264
|
if (!bmp.ptr) {
|
|
265
265
|
bitmaps.entries.clear();
|
|
266
|
-
throw std::runtime_error("Failed to
|
|
266
|
+
throw std::runtime_error("Failed to load base64 media");
|
|
267
267
|
}
|
|
268
268
|
|
|
269
269
|
// Calculate bitmap hash (for KV caching)
|
|
270
|
-
std::string hash = fnv_hash(bmp.data(), bmp.
|
|
270
|
+
std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
|
|
271
271
|
bmp.set_id(hash.c_str());
|
|
272
272
|
bitmaps.entries.push_back(std::move(bmp));
|
|
273
273
|
result.bitmap_hashes.push_back(hash.c_str());
|
|
274
274
|
} catch (const std::exception& e) {
|
|
275
275
|
bitmaps.entries.clear();
|
|
276
|
-
throw std::runtime_error("Failed to decode base64
|
|
276
|
+
throw std::runtime_error("Failed to decode base64 media");
|
|
277
277
|
}
|
|
278
|
-
} else if (
|
|
278
|
+
} else if (media_path.compare(0, 7, "http://") == 0 || media_path.compare(0, 8, "https://") == 0) {
|
|
279
279
|
// HTTP URLs are not supported yet
|
|
280
280
|
bitmaps.entries.clear();
|
|
281
|
-
throw std::runtime_error("HTTP URLs are not supported yet");
|
|
281
|
+
throw std::runtime_error("HTTP/HTTPS URLs are not supported yet");
|
|
282
282
|
} else {
|
|
283
|
+
// Regular file path
|
|
283
284
|
// Check if file exists
|
|
284
|
-
FILE* file = fopen(
|
|
285
|
+
FILE* file = fopen(media_path.c_str(), "rb");
|
|
285
286
|
if (file == nullptr) {
|
|
286
287
|
bitmaps.entries.clear();
|
|
287
|
-
throw std::runtime_error("
|
|
288
|
+
throw std::runtime_error("File does not exist or cannot be opened");
|
|
288
289
|
}
|
|
289
290
|
|
|
290
291
|
// Get file size
|
|
@@ -294,10 +295,10 @@ static TokenizeResult tokenizeWithImages(
|
|
|
294
295
|
fclose(file);
|
|
295
296
|
|
|
296
297
|
// Create bitmap directly
|
|
297
|
-
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(
|
|
298
|
+
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
|
|
298
299
|
if (!bmp.ptr) {
|
|
299
300
|
bitmaps.entries.clear();
|
|
300
|
-
throw std::runtime_error("Failed to
|
|
301
|
+
throw std::runtime_error("Failed to load media");
|
|
301
302
|
}
|
|
302
303
|
|
|
303
304
|
// Calculate bitmap hash (for KV caching)
|
|
@@ -316,12 +317,12 @@ static TokenizeResult tokenizeWithImages(
|
|
|
316
317
|
|
|
317
318
|
// Create input text
|
|
318
319
|
mtmd_input_text input_text;
|
|
319
|
-
input_text.text = prompt.c_str(); // Use the full prompt with
|
|
320
|
+
input_text.text = prompt.c_str(); // Use the full prompt with media marker
|
|
320
321
|
input_text.add_special = true; // Add BOS token if this is the first message
|
|
321
|
-
input_text.parse_special = true; // Parse special tokens like <
|
|
322
|
+
input_text.parse_special = true; // Parse special tokens like <__media__>
|
|
322
323
|
|
|
323
|
-
// Tokenize the text and
|
|
324
|
-
fprintf(stdout, "[DEBUG] Tokenizing text and %zu
|
|
324
|
+
// Tokenize the text and media
|
|
325
|
+
fprintf(stdout, "[DEBUG] Tokenizing text and %zu media\n", bitmaps.entries.size());
|
|
325
326
|
auto bitmaps_c_ptr = bitmaps.c_ptr();
|
|
326
327
|
|
|
327
328
|
// Cast away const for mtmd_tokenize
|
|
@@ -336,14 +337,14 @@ static TokenizeResult tokenizeWithImages(
|
|
|
336
337
|
if (res != 0) {
|
|
337
338
|
mtmd_input_chunks_free(result.chunks);
|
|
338
339
|
bitmaps.entries.clear();
|
|
339
|
-
throw std::runtime_error("Failed to tokenize text and
|
|
340
|
+
throw std::runtime_error("Failed to tokenize text and media");
|
|
340
341
|
}
|
|
341
342
|
|
|
342
343
|
// Log chunk information
|
|
343
344
|
size_t num_chunks = mtmd_input_chunks_size(result.chunks);
|
|
344
345
|
fprintf(stdout, "[DEBUG] Tokenization successful: num_chunks=%zu\n", num_chunks);
|
|
345
346
|
|
|
346
|
-
// Track the total number of tokens (both text and
|
|
347
|
+
// Track the total number of tokens (both text and media)
|
|
347
348
|
size_t total_token_count = 0;
|
|
348
349
|
|
|
349
350
|
// chunk pos
|
|
@@ -359,12 +360,13 @@ static TokenizeResult tokenizeWithImages(
|
|
|
359
360
|
|
|
360
361
|
result.tokens.insert(result.tokens.end(), tokens, tokens + n_tokens);
|
|
361
362
|
total_token_count += n_tokens;
|
|
362
|
-
} else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
|
363
|
-
result.
|
|
363
|
+
} else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
|
|
364
|
+
result.chunk_pos_media.push_back(total_token_count);
|
|
364
365
|
|
|
365
|
-
|
|
366
|
-
size_t
|
|
367
|
-
|
|
366
|
+
size_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
|
|
367
|
+
size_t n_pos = mtmd_input_chunk_get_n_pos(chunk);
|
|
368
|
+
fprintf(stdout, "[DEBUG] Chunk %zu: type=%s, n_tokens=%zu, n_pos=%zu\n",
|
|
369
|
+
i, chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "IMAGE" : "AUDIO", n_tokens, n_pos);
|
|
368
370
|
|
|
369
371
|
for (size_t j = 0; j < n_pos; j++) {
|
|
370
372
|
result.tokens.push_back(LLAMA_TOKEN_NULL);
|
|
@@ -378,13 +380,13 @@ static TokenizeResult tokenizeWithImages(
|
|
|
378
380
|
return result;
|
|
379
381
|
}
|
|
380
382
|
|
|
381
|
-
// Process
|
|
382
|
-
static llama_pos
|
|
383
|
+
// Process media and add them to the tokenized input
|
|
384
|
+
static llama_pos processMediaPrompt(
|
|
383
385
|
llama_context* ctx,
|
|
384
386
|
const mtmd_context* mtmd_ctx,
|
|
385
387
|
LlamaSessionPtr sess,
|
|
386
388
|
const common_params& params,
|
|
387
|
-
const std::vector<std::string>&
|
|
389
|
+
const std::vector<std::string>& media_paths
|
|
388
390
|
) {
|
|
389
391
|
if (mtmd_ctx == nullptr) {
|
|
390
392
|
throw std::runtime_error("Multimodal context is not initialized");
|
|
@@ -392,17 +394,19 @@ static llama_pos process_image_prompt(
|
|
|
392
394
|
|
|
393
395
|
// Multimodal path
|
|
394
396
|
std::string full_prompt = params.prompt;
|
|
395
|
-
|
|
396
|
-
if
|
|
397
|
-
|
|
397
|
+
auto default_media_marker = mtmd_default_marker();
|
|
398
|
+
// Add media marker if it doesn't already exist
|
|
399
|
+
if (full_prompt.find(default_media_marker) == std::string::npos) {
|
|
400
|
+
full_prompt += " ";
|
|
401
|
+
full_prompt += default_media_marker;
|
|
398
402
|
}
|
|
399
403
|
|
|
400
|
-
auto result =
|
|
404
|
+
auto result = tokenizeWithMedia(mtmd_ctx, full_prompt, media_paths);
|
|
401
405
|
|
|
402
406
|
auto all_tokens = result.tokens;
|
|
403
407
|
auto chunks = result.chunks;
|
|
404
408
|
auto chunk_pos = result.chunk_pos;
|
|
405
|
-
auto
|
|
409
|
+
auto chunk_pos_media = result.chunk_pos_media;
|
|
406
410
|
auto bitmap_hashes = result.bitmap_hashes;
|
|
407
411
|
|
|
408
412
|
llama_pos n_past = common_tokens_part(*sess->tokens_ptr(), all_tokens);
|
|
@@ -437,7 +441,7 @@ static llama_pos process_image_prompt(
|
|
|
437
441
|
auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
|
|
438
442
|
if (mtmd_bitmap_past_hashes->size() > 0) {
|
|
439
443
|
for (size_t i = 0; i < bitmap_hashes.size(); i++) {
|
|
440
|
-
auto pos =
|
|
444
|
+
auto pos = chunk_pos_media[i];
|
|
441
445
|
if (n_past < pos) {
|
|
442
446
|
break;
|
|
443
447
|
}
|
|
@@ -445,7 +449,7 @@ static llama_pos process_image_prompt(
|
|
|
445
449
|
break;
|
|
446
450
|
}
|
|
447
451
|
if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
|
|
448
|
-
n_past =
|
|
452
|
+
n_past = chunk_pos_media[i];
|
|
449
453
|
new_n_past = n_past;
|
|
450
454
|
break;
|
|
451
455
|
}
|
|
@@ -501,7 +505,7 @@ static llama_pos process_image_prompt(
|
|
|
501
505
|
|
|
502
506
|
sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);
|
|
503
507
|
|
|
504
|
-
// Clean up
|
|
508
|
+
// Clean up media resources
|
|
505
509
|
mtmd_input_chunks_free(chunks);
|
|
506
510
|
return n_past;
|
|
507
511
|
}
|
|
@@ -351,7 +351,7 @@ jobs:
|
|
|
351
351
|
|
|
352
352
|
ubuntu-22-cmake-musa:
|
|
353
353
|
runs-on: ubuntu-22.04
|
|
354
|
-
container: mthreads/musa:
|
|
354
|
+
container: mthreads/musa:rc4.0.1-mudnn-devel-ubuntu22.04
|
|
355
355
|
|
|
356
356
|
steps:
|
|
357
357
|
- name: Clone
|
|
@@ -899,7 +899,7 @@ jobs:
|
|
|
899
899
|
shell: bash
|
|
900
900
|
|
|
901
901
|
env:
|
|
902
|
-
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/
|
|
902
|
+
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
|
|
903
903
|
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
|
|
904
904
|
ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
|
|
905
905
|
steps:
|