@fugood/llama.node 0.4.7 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +20 -6
  18. package/lib/index.js +41 -17
  19. package/lib/index.ts +50 -23
  20. package/package.json +1 -1
  21. package/src/LlamaCompletionWorker.cpp +9 -9
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +37 -18
  24. package/src/LlamaContext.h +1 -0
  25. package/src/TokenizeWorker.cpp +16 -12
  26. package/src/TokenizeWorker.h +2 -2
  27. package/src/common.hpp +54 -50
  28. package/src/llama.cpp/.github/workflows/build.yml +2 -2
  29. package/src/llama.cpp/.github/workflows/release.yml +152 -129
  30. package/src/llama.cpp/.github/workflows/winget.yml +42 -0
  31. package/src/llama.cpp/common/arg.cpp +14 -13
  32. package/src/llama.cpp/common/common.cpp +4 -75
  33. package/src/llama.cpp/common/common.h +7 -12
  34. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
  35. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
  36. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
  37. package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
  38. package/src/llama.cpp/examples/simple/simple.cpp +1 -1
  39. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  40. package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
  41. package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
  42. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  43. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
  44. package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
  45. package/src/llama.cpp/ggml/include/ggml.h +11 -0
  46. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
  47. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
  48. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
  49. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
  50. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
  51. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
  52. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  53. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
  54. package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
  55. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
  56. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
  57. package/src/llama.cpp/ggml/src/ggml.c +64 -18
  58. package/src/llama.cpp/include/llama.h +24 -124
  59. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  60. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  61. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  62. package/src/llama.cpp/src/llama-batch.cpp +3 -1
  63. package/src/llama.cpp/src/llama-context.cpp +60 -110
  64. package/src/llama.cpp/src/llama-graph.cpp +137 -233
  65. package/src/llama.cpp/src/llama-graph.h +49 -7
  66. package/src/llama.cpp/src/llama-hparams.cpp +17 -1
  67. package/src/llama.cpp/src/llama-hparams.h +34 -5
  68. package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
  69. package/src/llama.cpp/src/llama-kv-cache.h +201 -85
  70. package/src/llama.cpp/src/llama-memory.h +3 -2
  71. package/src/llama.cpp/src/llama-model.cpp +273 -94
  72. package/src/llama.cpp/src/llama-model.h +4 -1
  73. package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
  74. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
  75. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
  76. package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
  77. package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
  78. package/src/llama.cpp/tools/mtmd/clip.h +6 -4
  79. package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
  80. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  81. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
  82. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
  83. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
  84. package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
  85. package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
  86. package/src/llama.cpp/tools/run/run.cpp +2 -2
  87. package/src/llama.cpp/tools/server/server.cpp +158 -47
  88. package/src/llama.cpp/tools/server/utils.hpp +71 -43
  89. package/src/llama.cpp/tools/tts/tts.cpp +4 -2
package/src/common.hpp CHANGED
@@ -209,82 +209,83 @@ static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
  struct TokenizeResult {
    std::vector<llama_token> tokens;

-   bool has_image = false;
+   bool has_media = false;
    std::vector<std::string> bitmap_hashes;
-   std::vector<size_t> chunk_pos; // both text and image
-   std::vector<size_t> chunk_pos_images; // image only
+   std::vector<size_t> chunk_pos; // both text and media
+   std::vector<size_t> chunk_pos_media; // media only
    mtmd_input_chunks* chunks = nullptr;
  };

- static TokenizeResult tokenizeWithImages(
+ static TokenizeResult tokenizeWithMedia(
    const mtmd_context* mtmd_ctx,
    const std::string &prompt,
-   const std::vector<std::string> &image_paths
+   const std::vector<std::string> &media_paths
  ) {
    if (mtmd_ctx == nullptr) {
      throw std::runtime_error("Multimodal context is not initialized");
    }

    TokenizeResult result;
-   result.has_image = !image_paths.empty();
+   result.has_media = !media_paths.empty();

    mtmd::bitmaps bitmaps;

-   // Load all images
-   for (const auto& image_path : image_paths) {
-     fprintf(stdout, "[DEBUG] Loading image: %s\n",
-       image_path.substr(0, 50).c_str()); // Only log part of path for base64
+   // Load all media paths
+   for (const auto& media_path : media_paths) {
+     fprintf(stdout, "[DEBUG] Loading media: %s\n",
+       media_path.substr(0, 50).c_str()); // Only log part of path for base64

-     // Check if it's a base64 image
-     if (image_path.compare(0, 11, "data:image/") == 0) {
+     // Check if it's a base64 media
+     if (media_path.compare(0, 11, "data:image/") == 0 || media_path.compare(0, 11, "data:audio/") == 0) {

        // Parse base64 data
        std::vector<std::string> parts;
-       size_t comma_pos = image_path.find(',');
+       size_t comma_pos = media_path.find(',');
        if (comma_pos == std::string::npos) {
          result.bitmap_hashes.clear();
-         throw std::runtime_error("Invalid base64 image");
+         throw std::runtime_error("Invalid base64 media format, missing comma separator");
        }

-       std::string header = image_path.substr(0, comma_pos);
-       std::string base64_data = image_path.substr(comma_pos + 1);
+       std::string header = media_path.substr(0, comma_pos);
+       std::string base64_data = media_path.substr(comma_pos + 1);

        if (header.find("base64") == std::string::npos) {
          result.bitmap_hashes.clear();
-         throw std::runtime_error("Invalid base64 image");
+         throw std::runtime_error("Invalid base64 media");
        }

        // Decode base64
        try {
          // Decode base64 to binary
-         std::vector<uint8_t> image_data = base64_decode(base64_data);
+         std::vector<uint8_t> media_data = base64_decode(base64_data);

          // Load bitmap from memory buffer using direct initialization
-         mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(image_data.data(), image_data.size()));
+         mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(), media_data.size()));
          if (!bmp.ptr) {
            bitmaps.entries.clear();
-           throw std::runtime_error("Failed to decode base64 image");
+           throw std::runtime_error("Failed to load base64 media");
          }

          // Calculate bitmap hash (for KV caching)
-         std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
+         std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
          bmp.set_id(hash.c_str());
          bitmaps.entries.push_back(std::move(bmp));
          result.bitmap_hashes.push_back(hash.c_str());
        } catch (const std::exception& e) {
          bitmaps.entries.clear();
-         throw std::runtime_error("Failed to decode base64 image");
+         throw std::runtime_error("Failed to decode base64 media");
        }
-     } else if (image_path.compare(0, 7, "http://") == 0 || image_path.compare(0, 8, "https://") == 0) {
+     } else if (media_path.compare(0, 7, "http://") == 0 || media_path.compare(0, 8, "https://") == 0) {
        // HTTP URLs are not supported yet
        bitmaps.entries.clear();
-       throw std::runtime_error("HTTP URLs are not supported yet");
+       throw std::runtime_error("HTTP/HTTPS URLs are not supported yet");
      } else {
+       // Regular file path
        // Check if file exists
-       FILE* file = fopen(image_path.c_str(), "rb");
+       FILE* file = fopen(media_path.c_str(), "rb");
        if (file == nullptr) {
          bitmaps.entries.clear();
-         throw std::runtime_error("Failed to open image file");
+         throw std::runtime_error("File does not exist or cannot be opened");
        }

        // Get file size
@@ -294,10 +295,10 @@ static TokenizeResult tokenizeWithImages(
        fclose(file);

        // Create bitmap directly
-       mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(image_path.c_str()));
+       mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
        if (!bmp.ptr) {
          bitmaps.entries.clear();
-         throw std::runtime_error("Failed to create bitmap from image file");
+         throw std::runtime_error("Failed to load media");
        }

        // Calculate bitmap hash (for KV caching)
@@ -316,12 +317,12 @@ static TokenizeResult tokenizeWithImages(

    // Create input text
    mtmd_input_text input_text;
-   input_text.text = prompt.c_str(); // Use the full prompt with image marker
+   input_text.text = prompt.c_str(); // Use the full prompt with media marker
    input_text.add_special = true; // Add BOS token if this is the first message
-   input_text.parse_special = true; // Parse special tokens like <__image__>
+   input_text.parse_special = true; // Parse special tokens like <__media__>

-   // Tokenize the text and images
-   fprintf(stdout, "[DEBUG] Tokenizing text and %zu images\n", bitmaps.entries.size());
+   // Tokenize the text and media
+   fprintf(stdout, "[DEBUG] Tokenizing text and %zu media\n", bitmaps.entries.size());
    auto bitmaps_c_ptr = bitmaps.c_ptr();

    // Cast away const for mtmd_tokenize
@@ -336,14 +337,14 @@ static TokenizeResult tokenizeWithImages(
    if (res != 0) {
      mtmd_input_chunks_free(result.chunks);
      bitmaps.entries.clear();
-     throw std::runtime_error("Failed to tokenize text and images");
+     throw std::runtime_error("Failed to tokenize text and media");
    }

    // Log chunk information
    size_t num_chunks = mtmd_input_chunks_size(result.chunks);
    fprintf(stdout, "[DEBUG] Tokenization successful: num_chunks=%zu\n", num_chunks);

-   // Track the total number of tokens (both text and image)
+   // Track the total number of tokens (both text and media)
    size_t total_token_count = 0;

    // chunk pos
@@ -359,12 +360,13 @@ static TokenizeResult tokenizeWithImages(

        result.tokens.insert(result.tokens.end(), tokens, tokens + n_tokens);
        total_token_count += n_tokens;
-     } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-       result.chunk_pos_images.push_back(total_token_count);
+     } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
+       result.chunk_pos_media.push_back(total_token_count);

-       const mtmd_image_tokens* img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
-       size_t n_tokens = mtmd_image_tokens_get_n_tokens(img_tokens);
-       size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
+       size_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
+       size_t n_pos = mtmd_input_chunk_get_n_pos(chunk);
+       fprintf(stdout, "[DEBUG] Chunk %zu: type=%s, n_tokens=%zu, n_pos=%zu\n",
+         i, chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "IMAGE" : "AUDIO", n_tokens, n_pos);

        for (size_t j = 0; j < n_pos; j++) {
          result.tokens.push_back(LLAMA_TOKEN_NULL);
@@ -378,13 +380,13 @@ static TokenizeResult tokenizeWithImages(
    return result;
  }

- // Process images and add them to the tokenized input
- static llama_pos process_image_prompt(
+ // Process media and add them to the tokenized input
+ static llama_pos processMediaPrompt(
    llama_context* ctx,
    const mtmd_context* mtmd_ctx,
    LlamaSessionPtr sess,
    const common_params& params,
-   const std::vector<std::string>& image_paths
+   const std::vector<std::string>& media_paths
  ) {
    if (mtmd_ctx == nullptr) {
      throw std::runtime_error("Multimodal context is not initialized");
@@ -392,17 +394,19 @@ static llama_pos process_image_prompt(

    // Multimodal path
    std::string full_prompt = params.prompt;
-   // Add image marker if it doesn't already exist
-   if (full_prompt.find("<__image__>") == std::string::npos) {
-     full_prompt += " <__image__>";
+   auto default_media_marker = mtmd_default_marker();
+   // Add media marker if it doesn't already exist
+   if (full_prompt.find(default_media_marker) == std::string::npos) {
+     full_prompt += " ";
+     full_prompt += default_media_marker;
    }

-   auto result = tokenizeWithImages(mtmd_ctx, full_prompt, image_paths);
+   auto result = tokenizeWithMedia(mtmd_ctx, full_prompt, media_paths);

    auto all_tokens = result.tokens;
    auto chunks = result.chunks;
    auto chunk_pos = result.chunk_pos;
-   auto chunk_pos_images = result.chunk_pos_images;
+   auto chunk_pos_media = result.chunk_pos_media;
    auto bitmap_hashes = result.bitmap_hashes;

    llama_pos n_past = common_tokens_part(*sess->tokens_ptr(), all_tokens);
@@ -437,7 +441,7 @@ static llama_pos process_image_prompt(
    auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
    if (mtmd_bitmap_past_hashes->size() > 0) {
      for (size_t i = 0; i < bitmap_hashes.size(); i++) {
-       auto pos = chunk_pos_images[i];
+       auto pos = chunk_pos_media[i];
        if (n_past < pos) {
          break;
        }
@@ -445,7 +449,7 @@ static llama_pos process_image_prompt(
          break;
        }
        if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
-         n_past = chunk_pos_images[i];
+         n_past = chunk_pos_media[i];
          new_n_past = n_past;
          break;
        }
@@ -501,7 +505,7 @@ static llama_pos process_image_prompt(

    sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);

-   // Clean up image resources
+   // Clean up media resources
    mtmd_input_chunks_free(chunks);
    return n_past;
  }
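For orientation, the marker handling that processMediaPrompt now performs can be sketched in isolation. This is a hypothetical, stand-alone illustration rather than code from the package; it relies only on mtmd_default_marker() from llama.cpp's mtmd API (the replacement for the hard-coded "<__image__>" marker shown in the removed lines), and the helper name ensure_media_marker is invented here.

#include <string>
#include "mtmd.h"  // for mtmd_default_marker()

// Append the default media marker (e.g. "<__media__>") only when the prompt
// does not already contain one, mirroring the logic in processMediaPrompt above.
static std::string ensure_media_marker(std::string prompt) {
  const std::string marker = mtmd_default_marker();
  if (prompt.find(marker) == std::string::npos) {
    prompt += " ";
    prompt += marker;
  }
  return prompt;
}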
package/src/llama.cpp/.github/workflows/build.yml CHANGED
@@ -351,7 +351,7 @@ jobs:

    ubuntu-22-cmake-musa:
      runs-on: ubuntu-22.04
-     container: mthreads/musa:rc3.1.1-devel-ubuntu22.04
+     container: mthreads/musa:rc4.0.1-mudnn-devel-ubuntu22.04

      steps:
        - name: Clone
@@ -899,7 +899,7 @@ jobs:
          shell: bash

      env:
-       WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
+       WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
        WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
        ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
      steps:
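Similarly, the data-URI handling added to tokenizeWithMedia in common.hpp above can be read as a small stand-alone routine. Again, this is an illustrative sketch, not package code; the function name extract_base64_payload is invented here, and it only reproduces the checks visible in the diff.

#include <stdexcept>
#include <string>

// Accept data:image/ and data:audio/ URIs, split at the first comma, require a
// base64 header, and return the raw payload, i.e. the same validation that
// tokenizeWithMedia performs before calling base64_decode().
static std::string extract_base64_payload(const std::string & media_path) {
  const bool is_data_uri = media_path.compare(0, 11, "data:image/") == 0 ||
                           media_path.compare(0, 11, "data:audio/") == 0;
  if (!is_data_uri) {
    throw std::runtime_error("not a data URI");
  }
  const size_t comma_pos = media_path.find(',');
  if (comma_pos == std::string::npos) {
    throw std::runtime_error("Invalid base64 media format, missing comma separator");
  }
  const std::string header = media_path.substr(0, comma_pos);
  if (header.find("base64") == std::string::npos) {
    throw std::runtime_error("Invalid base64 media");
  }
  return media_path.substr(comma_pos + 1);
}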