@fugood/llama.node 0.4.7 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +20 -6
  18. package/lib/index.js +41 -17
  19. package/lib/index.ts +50 -23
  20. package/package.json +1 -1
  21. package/src/LlamaCompletionWorker.cpp +9 -9
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +37 -18
  24. package/src/LlamaContext.h +1 -0
  25. package/src/TokenizeWorker.cpp +16 -12
  26. package/src/TokenizeWorker.h +2 -2
  27. package/src/common.hpp +54 -50
  28. package/src/llama.cpp/.github/workflows/build.yml +2 -2
  29. package/src/llama.cpp/.github/workflows/release.yml +152 -129
  30. package/src/llama.cpp/.github/workflows/winget.yml +42 -0
  31. package/src/llama.cpp/common/arg.cpp +14 -13
  32. package/src/llama.cpp/common/common.cpp +4 -75
  33. package/src/llama.cpp/common/common.h +7 -12
  34. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
  35. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
  36. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
  37. package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
  38. package/src/llama.cpp/examples/simple/simple.cpp +1 -1
  39. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  40. package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
  41. package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
  42. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  43. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
  44. package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
  45. package/src/llama.cpp/ggml/include/ggml.h +11 -0
  46. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
  47. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
  48. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
  49. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
  50. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
  51. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
  52. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  53. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
  54. package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
  55. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
  56. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
  57. package/src/llama.cpp/ggml/src/ggml.c +64 -18
  58. package/src/llama.cpp/include/llama.h +24 -124
  59. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  60. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  61. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  62. package/src/llama.cpp/src/llama-batch.cpp +3 -1
  63. package/src/llama.cpp/src/llama-context.cpp +60 -110
  64. package/src/llama.cpp/src/llama-graph.cpp +137 -233
  65. package/src/llama.cpp/src/llama-graph.h +49 -7
  66. package/src/llama.cpp/src/llama-hparams.cpp +17 -1
  67. package/src/llama.cpp/src/llama-hparams.h +34 -5
  68. package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
  69. package/src/llama.cpp/src/llama-kv-cache.h +201 -85
  70. package/src/llama.cpp/src/llama-memory.h +3 -2
  71. package/src/llama.cpp/src/llama-model.cpp +273 -94
  72. package/src/llama.cpp/src/llama-model.h +4 -1
  73. package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
  74. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
  75. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
  76. package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
  77. package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
  78. package/src/llama.cpp/tools/mtmd/clip.h +6 -4
  79. package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
  80. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  81. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
  82. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
  83. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
  84. package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
  85. package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
  86. package/src/llama.cpp/tools/run/run.cpp +2 -2
  87. package/src/llama.cpp/tools/server/server.cpp +158 -47
  88. package/src/llama.cpp/tools/server/utils.hpp +71 -43
  89. package/src/llama.cpp/tools/tts/tts.cpp +4 -2
package/src/common.hpp CHANGED
@@ -209,82 +209,83 @@ static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
  struct TokenizeResult {
    std::vector<llama_token> tokens;

-   bool has_image = false;
+   bool has_media = false;
    std::vector<std::string> bitmap_hashes;
-   std::vector<size_t> chunk_pos; // both text and image
-   std::vector<size_t> chunk_pos_images; // image only
+   std::vector<size_t> chunk_pos; // both text and media
+   std::vector<size_t> chunk_pos_media; // media only
    mtmd_input_chunks* chunks = nullptr;
  };

- static TokenizeResult tokenizeWithImages(
+ static TokenizeResult tokenizeWithMedia(
    const mtmd_context* mtmd_ctx,
    const std::string &prompt,
-   const std::vector<std::string> &image_paths
+   const std::vector<std::string> &media_paths
  ) {
    if (mtmd_ctx == nullptr) {
      throw std::runtime_error("Multimodal context is not initialized");
    }

    TokenizeResult result;
-   result.has_image = !image_paths.empty();
+   result.has_media = !media_paths.empty();

    mtmd::bitmaps bitmaps;

-   // Load all images
-   for (const auto& image_path : image_paths) {
-     fprintf(stdout, "[DEBUG] Loading image: %s\n",
-       image_path.substr(0, 50).c_str()); // Only log part of path for base64
+   // Load all media paths
+   for (const auto& media_path : media_paths) {
+     fprintf(stdout, "[DEBUG] Loading media: %s\n",
+       media_path.substr(0, 50).c_str()); // Only log part of path for base64

-     // Check if it's a base64 image
-     if (image_path.compare(0, 11, "data:image/") == 0) {
+     // Check if it's a base64 media
+     if (media_path.compare(0, 11, "data:image/") == 0 || media_path.compare(0, 11, "data:audio/") == 0) {

        // Parse base64 data
        std::vector<std::string> parts;
-       size_t comma_pos = image_path.find(',');
+       size_t comma_pos = media_path.find(',');
        if (comma_pos == std::string::npos) {
          result.bitmap_hashes.clear();
-         throw std::runtime_error("Invalid base64 image");
+         throw std::runtime_error("Invalid base64 media format, missing comma separator");
        }

-       std::string header = image_path.substr(0, comma_pos);
-       std::string base64_data = image_path.substr(comma_pos + 1);
+       std::string header = media_path.substr(0, comma_pos);
+       std::string base64_data = media_path.substr(comma_pos + 1);

        if (header.find("base64") == std::string::npos) {
          result.bitmap_hashes.clear();
-         throw std::runtime_error("Invalid base64 image");
+         throw std::runtime_error("Invalid base64 media");
        }

        // Decode base64
        try {
          // Decode base64 to binary
-         std::vector<uint8_t> image_data = base64_decode(base64_data);
+         std::vector<uint8_t> media_data = base64_decode(base64_data);

          // Load bitmap from memory buffer using direct initialization
-         mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(image_data.data(), image_data.size()));
+         mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(), media_data.size()));
          if (!bmp.ptr) {
            bitmaps.entries.clear();
-           throw std::runtime_error("Failed to decode base64 image");
+           throw std::runtime_error("Failed to load base64 media");
          }

          // Calculate bitmap hash (for KV caching)
-         std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
+         std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
          bmp.set_id(hash.c_str());
          bitmaps.entries.push_back(std::move(bmp));
          result.bitmap_hashes.push_back(hash.c_str());
        } catch (const std::exception& e) {
          bitmaps.entries.clear();
-         throw std::runtime_error("Failed to decode base64 image");
+         throw std::runtime_error("Failed to decode base64 media");
        }
-     } else if (image_path.compare(0, 7, "http://") == 0 || image_path.compare(0, 8, "https://") == 0) {
+     } else if (media_path.compare(0, 7, "http://") == 0 || media_path.compare(0, 8, "https://") == 0) {
        // HTTP URLs are not supported yet
        bitmaps.entries.clear();
-       throw std::runtime_error("HTTP URLs are not supported yet");
+       throw std::runtime_error("HTTP/HTTPS URLs are not supported yet");
      } else {
+       // Regular file path
        // Check if file exists
-       FILE* file = fopen(image_path.c_str(), "rb");
+       FILE* file = fopen(media_path.c_str(), "rb");
        if (file == nullptr) {
          bitmaps.entries.clear();
-         throw std::runtime_error("Failed to open image file");
+         throw std::runtime_error("File does not exist or cannot be opened");
        }

        // Get file size
@@ -294,10 +295,10 @@ static TokenizeResult tokenizeWithImages(
        fclose(file);

        // Create bitmap directly
-       mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(image_path.c_str()));
+       mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
        if (!bmp.ptr) {
          bitmaps.entries.clear();
-         throw std::runtime_error("Failed to create bitmap from image file");
+         throw std::runtime_error("Failed to load media");
        }

        // Calculate bitmap hash (for KV caching)
@@ -316,12 +317,12 @@ static TokenizeResult tokenizeWithImages(

    // Create input text
    mtmd_input_text input_text;
-   input_text.text = prompt.c_str(); // Use the full prompt with image marker
+   input_text.text = prompt.c_str(); // Use the full prompt with media marker
    input_text.add_special = true; // Add BOS token if this is the first message
-   input_text.parse_special = true; // Parse special tokens like <__image__>
+   input_text.parse_special = true; // Parse special tokens like <__media__>

-   // Tokenize the text and images
-   fprintf(stdout, "[DEBUG] Tokenizing text and %zu images\n", bitmaps.entries.size());
+   // Tokenize the text and media
+   fprintf(stdout, "[DEBUG] Tokenizing text and %zu media\n", bitmaps.entries.size());
    auto bitmaps_c_ptr = bitmaps.c_ptr();

    // Cast away const for mtmd_tokenize
@@ -336,14 +337,14 @@ static TokenizeResult tokenizeWithImages(
    if (res != 0) {
      mtmd_input_chunks_free(result.chunks);
      bitmaps.entries.clear();
-     throw std::runtime_error("Failed to tokenize text and images");
+     throw std::runtime_error("Failed to tokenize text and media");
    }

    // Log chunk information
    size_t num_chunks = mtmd_input_chunks_size(result.chunks);
    fprintf(stdout, "[DEBUG] Tokenization successful: num_chunks=%zu\n", num_chunks);

-   // Track the total number of tokens (both text and image)
+   // Track the total number of tokens (both text and media)
    size_t total_token_count = 0;

    // chunk pos
@@ -359,12 +360,13 @@ static TokenizeResult tokenizeWithImages(

        result.tokens.insert(result.tokens.end(), tokens, tokens + n_tokens);
        total_token_count += n_tokens;
-     } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-       result.chunk_pos_images.push_back(total_token_count);
+     } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
+       result.chunk_pos_media.push_back(total_token_count);

-       const mtmd_image_tokens* img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
-       size_t n_tokens = mtmd_image_tokens_get_n_tokens(img_tokens);
-       size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
+       size_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
+       size_t n_pos = mtmd_input_chunk_get_n_pos(chunk);
+       fprintf(stdout, "[DEBUG] Chunk %zu: type=%s, n_tokens=%zu, n_pos=%zu\n",
+         i, chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "IMAGE" : "AUDIO", n_tokens, n_pos);

        for (size_t j = 0; j < n_pos; j++) {
          result.tokens.push_back(LLAMA_TOKEN_NULL);
@@ -378,13 +380,13 @@ static TokenizeResult tokenizeWithImages(
    return result;
  }

- // Process images and add them to the tokenized input
- static llama_pos process_image_prompt(
+ // Process media and add them to the tokenized input
+ static llama_pos processMediaPrompt(
    llama_context* ctx,
    const mtmd_context* mtmd_ctx,
    LlamaSessionPtr sess,
    const common_params& params,
-   const std::vector<std::string>& image_paths
+   const std::vector<std::string>& media_paths
  ) {
    if (mtmd_ctx == nullptr) {
      throw std::runtime_error("Multimodal context is not initialized");
@@ -392,17 +394,19 @@ static llama_pos process_image_prompt(

    // Multimodal path
    std::string full_prompt = params.prompt;
-   // Add image marker if it doesn't already exist
-   if (full_prompt.find("<__image__>") == std::string::npos) {
-     full_prompt += " <__image__>";
+   auto default_media_marker = mtmd_default_marker();
+   // Add media marker if it doesn't already exist
+   if (full_prompt.find(default_media_marker) == std::string::npos) {
+     full_prompt += " ";
+     full_prompt += default_media_marker;
    }

-   auto result = tokenizeWithImages(mtmd_ctx, full_prompt, image_paths);
+   auto result = tokenizeWithMedia(mtmd_ctx, full_prompt, media_paths);

    auto all_tokens = result.tokens;
    auto chunks = result.chunks;
    auto chunk_pos = result.chunk_pos;
-   auto chunk_pos_images = result.chunk_pos_images;
+   auto chunk_pos_media = result.chunk_pos_media;
    auto bitmap_hashes = result.bitmap_hashes;

    llama_pos n_past = common_tokens_part(*sess->tokens_ptr(), all_tokens);
@@ -437,7 +441,7 @@ static llama_pos process_image_prompt(
    auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
    if (mtmd_bitmap_past_hashes->size() > 0) {
      for (size_t i = 0; i < bitmap_hashes.size(); i++) {
-       auto pos = chunk_pos_images[i];
+       auto pos = chunk_pos_media[i];
        if (n_past < pos) {
          break;
        }
@@ -445,7 +449,7 @@ static llama_pos process_image_prompt(
          break;
        }
        if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
-         n_past = chunk_pos_images[i];
+         n_past = chunk_pos_media[i];
          new_n_past = n_past;
          break;
        }
@@ -501,7 +505,7 @@ static llama_pos process_image_prompt(

    sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);

-   // Clean up image resources
+   // Clean up media resources
    mtmd_input_chunks_free(chunks);
    return n_past;
  }
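For orientation, the marker handling that processMediaPrompt now performs can be sketched in isolation. This is a hypothetical, stand-alone illustration rather than code from the package; it relies only on mtmd_default_marker() from llama.cpp's mtmd API (the replacement for the hard-coded "<__image__>" marker shown in the removed lines), and the helper name ensure_media_marker is invented here.

#include <string>
#include "mtmd.h"  // for mtmd_default_marker()

// Append the default media marker (e.g. "<__media__>") only when the prompt
// does not already contain one, mirroring the logic in processMediaPrompt above.
static std::string ensure_media_marker(std::string prompt) {
  const std::string marker = mtmd_default_marker();
  if (prompt.find(marker) == std::string::npos) {
    prompt += " ";
    prompt += marker;
  }
  return prompt;
}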
package/src/llama.cpp/.github/workflows/build.yml CHANGED
@@ -351,7 +351,7 @@ jobs:

    ubuntu-22-cmake-musa:
      runs-on: ubuntu-22.04
-     container: mthreads/musa:rc3.1.1-devel-ubuntu22.04
+     container: mthreads/musa:rc4.0.1-mudnn-devel-ubuntu22.04

      steps:
        - name: Clone
@@ -899,7 +899,7 @@ jobs:
          shell: bash

      env:
-       WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
+       WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
        WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
        ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
      steps:
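Similarly, the data-URI handling added to tokenizeWithMedia in common.hpp above can be read as a small stand-alone routine. Again, this is an illustrative sketch, not package code; the function name extract_base64_payload is invented here, and it only reproduces the checks visible in the diff.

#include <stdexcept>
#include <string>

// Accept data:image/ and data:audio/ URIs, split at the first comma, require a
// base64 header, and return the raw payload, i.e. the same validation that
// tokenizeWithMedia performs before calling base64_decode().
static std::string extract_base64_payload(const std::string & media_path) {
  const bool is_data_uri = media_path.compare(0, 11, "data:image/") == 0 ||
                           media_path.compare(0, 11, "data:audio/") == 0;
  if (!is_data_uri) {
    throw std::runtime_error("not a data URI");
  }
  const size_t comma_pos = media_path.find(',');
  if (comma_pos == std::string::npos) {
    throw std::runtime_error("Invalid base64 media format, missing comma separator");
  }
  const std::string header = media_path.substr(0, comma_pos);
  if (header.find("base64") == std::string::npos) {
    throw std::runtime_error("Invalid base64 media");
  }
  return media_path.substr(comma_pos + 1);
}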