npm - @fugood/llama.node - Versions diffs - 0.4.7 → 0.6.0 - Mend

@fugood/llama.node 0.4.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98)

package/CMakeLists.txt +4 -0
package/bin/darwin/arm64/llama-node.node +0 -0
package/bin/darwin/x64/llama-node.node +0 -0
package/bin/linux/arm64/llama-node.node +0 -0
package/bin/linux/x64/llama-node.node +0 -0
package/bin/linux-cuda/arm64/llama-node.node +0 -0
package/bin/linux-cuda/x64/llama-node.node +0 -0
package/bin/linux-vulkan/arm64/llama-node.node +0 -0
package/bin/linux-vulkan/x64/llama-node.node +0 -0
package/lib/binding.ts +66 -6
package/lib/index.js +59 -17
package/lib/index.ts +74 -23
package/package.json +1 -1
package/src/DecodeAudioTokenWorker.cpp +40 -0
package/src/DecodeAudioTokenWorker.h +22 -0
package/src/EmbeddingWorker.cpp +7 -5
package/src/LlamaCompletionWorker.cpp +68 -54
package/src/LlamaCompletionWorker.h +7 -8
package/src/LlamaContext.cpp +551 -235
package/src/LlamaContext.h +26 -4
package/src/LoadSessionWorker.cpp +4 -2
package/src/SaveSessionWorker.cpp +10 -6
package/src/TokenizeWorker.cpp +23 -14
package/src/TokenizeWorker.h +2 -2
package/src/addons.cc +8 -11
package/src/common.hpp +129 -126
package/src/llama.cpp/.github/workflows/build.yml +2 -2
package/src/llama.cpp/.github/workflows/release.yml +152 -129
package/src/llama.cpp/.github/workflows/winget.yml +42 -0
package/src/llama.cpp/common/arg.cpp +14 -13
package/src/llama.cpp/common/common.cpp +4 -75
package/src/llama.cpp/common/common.h +7 -12
package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
package/src/llama.cpp/examples/simple/simple.cpp +1 -1
package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
package/src/llama.cpp/ggml/include/ggml.h +11 -0
package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
package/src/llama.cpp/ggml/src/ggml.c +64 -18
package/src/llama.cpp/include/llama.h +24 -124
package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
package/src/llama.cpp/src/llama-batch.cpp +3 -1
package/src/llama.cpp/src/llama-context.cpp +60 -110
package/src/llama.cpp/src/llama-graph.cpp +137 -233
package/src/llama.cpp/src/llama-graph.h +49 -7
package/src/llama.cpp/src/llama-hparams.cpp +17 -1
package/src/llama.cpp/src/llama-hparams.h +34 -5
package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
package/src/llama.cpp/src/llama-kv-cache.h +201 -85
package/src/llama.cpp/src/llama-memory.h +3 -2
package/src/llama.cpp/src/llama-model.cpp +273 -94
package/src/llama.cpp/src/llama-model.h +4 -1
package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
package/src/llama.cpp/tools/mtmd/clip.h +6 -4
package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
package/src/llama.cpp/tools/run/run.cpp +2 -2
package/src/llama.cpp/tools/server/server.cpp +158 -47
package/src/llama.cpp/tools/server/utils.hpp +71 -43
package/src/llama.cpp/tools/tts/tts.cpp +4 -2
package/src/tts_utils.cpp +342 -0
package/src/tts_utils.h +62 -0
package/bin/win32/arm64/llama-node.node +0 -0
package/bin/win32/arm64/node.lib +0 -0
package/bin/win32/x64/llama-node.node +0 -0
package/bin/win32/x64/node.lib +0 -0
package/bin/win32-vulkan/arm64/llama-node.node +0 -0
package/bin/win32-vulkan/arm64/node.lib +0 -0
package/bin/win32-vulkan/x64/llama-node.node +0 -0
package/bin/win32-vulkan/x64/node.lib +0 -0

package/src/common.hpp CHANGED Viewed

@@ -1,11 +1,10 @@
 #pragma once
+#include "chat.h"
 #include "common/common.h"
 #include "common/sampling.h"
-#include "tools/mtmd/mtmd.h"
-#include "tools/mtmd/clip.h"
-#include "chat.h"
 #include "llama.h"
+#include "tools/mtmd/clip.h"
 #include "tools/mtmd/mtmd.h"
 #include <memory>
 #include <mutex>
@@ -27,13 +26,17 @@ static std::string json_stringify(const Napi::Object &obj) {
   Napi::Env env = obj.Env();
   Napi::Object json = env.Global().Get("JSON").As<Napi::Object>();
   Napi::Function stringify = json.Get("stringify").As<Napi::Function>();
-  return stringify.Call(json, { obj }).As<Napi::String>().ToString();
+  return stringify.Call(json, {obj}).As<Napi::String>().ToString();
 }
-static void console_log(Napi::Env env, const std::string& message) {
-  Napi::Function consoleLog = env.Global().Get("console").As<Napi::Object>().Get("log").As<Napi::Function>();
-  consoleLog.Call({ Napi::String::New(env, message) });
-}
+static void console_log(Napi::Env env, const std::string &message) {
+  Napi::Function consoleLog = env.Global()
+                                  .Get("console")
+                                  .As<Napi::Object>()
+                                  .Get("log")
+                                  .As<Napi::Function>();
+  consoleLog.Call({Napi::String::New(env, message)});
+}
 template <typename T>
 constexpr T get_option(const Napi::Object &options, const std::string &name,
@@ -64,8 +67,7 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 class LlamaSession {
 public:
-  LlamaSession(common_params params)
-      : params_(params) {
+  LlamaSession(common_params params) : params_(params) {
     llama_init_ = common_init_from_params(params);
     tokens_.reserve(params.n_ctx);
   }
@@ -93,21 +95,17 @@ public:
   inline const common_params &params() const { return params_; }
   inline std::mutex &get_mutex() { return mutex; }
   // Getter for the multimodal context
-  inline const mtmd_context* get_mtmd_ctx() const {
-    return _mtmd_ctx;
-  }
+  inline const mtmd_context *get_mtmd_ctx() const { return _mtmd_ctx; }
   // Setter for the multimodal context
-  inline void set_mtmd_ctx(mtmd_context* ctx) {
-    _mtmd_ctx = ctx;
-  }
+  inline void set_mtmd_ctx(mtmd_context *ctx) { _mtmd_ctx = ctx; }
   void dispose() {
     std::lock_guard<std::mutex> lock(mutex);
     tokens_.clear();
     // mtmd_ctx is owned by LlamaContext, so we don't free it here
     _mtmd_ctx = nullptr;
   }
@@ -118,13 +116,13 @@ private:
   std::vector<llama_token> tokens_{};
   std::vector<std::string> mtmd_bitmap_past_hashes_{};
   std::mutex mutex;
-  mtmd_context* _mtmd_ctx = nullptr;
+  mtmd_context *_mtmd_ctx = nullptr;
 };
 typedef std::shared_ptr<LlamaSession> LlamaSessionPtr;
 static size_t common_tokens_part(const std::vector<llama_token> &a,
-                   const std::vector<llama_token> &b) {
+                                 const std::vector<llama_token> &b) {
   size_t i = 0;
   while (i < a.size() && i < b.size() && a[i] == b[i]) {
     i++;
@@ -133,7 +131,7 @@ static size_t common_tokens_part(const std::vector<llama_token> &a,
 }
 // Computes FNV-1a hash of the data
-static std::string fnv_hash(const uint8_t * data, size_t len) {
+static std::string fnv_hash(const uint8_t *data, size_t len) {
   const uint64_t fnv_prime = 0x100000001b3ULL;
   uint64_t hash = 0xcbf29ce484222325ULL;
@@ -144,10 +142,9 @@ static std::string fnv_hash(const uint8_t * data, size_t len) {
   return std::to_string(hash);
 }
-static const std::string base64_chars =
-  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-  "abcdefghijklmnopqrstuvwxyz"
-  "0123456789+/";
+static const std::string base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                        "abcdefghijklmnopqrstuvwxyz"
+                                        "0123456789+/";
 // Base64 decoding function
 static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
@@ -164,18 +161,22 @@ static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
       continue;
     }
-    if (encoded_string[in_] == '=' || base64_chars.find(encoded_string[in_]) == std::string::npos) {
+    if (encoded_string[in_] == '=' ||
+        base64_chars.find(encoded_string[in_]) == std::string::npos) {
       break;
     }
-    char_array_4[i++] = encoded_string[in_]; in_++;
+    char_array_4[i++] = encoded_string[in_];
+    in_++;
     if (i == 4) {
       for (i = 0; i < 4; i++) {
         char_array_4[i] = base64_chars.find(char_array_4[i]);
       }
-      char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
-      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+      char_array_3[0] =
+          (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+      char_array_3[1] =
+          ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
       char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
       for (i = 0; i < 3; i++) {
@@ -195,7 +196,8 @@ static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
     }
     char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
-    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+    char_array_3[1] =
+        ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
     char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
     for (j = 0; j < i - 1; j++) {
@@ -209,82 +211,86 @@ static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
 struct TokenizeResult {
   std::vector<llama_token> tokens;
-  bool has_image = false;
+  bool has_media = false;
   std::vector<std::string> bitmap_hashes;
-  std::vector<size_t> chunk_pos; // both text and image
-  std::vector<size_t> chunk_pos_images; // image only
-  mtmd_input_chunks* chunks = nullptr;
+  std::vector<size_t> chunk_pos;       // both text and media
+  std::vector<size_t> chunk_pos_media; // media only
+  mtmd_input_chunks *chunks = nullptr;
 };
-static TokenizeResult tokenizeWithImages(
-  const mtmd_context* mtmd_ctx,
-  const std::string &prompt,
-  const std::vector<std::string> &image_paths
-) {
+static TokenizeResult
+tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
+                  const std::vector<std::string> &media_paths) {
   if (mtmd_ctx == nullptr) {
     throw std::runtime_error("Multimodal context is not initialized");
   }
   TokenizeResult result;
-  result.has_image = !image_paths.empty();
+  result.has_media = !media_paths.empty();
   mtmd::bitmaps bitmaps;
-  // Load all images
-  for (const auto& image_path : image_paths) {
-    fprintf(stdout, "[DEBUG] Loading image: %s\n",
-             image_path.substr(0, 50).c_str()); // Only log part of path for base64
+  // Load all media paths
+  for (const auto &media_path : media_paths) {
+    fprintf(
+        stdout, "[DEBUG] Loading media: %s\n",
+        media_path.substr(0, 50).c_str()); // Only log part of path for base64
-    // Check if it's a base64 image
-    if (image_path.compare(0, 11, "data:image/") == 0) {
+    // Check if it's a base64 media
+    if (media_path.compare(0, 11, "data:image/") == 0 ||
+        media_path.compare(0, 11, "data:audio/") == 0) {
       // Parse base64 data
       std::vector<std::string> parts;
-      size_t comma_pos = image_path.find(',');
+      size_t comma_pos = media_path.find(',');
       if (comma_pos == std::string::npos) {
         result.bitmap_hashes.clear();
-        throw std::runtime_error("Invalid base64 image");
+        throw std::runtime_error(
+            "Invalid base64 media format, missing comma separator");
       }
-      std::string header = image_path.substr(0, comma_pos);
-      std::string base64_data = image_path.substr(comma_pos + 1);
+      std::string header = media_path.substr(0, comma_pos);
+      std::string base64_data = media_path.substr(comma_pos + 1);
       if (header.find("base64") == std::string::npos) {
         result.bitmap_hashes.clear();
-        throw std::runtime_error("Invalid base64 image");
+        throw std::runtime_error("Invalid base64 media");
       }
       // Decode base64
       try {
         // Decode base64 to binary
-        std::vector<uint8_t> image_data = base64_decode(base64_data);
+        std::vector<uint8_t> media_data = base64_decode(base64_data);
         // Load bitmap from memory buffer using direct initialization
-        mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(image_data.data(), image_data.size()));
+        mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(),
+                                                          media_data.size()));
         if (!bmp.ptr) {
           bitmaps.entries.clear();
-          throw std::runtime_error("Failed to decode base64 image");
+          throw std::runtime_error("Failed to load base64 media");
         }
         // Calculate bitmap hash (for KV caching)
-        std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
+        std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
         bmp.set_id(hash.c_str());
         bitmaps.entries.push_back(std::move(bmp));
         result.bitmap_hashes.push_back(hash.c_str());
-      } catch (const std::exception& e) {
+      } catch (const std::exception &e) {
         bitmaps.entries.clear();
-        throw std::runtime_error("Failed to decode base64 image");
+        throw std::runtime_error("Failed to decode base64 media");
       }
-    } else if (image_path.compare(0, 7, "http://") == 0 || image_path.compare(0, 8, "https://") == 0) {
+    } else if (media_path.compare(0, 7, "http://") == 0 ||
+               media_path.compare(0, 8, "https://") == 0) {
       // HTTP URLs are not supported yet
       bitmaps.entries.clear();
-      throw std::runtime_error("HTTP URLs are not supported yet");
+      throw std::runtime_error("HTTP/HTTPS URLs are not supported yet");
     } else {
+      // Regular file path
       // Check if file exists
-      FILE* file = fopen(image_path.c_str(), "rb");
+      FILE *file = fopen(media_path.c_str(), "rb");
       if (file == nullptr) {
         bitmaps.entries.clear();
-        throw std::runtime_error("Failed to open image file");
+        throw std::runtime_error("File does not exist or cannot be opened");
       }
       // Get file size
@@ -294,14 +300,14 @@ static TokenizeResult tokenizeWithImages(
       fclose(file);
       // Create bitmap directly
-      mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(image_path.c_str()));
+      mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
       if (!bmp.ptr) {
         bitmaps.entries.clear();
-        throw std::runtime_error("Failed to create bitmap from image file");
+        throw std::runtime_error("Failed to load media");
       }
       // Calculate bitmap hash (for KV caching)
-      std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
+      std::string hash = fnv_hash(bmp.data(), bmp.nx() * bmp.ny() * 3);
       bmp.set_id(hash.c_str());
       bitmaps.entries.push_back(std::move(bmp));
       result.bitmap_hashes.push_back(hash.c_str());
@@ -313,58 +319,60 @@ static TokenizeResult tokenizeWithImages(
     bitmaps.entries.clear();
     throw std::runtime_error("Failed to initialize input chunks");
   }
   // Create input text
   mtmd_input_text input_text;
-  input_text.text = prompt.c_str(); // Use the full prompt with image marker
-  input_text.add_special = true;  // Add BOS token if this is the first message
-  input_text.parse_special = true;       // Parse special tokens like <__image__>
+  input_text.text = prompt.c_str(); // Use the full prompt with media marker
+  input_text.add_special = true;   // Add BOS token if this is the first message
+  input_text.parse_special = true; // Parse special tokens like <__media__>
-  // Tokenize the text and images
-  fprintf(stdout, "[DEBUG] Tokenizing text and %zu images\n", bitmaps.entries.size());
+  // Tokenize the text and media
+  fprintf(stdout, "[DEBUG] Tokenizing text and %zu media\n",
+          bitmaps.entries.size());
   auto bitmaps_c_ptr = bitmaps.c_ptr();
   // Cast away const for mtmd_tokenize
-  int32_t res = mtmd_tokenize(
-    const_cast<mtmd_context*>(mtmd_ctx),
-    result.chunks,
-    &input_text,
-    bitmaps_c_ptr.data(),
-    bitmaps_c_ptr.size()
-  );
+  int32_t res =
+      mtmd_tokenize(const_cast<mtmd_context *>(mtmd_ctx), result.chunks,
+                    &input_text, bitmaps_c_ptr.data(), bitmaps_c_ptr.size());
   if (res != 0) {
     mtmd_input_chunks_free(result.chunks);
     bitmaps.entries.clear();
-    throw std::runtime_error("Failed to tokenize text and images");
+    throw std::runtime_error("Failed to tokenize text and media");
   }
   // Log chunk information
   size_t num_chunks = mtmd_input_chunks_size(result.chunks);
-  fprintf(stdout, "[DEBUG] Tokenization successful: num_chunks=%zu\n", num_chunks);
+  fprintf(stdout, "[DEBUG] Tokenization successful: num_chunks=%zu\n",
+          num_chunks);
-  // Track the total number of tokens (both text and image)
+  // Track the total number of tokens (both text and media)
   size_t total_token_count = 0;
   // chunk pos
   for (size_t i = 0; i < num_chunks; i++) {
     result.chunk_pos.push_back(total_token_count);
-    const mtmd_input_chunk* chunk = mtmd_input_chunks_get(result.chunks, i);
+    const mtmd_input_chunk *chunk = mtmd_input_chunks_get(result.chunks, i);
     mtmd_input_chunk_type chunk_type = mtmd_input_chunk_get_type(chunk);
     if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
       size_t n_tokens;
-      const llama_token* tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+      const llama_token *tokens =
+          mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
       result.tokens.insert(result.tokens.end(), tokens, tokens + n_tokens);
       total_token_count += n_tokens;
-    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-      result.chunk_pos_images.push_back(total_token_count);
+    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ||
+               chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
+      result.chunk_pos_media.push_back(total_token_count);
-      const mtmd_image_tokens* img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
-      size_t n_tokens = mtmd_image_tokens_get_n_tokens(img_tokens);
-      size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
+      size_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
+      size_t n_pos = mtmd_input_chunk_get_n_pos(chunk);
+      fprintf(stdout, "[DEBUG] Chunk %zu: type=%s, n_tokens=%zu, n_pos=%zu\n",
+              i, chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "IMAGE" : "AUDIO",
+              n_tokens, n_pos);
       for (size_t j = 0; j < n_pos; j++) {
         result.tokens.push_back(LLAMA_TOKEN_NULL);
@@ -374,35 +382,34 @@ static TokenizeResult tokenizeWithImages(
   }
   bitmaps.entries.clear();
   return result;
 }
-// Process images and add them to the tokenized input
-static llama_pos process_image_prompt(
-  llama_context* ctx,
-  const mtmd_context* mtmd_ctx,
-  LlamaSessionPtr sess,
-  const common_params& params,
-  const std::vector<std::string>& image_paths
-) {
+// Process media and add them to the tokenized input
+static llama_pos
+processMediaPrompt(llama_context *ctx, const mtmd_context *mtmd_ctx,
+                   LlamaSessionPtr sess, const common_params &params,
+                   const std::vector<std::string> &media_paths) {
   if (mtmd_ctx == nullptr) {
     throw std::runtime_error("Multimodal context is not initialized");
   }
   // Multimodal path
   std::string full_prompt = params.prompt;
-  // Add image marker if it doesn't already exist
-  if (full_prompt.find("<__image__>") == std::string::npos) {
-    full_prompt += " <__image__>";
+  auto default_media_marker = mtmd_default_marker();
+  // Add media marker if it doesn't already exist
+  if (full_prompt.find(default_media_marker) == std::string::npos) {
+    full_prompt += " ";
+    full_prompt += default_media_marker;
   }
-  auto result = tokenizeWithImages(mtmd_ctx, full_prompt, image_paths);
+  auto result = tokenizeWithMedia(mtmd_ctx, full_prompt, media_paths);
   auto all_tokens = result.tokens;
   auto chunks = result.chunks;
   auto chunk_pos = result.chunk_pos;
-  auto chunk_pos_images = result.chunk_pos_images;
+  auto chunk_pos_media = result.chunk_pos_media;
   auto bitmap_hashes = result.bitmap_hashes;
   llama_pos n_past = common_tokens_part(*sess->tokens_ptr(), all_tokens);
@@ -418,11 +425,10 @@ static llama_pos process_image_prompt(
       break;
     }
     bool is_end = i + 1 == chunk_pos.size();
-    if (
-      chunk_pos[i] < n_past &&
-      (!is_end && chunk_pos[i + 1] > n_past)
-      // is_end & n_past < total_token_count:
-      // don't need to adjust and it will skip eval_chunk_single, let nextToken() to finish the job
+    if (chunk_pos[i] < n_past && (!is_end && chunk_pos[i + 1] > n_past)
+        // is_end & n_past < total_token_count:
+        // don't need to adjust and it will skip eval_chunk_single, let
+        // nextToken() to finish the job
     ) {
       adjusted_n_past = chunk_pos[i];
     }
@@ -433,11 +439,12 @@ static llama_pos process_image_prompt(
     fprintf(stdout, "[DEBUG] Adjusted n_past to %d\n", n_past);
   }
-  // Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
+  // Compare bitmap hashes, if they are not the same, backtrack n_past to the
+  // position of the first mismatch
   auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
   if (mtmd_bitmap_past_hashes->size() > 0) {
     for (size_t i = 0; i < bitmap_hashes.size(); i++) {
-      auto pos = chunk_pos_images[i];
+      auto pos = chunk_pos_media[i];
       if (n_past < pos) {
         break;
       }
@@ -445,7 +452,7 @@ static llama_pos process_image_prompt(
         break;
       }
       if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
-        n_past = chunk_pos_images[i];
+        n_past = chunk_pos_media[i];
         new_n_past = n_past;
         break;
       }
@@ -458,7 +465,8 @@ static llama_pos process_image_prompt(
   size_t num_chunks = mtmd_input_chunks_size(chunks);
   for (size_t i = 0; i < chunk_pos.size(); i++) {
-    fprintf(stdout, "[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu\n", i, n_past, chunk_pos[i]);
+    fprintf(stdout, "[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu\n",
+            i, n_past, chunk_pos[i]);
     // Process chunk only if it's after the current n_past
     if (chunk_pos[i] >= new_n_past) {
@@ -467,16 +475,10 @@ static llama_pos process_image_prompt(
       // Cast away const for mtmd_helper_eval_chunk_single
       int32_t res = mtmd_helper_eval_chunk_single(
-        const_cast<mtmd_context*>(mtmd_ctx),
-        ctx,
-        chunk,
-        n_past,
-        0,
-        params.n_batch, // batch size
-        chunk_logits_last,
-        &new_n_past
-      );
+          const_cast<mtmd_context *>(mtmd_ctx), ctx, chunk, n_past, 0,
+          params.n_batch, // batch size
+          chunk_logits_last, &new_n_past);
       if (res != 0) {
         mtmd_input_chunks_free(chunks);
         throw std::runtime_error("Failed to process chunk");
@@ -485,13 +487,14 @@ static llama_pos process_image_prompt(
     }
   }
-  if (n_past == all_tokens.size() && n_past > 0 && all_tokens[n_past - 1] != LLAMA_TOKEN_NULL) {
+  if (n_past == all_tokens.size() && n_past > 0 &&
+      all_tokens[n_past - 1] != LLAMA_TOKEN_NULL) {
     // we have to evaluate at least 1 token to generate logits.
     n_past--;
   }
   // Update sampling context to process token sequences
-  for (auto & token : all_tokens) {
+  for (auto &token : all_tokens) {
     if (token == LLAMA_TOKEN_NULL) {
       continue;
     }
@@ -501,7 +504,7 @@ static llama_pos process_image_prompt(
   sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);
-  // Clean up image resources
+  // Clean up media resources
   mtmd_input_chunks_free(chunks);
   return n_past;
 }

package/src/llama.cpp/.github/workflows/build.yml CHANGED Viewed

@@ -351,7 +351,7 @@ jobs:
   ubuntu-22-cmake-musa:
     runs-on: ubuntu-22.04
-    container: mthreads/musa:rc3.1.1-devel-ubuntu22.04
+    container: mthreads/musa:rc4.0.1-mudnn-devel-ubuntu22.04
     steps:
       - name: Clone
@@ -899,7 +899,7 @@ jobs:
         shell: bash
     env:
-      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
+      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
       WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
       ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
     steps: