@fugood/llama.node 0.3.17 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/CMakeLists.txt +3 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +39 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +366 -19
  24. package/src/LlamaCompletionWorker.h +30 -10
  25. package/src/LlamaContext.cpp +213 -5
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
  29. package/src/llama.cpp/.github/workflows/build.yml +41 -762
  30. package/src/llama.cpp/.github/workflows/docker.yml +5 -2
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +12 -12
  33. package/src/llama.cpp/CMakeLists.txt +5 -17
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +31 -3
  37. package/src/llama.cpp/common/arg.cpp +48 -29
  38. package/src/llama.cpp/common/chat.cpp +128 -106
  39. package/src/llama.cpp/common/chat.h +2 -0
  40. package/src/llama.cpp/common/common.cpp +37 -1
  41. package/src/llama.cpp/common/common.h +18 -9
  42. package/src/llama.cpp/common/llguidance.cpp +1 -0
  43. package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
  44. package/src/llama.cpp/common/minja/minja.hpp +69 -36
  45. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  46. package/src/llama.cpp/common/regex-partial.h +56 -0
  47. package/src/llama.cpp/common/sampling.cpp +57 -50
  48. package/src/llama.cpp/examples/CMakeLists.txt +2 -23
  49. package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
  50. package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
  51. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  52. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  53. package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
  54. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  55. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  56. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  57. package/src/llama.cpp/ggml/include/ggml.h +10 -7
  58. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  60. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  61. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
  62. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
  63. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
  64. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
  65. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
  66. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  67. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  68. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  69. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
  71. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  73. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
  74. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
  75. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  76. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  77. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
  78. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
  80. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
  81. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
  82. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  83. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  84. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  85. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  86. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
  87. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  88. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
  89. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  90. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  91. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  92. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
  93. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
  94. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
  95. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
  96. package/src/llama.cpp/ggml/src/ggml.c +29 -20
  97. package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
  98. package/src/llama.cpp/include/llama.h +52 -11
  99. package/src/llama.cpp/requirements/requirements-all.txt +3 -3
  100. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  101. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  102. package/src/llama.cpp/src/llama-adapter.cpp +6 -0
  103. package/src/llama.cpp/src/llama-arch.cpp +3 -0
  104. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  105. package/src/llama.cpp/src/llama-batch.h +2 -1
  106. package/src/llama.cpp/src/llama-chat.cpp +17 -7
  107. package/src/llama.cpp/src/llama-chat.h +1 -0
  108. package/src/llama.cpp/src/llama-context.cpp +389 -501
  109. package/src/llama.cpp/src/llama-context.h +44 -32
  110. package/src/llama.cpp/src/llama-cparams.h +1 -0
  111. package/src/llama.cpp/src/llama-graph.cpp +20 -38
  112. package/src/llama.cpp/src/llama-graph.h +12 -8
  113. package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
  114. package/src/llama.cpp/src/llama-kv-cache.h +271 -85
  115. package/src/llama.cpp/src/llama-memory.h +11 -1
  116. package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
  117. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  118. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  119. package/src/llama.cpp/src/llama-model.cpp +316 -69
  120. package/src/llama.cpp/src/llama-model.h +8 -1
  121. package/src/llama.cpp/src/llama-quant.cpp +15 -13
  122. package/src/llama.cpp/src/llama-sampling.cpp +18 -6
  123. package/src/llama.cpp/src/llama-vocab.cpp +42 -4
  124. package/src/llama.cpp/src/llama-vocab.h +6 -0
  125. package/src/llama.cpp/src/llama.cpp +14 -0
  126. package/src/llama.cpp/tests/CMakeLists.txt +10 -2
  127. package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
  128. package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
  129. package/src/llama.cpp/tests/test-chat.cpp +3 -1
  130. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  131. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  132. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  133. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  134. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  135. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
  136. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  137. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
  138. package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
  139. package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
  140. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
  141. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
  142. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  143. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
  144. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  145. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
  146. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  147. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
  148. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
  149. package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
  150. package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
  151. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  152. package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
  153. package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
  154. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  155. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  156. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  157. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  158. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  159. package/src/llama.cpp/examples/llava/clip.h +0 -135
  160. package/src/llama.cpp/examples/llava/llava.cpp +0 -586
  161. package/src/llama.cpp/examples/llava/llava.h +0 -49
  162. package/src/llama.cpp/examples/llava/mtmd.h +0 -168
  163. package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
  164. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  165. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  166. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  167. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  168. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  169. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  170. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  171. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  172. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  173. /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
  174. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  175. /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
  176. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  177. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  178. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  179. /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
  180. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  181. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  182. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  183. /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
  184. /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
  185. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  186. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  187. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  188. /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
  189. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  190. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  191. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  192. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
  193. /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
package/src/LlamaContext.cpp CHANGED
@@ -12,6 +12,91 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+// Helper function for formatted strings (for console logs)
+template<typename ... Args>
+static std::string format_string(const std::string& format, Args ... args) {
+  int size_s = std::snprintf(nullptr, 0, format.c_str(), args ...) + 1; // +1 for null terminator
+  if (size_s <= 0) { return "Error formatting string"; }
+  auto size = static_cast<size_t>(size_s);
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), args ...);
+  return std::string(buf.get(), buf.get() + size - 1); // -1 to exclude null terminator
+}
+
+// Computes FNV-1a hash of the data
+static std::string fnv_hash(const uint8_t* data, size_t len) {
+  const uint64_t fnv_prime = 0x100000001b3ULL;
+  uint64_t hash = 0xcbf29ce484222325ULL;
+
+  for (size_t i = 0; i < len; ++i) {
+    hash ^= data[i];
+    hash *= fnv_prime;
+  }
+  return std::to_string(hash);
+}
+
+static const std::string base64_chars =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789+/";
+
+// Base64 decoding function
+static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
+  std::vector<uint8_t> decoded;
+  int in_len = encoded_string.size();
+  int i = 0;
+  int j = 0;
+  int in_ = 0;
+  unsigned char char_array_4[4], char_array_3[3];
+
+  while (in_len-- && (encoded_string[in_] != '=')) {
+    if (isspace(encoded_string[in_])) {
+      in_++;
+      continue;
+    }
+
+    if (encoded_string[in_] == '=' || base64_chars.find(encoded_string[in_]) == std::string::npos) {
+      break;
+    }
+
+    char_array_4[i++] = encoded_string[in_]; in_++;
+    if (i == 4) {
+      for (i = 0; i < 4; i++) {
+        char_array_4[i] = base64_chars.find(char_array_4[i]);
+      }
+
+      char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+      for (i = 0; i < 3; i++) {
+        decoded.push_back(char_array_3[i]);
+      }
+      i = 0;
+    }
+  }
+
+  if (i) {
+    for (j = i; j < 4; j++) {
+      char_array_4[j] = 0;
+    }
+
+    for (j = 0; j < 4; j++) {
+      char_array_4[j] = base64_chars.find(char_array_4[j]);
+    }
+
+    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+    char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+    for (j = 0; j < i - 1; j++) {
+      decoded.push_back(char_array_3[j]);
+    }
+  }
+
+  return decoded;
+}
+
 using json = nlohmann::ordered_json;
 
 // loadModelInfo(path: string): object
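The `fnv_hash` helper above uses the standard 64-bit FNV-1a parameters (offset basis `0xcbf29ce484222325`, prime `0x100000001b3`) and returns the hash as a decimal string. For cross-checking from the JS side, a minimal TypeScript sketch of the same computation; the `fnv1a64` name is ours, not part of the package API:

```ts
const FNV_PRIME = 0x100000001b3n;
const FNV_OFFSET_BASIS = 0xcbf29ce484222325n;
const MASK_64 = (1n << 64n) - 1n; // emulate uint64_t wraparound

function fnv1a64(data: Uint8Array): string {
  let hash = FNV_OFFSET_BASIS;
  for (const byte of data) {
    hash ^= BigInt(byte);                // FNV-1a: xor the byte first...
    hash = (hash * FNV_PRIME) & MASK_64; // ...then multiply, truncated to 64 bits
  }
  return hash.toString(); // matches std::to_string(hash) in the C++ helper
}

// e.g. fnv1a64(new TextEncoder().encode('hello'))
```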
@@ -116,6 +201,15 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
        InstanceMethod<&LlamaContext::GetLoadedLoraAdapters>(
            "getLoadedLoraAdapters",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::InitMultimodal>(
+           "initMultimodal",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::IsMultimodalEnabled>(
+           "isMultimodalEnabled",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::ReleaseMultimodal>(
+           "releaseMultimodal",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::Release>(
            "release", static_cast<napi_property_attributes>(napi_enumerable)),
        StaticMethod<&LlamaContext::ModelInfo>(
@@ -448,7 +542,6 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
   auto messages = json_stringify(info[0].As<Napi::Array>());
-  printf("messages: %s\n", messages.c_str());
   auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";
 
   auto has_params = info.Length() >= 2;
@@ -545,6 +638,25 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     }
   }
 
+  // Process image_paths parameter
+  std::vector<std::string> image_paths;
+  if (options.Has("image_paths")) {
+    if (options.Get("image_paths").IsArray()) {
+      auto image_paths_array = options.Get("image_paths").As<Napi::Array>();
+      for (size_t i = 0; i < image_paths_array.Length(); i++) {
+        image_paths.push_back(image_paths_array.Get(i).ToString().Utf8Value());
+      }
+    } else if (options.Get("image_paths").IsString()) {
+      image_paths.push_back(options.Get("image_paths").ToString().Utf8Value());
+    }
+  }
+
+  // Check if multimodal is enabled when image_paths are provided
+  if (!image_paths.empty() && !(_has_multimodal && _mtmd_ctx != nullptr)) {
+    Napi::Error::New(env, "Multimodal support must be enabled via initMultimodal to use image_paths").ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+
   int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
 
   common_params params = _sess->params();
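On the JS side, `image_paths` may therefore be a single string or an array of strings; both forms normalize to a `std::vector<std::string>` before the completion worker runs. A hedged sketch of the accepted shapes (the `CompletionImageOptions` type name is illustrative, not taken from `lib/binding.ts`):

```ts
// Only the image_paths field and its string | string[] shape are
// confirmed by the native parsing above; the rest is illustrative.
type CompletionImageOptions = {
  prompt: string;
  image_paths?: string | string[];
};

const one: CompletionImageOptions = {
  prompt: 'Describe this image.',
  image_paths: './photo.jpg',          // single path
};

const many: CompletionImageOptions = {
  prompt: 'Compare these images.',
  image_paths: ['./a.png', './b.png'], // list of paths
};
```

Either form is rejected with "Multimodal support must be enabled via initMultimodal" unless `initMultimodal` has succeeded first.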
@@ -727,17 +839,17 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   auto *worker =
-      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format, image_paths);
   worker->Queue();
   _wip = worker;
-  worker->onComplete([this]() { _wip = nullptr; });
+  worker->OnComplete([this]() { _wip = nullptr; });
   return worker->Promise();
 }
 
 // stopCompletion(): void
 void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
   if (_wip != nullptr) {
-    _wip->Stop();
+    _wip->SetStop();
   }
 }
 
@@ -890,14 +1002,110 @@ Napi::Value LlamaContext::GetLoadedLoraAdapters(const Napi::CallbackInfo &info)
 Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   auto env = info.Env();
   if (_wip != nullptr) {
-    _wip->Stop();
+    _wip->SetStop();
   }
   if (_sess == nullptr) {
     auto promise = Napi::Promise::Deferred(env);
     promise.Resolve(env.Undefined());
     return promise.Promise();
   }
+
+  // Clear the mtmd context reference in the session
+  if (_mtmd_ctx != nullptr) {
+    _sess->set_mtmd_ctx(nullptr);
+  }
+
   auto *worker = new DisposeWorker(info, std::move(_sess));
   worker->Queue();
   return worker->Promise();
 }
+
+LlamaContext::~LlamaContext() {
+  if (_mtmd_ctx != nullptr) {
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+}
+
+// initMultimodal(options: { path: string, use_gpu?: boolean }): boolean
+Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+
+  if (info.Length() < 1 || !info[0].IsObject()) {
+    Napi::TypeError::New(env, "Object expected for mmproj path").ThrowAsJavaScriptException();
+  }
+
+  auto options = info[0].As<Napi::Object>();
+  auto mmproj_path = options.Get("path").ToString().Utf8Value();
+  auto use_gpu = options.Get("use_gpu").ToBoolean().Value();
+
+  if (mmproj_path.empty()) {
+    Napi::TypeError::New(env, "mmproj path is required").ThrowAsJavaScriptException();
+  }
+
+  console_log(env, "Initializing multimodal with mmproj path: " + mmproj_path);
+
+  auto model = _sess->model();
+  auto ctx = _sess->context();
+  if (model == nullptr) {
+    Napi::Error::New(env, "Model not loaded").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  if (_mtmd_ctx != nullptr) {
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+
+  // Initialize mtmd context
+  mtmd_context_params mtmd_params = mtmd_context_params_default();
+  mtmd_params.use_gpu = use_gpu;
+  mtmd_params.print_timings = false;
+  mtmd_params.n_threads = _sess->params().cpuparams.n_threads;
+  mtmd_params.verbosity = (ggml_log_level)GGML_LOG_LEVEL_INFO;
+
+  console_log(env, format_string("Initializing mtmd context with threads=%d, use_gpu=%d",
+                                 mtmd_params.n_threads, mtmd_params.use_gpu ? 1 : 0));
+
+  _mtmd_ctx = mtmd_init_from_file(mmproj_path.c_str(), model, mtmd_params);
+  if (_mtmd_ctx == nullptr) {
+    Napi::Error::New(env, "Failed to initialize multimodal context").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  _has_multimodal = true;
+
+  // Share the mtmd context with the session
+  _sess->set_mtmd_ctx(_mtmd_ctx);
+
+  // Check if the model uses M-RoPE or non-causal attention
+  bool uses_mrope = mtmd_decode_use_mrope(_mtmd_ctx);
+  bool uses_non_causal = mtmd_decode_use_non_causal(_mtmd_ctx);
+  console_log(env, format_string("Model multimodal properties: uses_mrope=%d, uses_non_causal=%d",
                                 uses_mrope ? 1 : 0, uses_non_causal ? 1 : 0));
+
+  console_log(env, "Multimodal context initialized successfully with mmproj: " + mmproj_path);
+  return Napi::Boolean::New(env, true);
+}
+
+// isMultimodalEnabled(): boolean
+Napi::Value LlamaContext::IsMultimodalEnabled(const Napi::CallbackInfo &info) {
+  return Napi::Boolean::New(info.Env(), _has_multimodal && _mtmd_ctx != nullptr);
+}
+
+// releaseMultimodal(): void
+void LlamaContext::ReleaseMultimodal(const Napi::CallbackInfo &info) {
+  if (_mtmd_ctx != nullptr) {
+    // Clear the mtmd context reference in the session
+    if (_sess != nullptr) {
+      _sess->set_mtmd_ctx(nullptr);
+    }
+
+    // Free the mtmd context
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+}
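Taken together, the new bindings give JS a complete enable/query/release lifecycle for the mmproj projector. A hedged end-to-end sketch: `initMultimodal`, `isMultimodalEnabled`, `releaseMultimodal`, and `image_paths` come from this diff, while the `loadModel` entry point, option names, and result shape are assumptions about `lib/index.ts`:

```ts
import { loadModel } from '@fugood/llama.node'; // assumed export name

async function main() {
  const context = await loadModel({ model: './model.gguf' }); // illustrative options

  // Load the multimodal projector; the native method returns a boolean
  // and throws if the mmproj file cannot be initialized.
  const ok = context.initMultimodal({ path: './mmproj.gguf', use_gpu: true });

  if (ok && context.isMultimodalEnabled()) {
    const result = await context.completion({
      prompt: 'What is in this picture?',
      image_paths: ['./picture.jpg'], // rejected unless multimodal is enabled
    });
    console.log(result.text); // result shape assumed from the existing completion API
  }

  context.releaseMultimodal(); // frees the mtmd context, clears the session reference
  await context.release();     // ~LlamaContext also frees it as a safety net
}

main();
```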
package/src/LlamaContext.h CHANGED
@@ -1,10 +1,13 @@
 #include "common.hpp"
+#include "tools/mtmd/mtmd.h"
+#include "tools/mtmd/clip.h"
 
 class LlamaCompletionWorker;
 
 class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
 public:
   LlamaContext(const Napi::CallbackInfo &info);
+  ~LlamaContext();
   static void ToggleNativeLog(const Napi::CallbackInfo &info);
   static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
   static void Init(Napi::Env env, Napi::Object &exports);
@@ -24,6 +27,11 @@ private:
   void RemoveLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
+
+  // Multimodal methods
+  Napi::Value InitMultimodal(const Napi::CallbackInfo &info);
+  Napi::Value IsMultimodalEnabled(const Napi::CallbackInfo &info);
+  void ReleaseMultimodal(const Napi::CallbackInfo &info);
 
   std::string _info;
   Napi::Object _meta;
@@ -31,4 +39,8 @@ private:
   common_chat_templates_ptr _templates;
   std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
+
+  // Multimodal support
+  mtmd_context *_mtmd_ctx = nullptr;
+  bool _has_multimodal = false;
 };
package/src/common.hpp CHANGED
@@ -4,6 +4,7 @@
 #include "common/sampling.h"
 #include "chat.h"
 #include "llama.h"
+#include "tools/mtmd/mtmd.h"
 #include <memory>
 #include <mutex>
 #include <napi.h>
@@ -82,10 +83,23 @@ public:
   inline const common_params &params() const { return params_; }
 
   inline std::mutex &get_mutex() { return mutex; }
+
+  // Getter for the multimodal context
+  inline const mtmd_context* get_mtmd_ctx() const {
+    return _mtmd_ctx;
+  }
+
+  // Setter for the multimodal context
+  inline void set_mtmd_ctx(mtmd_context* ctx) {
+    _mtmd_ctx = ctx;
+  }
 
   void dispose() {
     std::lock_guard<std::mutex> lock(mutex);
     tokens_.clear();
+
+    // mtmd_ctx is owned by LlamaContext, so we don't free it here
+    _mtmd_ctx = nullptr;
   }
 
 private:
@@ -93,6 +107,7 @@ private:
   const common_params params_;
   std::vector<llama_token> tokens_{};
   std::mutex mutex;
+  mtmd_context* _mtmd_ctx = nullptr;
 };
 
 typedef std::shared_ptr<LlamaSession> LlamaSessionPtr;
package/src/llama.cpp/.github/workflows/build-linux-cross.yml CHANGED
@@ -4,18 +4,25 @@ on:
   workflow_call:
 
 jobs:
-  ubuntu-latest-riscv64-cpu-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-riscv64-cpu-cross:
+    runs-on: ubuntu-24.04
 
     steps:
      - uses: actions/checkout@v4
      - name: Setup Riscv
        run: |
          sudo dpkg --add-architecture riscv64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
          sudo apt-get install -y --no-install-recommends \
            build-essential \
            gcc-14-riscv64-linux-gnu \
@@ -27,6 +34,7 @@ jobs:
          cmake -B build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_OPENMP=OFF \
            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
            -DLLAMA_BUILD_TESTS=OFF \
            -DCMAKE_SYSTEM_NAME=Linux \
            -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -40,21 +48,25 @@
 
          cmake --build build --config Release -j $(nproc)
 
-  ubuntu-latest-riscv64-vulkan-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-riscv64-vulkan-cross:
+    runs-on: ubuntu-24.04
 
     steps:
      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
      - name: Setup Riscv
        run: |
          sudo dpkg --add-architecture riscv64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
          sudo apt-get install -y --no-install-recommends \
            build-essential \
            glslc \
@@ -69,6 +81,7 @@ jobs:
            -DGGML_VULKAN=ON \
            -DGGML_OPENMP=OFF \
            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
            -DLLAMA_BUILD_TESTS=OFF \
            -DCMAKE_SYSTEM_NAME=Linux \
            -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -82,21 +95,25 @@
 
          cmake --build build --config Release -j $(nproc)
 
-  ubuntu-latest-arm64-vulkan-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-arm64-vulkan-cross:
+    runs-on: ubuntu-24.04
 
     steps:
      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
      - name: Setup Arm64
        run: |
          sudo dpkg --add-architecture arm64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
          sudo apt-get install -y --no-install-recommends \
            build-essential \
            glslc \
@@ -110,6 +127,7 @@ jobs:
            -DGGML_VULKAN=ON \
            -DGGML_OPENMP=OFF \
            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
            -DLLAMA_BUILD_TESTS=OFF \
            -DCMAKE_SYSTEM_NAME=Linux \
            -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
@@ -122,3 +140,94 @@ jobs:
            -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
 
          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-ppc64el-cpu-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup PowerPC64le
+        run: |
+          sudo dpkg --add-architecture ppc64el
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+            build-essential \
+            gcc-14-powerpc64le-linux-gnu \
+            g++-14-powerpc64le-linux-gnu \
+            libcurl4-openssl-dev:ppc64el
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_OPENMP=OFF \
+            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DCMAKE_SYSTEM_NAME=Linux \
+            -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
+            -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
+            -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
+            -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+            -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
+            -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+            -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+            -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-ppc64el-vulkan-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup PowerPC64le
+        run: |
+          sudo dpkg --add-architecture ppc64el
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+            build-essential \
+            glslc \
+            gcc-14-powerpc64le-linux-gnu \
+            g++-14-powerpc64le-linux-gnu \
+            libvulkan-dev:ppc64el \
+            libcurl4-openssl-dev:ppc64el
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_VULKAN=ON \
+            -DGGML_OPENMP=OFF \
+            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DCMAKE_SYSTEM_NAME=Linux \
+            -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
+            -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
+            -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
+            -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+            -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
+            -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+            -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+            -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)