@fugood/llama.node 1.3.3 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CMakeLists.txt CHANGED
@@ -120,16 +120,20 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT DEFINED GGML_OPENMP OR GGML_O
  endif()

  set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
-
- set(LLAMA_BUILD_TOOLS ON CACHE BOOL "Build tools")
-
+ set(LLAMA_BUILD_TOOLS OFF CACHE BOOL "Build tools")
+ set(LLAMA_BUILD_TESTS OFF CACHE BOOL "Build tests")
+ set(LLAMA_BUILD_SERVER OFF CACHE BOOL "Build server")
+ set(LLAMA_BUILD_EXAMPLES OFF CACHE BOOL "Build examples")
  set(LLAMA_CURL OFF CACHE BOOL "Build curl")

+ set(LLAMA_INSTALL_VERSION "0.0.0") # TODO: Set the version number (0.0.<BUILD_NUMBER>)
+
  set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")

  add_definitions(-DGGML_MAX_NAME=80)

  add_subdirectory("src/llama.cpp")
+ add_subdirectory("src/llama.cpp/tools/mtmd")

  include_directories(
  ${CMAKE_JS_INC}
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.3.3",
+ "version": "1.3.4",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,19 +72,19 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-linux-x64": "1.3.3",
- "@fugood/node-llama-linux-x64-vulkan": "1.3.3",
- "@fugood/node-llama-linux-x64-cuda": "1.3.3",
- "@fugood/node-llama-linux-arm64": "1.3.3",
- "@fugood/node-llama-linux-arm64-vulkan": "1.3.3",
- "@fugood/node-llama-linux-arm64-cuda": "1.3.3",
- "@fugood/node-llama-win32-x64": "1.3.3",
- "@fugood/node-llama-win32-x64-vulkan": "1.3.3",
- "@fugood/node-llama-win32-x64-cuda": "1.3.3",
- "@fugood/node-llama-win32-arm64": "1.3.3",
- "@fugood/node-llama-win32-arm64-vulkan": "1.3.3",
- "@fugood/node-llama-darwin-x64": "1.3.3",
- "@fugood/node-llama-darwin-arm64": "1.3.3"
+ "@fugood/node-llama-linux-x64": "1.3.4",
+ "@fugood/node-llama-linux-x64-vulkan": "1.3.4",
+ "@fugood/node-llama-linux-x64-cuda": "1.3.4",
+ "@fugood/node-llama-linux-arm64": "1.3.4",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.3.4",
+ "@fugood/node-llama-linux-arm64-cuda": "1.3.4",
+ "@fugood/node-llama-win32-x64": "1.3.4",
+ "@fugood/node-llama-win32-x64-vulkan": "1.3.4",
+ "@fugood/node-llama-win32-x64-cuda": "1.3.4",
+ "@fugood/node-llama-win32-arm64": "1.3.4",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.3.4",
+ "@fugood/node-llama-darwin-x64": "1.3.4",
+ "@fugood/node-llama-darwin-arm64": "1.3.4"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
@@ -9,10 +9,10 @@ Napi::Array TokenProbsToArray(Napi::Env env, llama_context* ctx, const std::vect
  for (size_t i = 0; i < probs.size(); i++) {
  const auto &prob = probs[i];
  Napi::Object token_obj = Napi::Object::New(env);
-
+
  std::string token_str = common_token_to_piece(ctx, prob.tok);
  token_obj.Set("content", Napi::String::New(env, token_str));
-
+
  Napi::Array token_probs = Napi::Array::New(env);
  for (size_t j = 0; j < prob.probs.size(); j++) {
  const auto &p = prob.probs[j];
@@ -83,10 +83,10 @@ void LlamaCompletionWorker::Execute() {
  }

  auto completion = _rn_ctx->completion;
-
+
  // Prepare completion context
  completion->rewind();
-
+
  // Set up parameters
  _rn_ctx->params.prompt = _params.prompt;
  _rn_ctx->params.sampling = _params.sampling;
@@ -95,50 +95,50 @@ void LlamaCompletionWorker::Execute() {
  _rn_ctx->params.n_ctx = _params.n_ctx;
  _rn_ctx->params.n_batch = _params.n_batch;
  _rn_ctx->params.ctx_shift = _params.ctx_shift;
-
+
  // Set prefill text
  completion->prefill_text = _prefill_text;
-
+
  // Set up TTS guide tokens if enabled
  if (_has_vocoder && _rn_ctx->tts_wrapper != nullptr) {
  _rn_ctx->tts_wrapper->guide_tokens = _guide_tokens;
  _rn_ctx->tts_wrapper->next_token_uses_guide_token = true;
  }
-
+
  // Initialize sampling
  if (!completion->initSampling()) {
  SetError("Failed to initialize sampling");
  return;
  }
-
+
  // Load prompt (handles both text-only and multimodal)
  completion->loadPrompt(_media_paths);
-
+
  // Check if context is full after loading prompt
  if (completion->context_full) {
  _result.context_full = true;
  return;
  }
-
+
  // Begin completion with chat format and reasoning settings
  completion->beginCompletion(_chat_format, common_reasoning_format_from_name(_reasoning_format), _thinking_forced_open);
-
+
  // Main completion loop
  int token_count = 0;
  const int max_tokens = _params.n_predict < 0 ? std::numeric_limits<int>::max() : _params.n_predict;
  while (completion->has_next_token && !_interrupted && token_count < max_tokens) {
  // Get next token using rn-llama completion
  rnllama::completion_token_output token_output = completion->doCompletion();
-
+
  if (token_output.tok == -1) {
  break;
  }
-
+
  token_count++;
-
+
  std::string token_text = common_token_to_piece(_rn_ctx->ctx, token_output.tok);
  _result.text += token_text;
-
+
  // Check for stopping strings after adding the token
  if (!_stop_words.empty()) {
  size_t stop_pos = completion->findStoppingStrings(_result.text, token_text.size(), rnllama::STOP_FULL);
@@ -148,7 +148,7 @@ void LlamaCompletionWorker::Execute() {
  break;
  }
  }
-
+
  // Handle streaming callback
  if (_has_callback && !completion->incomplete) {
  struct TokenData {
@@ -160,9 +160,9 @@ void LlamaCompletionWorker::Execute() {
  std::vector<rnllama::completion_token_output> completion_probabilities;
  llama_context* ctx;
  };
-
+
  auto partial_output = completion->parseChatOutput(true);
-
+
  // Extract completion probabilities if n_probs > 0, similar to iOS implementation
  std::vector<rnllama::completion_token_output> probs_output;
  if (_rn_ctx->params.sampling.n_probs > 0) {
@@ -171,23 +171,23 @@ void LlamaCompletionWorker::Execute() {
  size_t probs_stop_pos = std::min(_sent_token_probs_index + to_send_toks.size(), completion->generated_token_probs.size());
  if (probs_pos < probs_stop_pos) {
  probs_output = std::vector<rnllama::completion_token_output>(
- completion->generated_token_probs.begin() + probs_pos,
+ completion->generated_token_probs.begin() + probs_pos,
  completion->generated_token_probs.begin() + probs_stop_pos
  );
  }
  _sent_token_probs_index = probs_stop_pos;
  }
-
+
  TokenData *token_data = new TokenData{
- token_text,
- partial_output.content,
- partial_output.reasoning_content,
- partial_output.tool_calls,
+ token_text,
+ partial_output.content,
+ partial_output.reasoning_content,
+ partial_output.tool_calls,
  partial_output.accumulated_text,
  probs_output,
  _rn_ctx->ctx
  };
-
+
  _tsfn.BlockingCall(token_data, [](Napi::Env env, Napi::Function jsCallback,
  TokenData *data) {
  auto obj = Napi::Object::New(env);
@@ -216,25 +216,25 @@ void LlamaCompletionWorker::Execute() {
  obj.Set("tool_calls", tool_calls);
  }
  obj.Set("accumulated_text", Napi::String::New(env, data->accumulated_text));
-
+
  // Add completion_probabilities if available
  if (!data->completion_probabilities.empty()) {
  obj.Set("completion_probabilities", TokenProbsToArray(env, data->ctx, data->completion_probabilities));
  }
-
+
  delete data;
  jsCallback.Call({obj});
  });
  }
  }
-
+
  // Check stopping conditions
  if (token_count >= max_tokens) {
  _result.stopped_limited = true;
  } else if (!completion->has_next_token && completion->n_remain == 0) {
  _result.stopped_limited = true;
  }
-
+
  // Set completion results from rn-llama completion context
  // tokens_evaluated should include both prompt tokens and generated tokens that were processed
  _result.tokens_evaluated = completion->num_prompt_tokens + completion->num_tokens_predicted;
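(Aside: most of the hunks above only strip trailing whitespace. The substantive code is the streaming callback, which heap-allocates a TokenData payload on the worker thread and hands it to JavaScript through Napi::ThreadSafeFunction::BlockingCall. For readers unfamiliar with that pattern, here is a minimal, self-contained sketch of it using plain node-addon-api; the file name, the exported streamTokens function, and the sample tokens are invented for illustration and are not part of this package.)

```cpp
// streaming_example.cc -- illustrative only, not this package's code.
// Pattern: allocate a payload on a worker thread, push it to JS via
// ThreadSafeFunction::BlockingCall, convert and free it on the main thread.
#include <napi.h>
#include <string>
#include <thread>

struct TokenData {            // stands in for the worker's TokenData struct
  std::string token;
};

Napi::Value StreamTokens(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  // Wrap the JS callback so a non-JS thread can invoke it safely.
  auto tsfn = Napi::ThreadSafeFunction::New(
      env, info[0].As<Napi::Function>(), "token-stream",
      /*maxQueueSize*/ 0, /*initialThreadCount*/ 1);

  std::thread([tsfn]() mutable {
    for (const char *tok : {"Hello", ",", " world"}) {
      auto *data = new TokenData{tok};
      tsfn.BlockingCall(data, [](Napi::Env env, Napi::Function jsCallback,
                                 TokenData *data) {
        auto obj = Napi::Object::New(env);
        obj.Set("token", Napi::String::New(env, data->token));
        delete data;            // payload ownership ends on the main thread
        jsCallback.Call({obj});
      });
    }
    tsfn.Release();             // let the event loop exit once the queue drains
  }).detach();

  return env.Undefined();
}

Napi::Object Init(Napi::Env env, Napi::Object exports) {
  exports.Set("streamTokens", Napi::Function::New(env, StreamTokens));
  return exports;
}

NODE_API_MODULE(streaming_example, Init)
```

Ownership of the payload transfers to the lambda, which deletes it on the main thread after converting it to JS values — the same arrangement the worker uses for its per-token objects.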
@@ -245,20 +245,20 @@ void LlamaCompletionWorker::Execute() {
  _result.stopped_words = completion->stopped_word;
  _result.stopping_word = completion->stopping_word;
  _result.stopped_limited = completion->stopped_limit;
-
+
  // Get audio tokens if TTS is enabled
  if (_has_vocoder && _rn_ctx->tts_wrapper != nullptr) {
  _result.audio_tokens = _rn_ctx->tts_wrapper->audio_tokens;
  }
-
+ common_perf_print(_rn_ctx->ctx, _rn_ctx->completion->ctx_sampling);
  // End completion
  completion->endCompletion();
-
+
  } catch (const std::exception &e) {
  SetError(e.what());
  return;
  }
-
+
  if (_onComplete) {
  _onComplete();
  }
@@ -376,6 +376,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  _rn_ctx = nullptr;
  Napi::TypeError::New(env, "Failed to load model").ThrowAsJavaScriptException();
  }
+ _rn_ctx->attachThreadpoolsIfAvailable();

  // Release progress callback after model is loaded
  if (has_progress_callback) {
@@ -386,7 +387,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  if (!lora.empty()) {
  _rn_ctx->applyLoraAdapters(lora);
  }
-
+
  _info = common_params_get_system_info(params);
  }

@@ -636,7 +637,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
  auto enable_thinking = get_option<bool>(params, "enable_thinking", false);
  auto add_generation_prompt = get_option<bool>(params, "add_generation_prompt", true);
  auto now_str = get_option<std::string>(params, "now", "");
-
+
  std::map<std::string, std::string> chat_template_kwargs;
  if (params.Has("chat_template_kwargs") && params.Get("chat_template_kwargs").IsObject()) {
  auto kwargs_obj = params.Get("chat_template_kwargs").As<Napi::Object>();
@@ -873,7 +874,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  auto enable_thinking = get_option<bool>(options, "enable_thinking", true);
  auto add_generation_prompt = get_option<bool>(options, "add_generation_prompt", true);
  auto now_str = get_option<std::string>(options, "now", "");
-
+
  std::map<std::string, std::string> chat_template_kwargs;
  if (options.Has("chat_template_kwargs") && options.Get("chat_template_kwargs").IsObject()) {
  auto kwargs_obj = options.Get("chat_template_kwargs").As<Napi::Object>();
@@ -886,7 +887,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  }

  common_chat_params chatParams;
-
+
  try {
  chatParams = _rn_ctx->getFormattedChatWithJinja(
  json_stringify(messages), chat_template,
@@ -1043,7 +1044,7 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
  }
  auto text = info[0].ToString().Utf8Value();
  std::vector<std::string> media_paths;
-
+
  if (info.Length() >= 2 && info[1].IsArray()) {
  // Direct array format: tokenize(text, [media_paths])
  auto media_paths_array = info[1].As<Napi::Array>();
@@ -1051,7 +1052,7 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
  media_paths.push_back(media_paths_array.Get(i).ToString().Utf8Value());
  }
  }
-
+
  auto *worker = new TokenizeWorker(info, _rn_ctx, text, media_paths);
  worker->Queue();
  return worker->Promise();
@@ -1072,7 +1073,7 @@ Napi::Value LlamaContext::Detokenize(const Napi::CallbackInfo &info) {
  for (size_t i = 0; i < tokens.Length(); i++) {
  token_ids.push_back(tokens.Get(i).ToNumber().Int32Value());
  }
-
+
  auto *worker = new DetokenizeWorker(info, _rn_ctx, token_ids);
  worker->Queue();
  return worker->Promise();
@@ -1112,16 +1113,16 @@ Napi::Value LlamaContext::Rerank(const Napi::CallbackInfo &info) {
  Napi::TypeError::New(env, "Context is disposed")
  .ThrowAsJavaScriptException();
  }
-
+
  auto query = info[0].ToString().Utf8Value();
  auto documents_array = info[1].As<Napi::Array>();
-
+
  // Convert documents array to vector
  std::vector<std::string> documents;
  for (size_t i = 0; i < documents_array.Length(); i++) {
  documents.push_back(documents_array.Get(i).ToString().Utf8Value());
  }
-
+
  auto options = Napi::Object::New(env);
  if (info.Length() >= 3 && info[2].IsObject()) {
  options = info[2].As<Napi::Object>();
@@ -1130,7 +1131,7 @@ Napi::Value LlamaContext::Rerank(const Napi::CallbackInfo &info) {
  common_params rerankParams;
  rerankParams.embedding = true;
  rerankParams.embd_normalize = get_option<int32_t>(options, "normalize", -1);
-
+
  auto *worker = new RerankWorker(info, _rn_ctx, query, documents, rerankParams);
  worker->Queue();
  return worker->Promise();
@@ -1379,13 +1380,13 @@ LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
  }
  auto text = info[1].ToString().Utf8Value();
  auto speaker_json = info[0].IsString() ? info[0].ToString().Utf8Value() : "";
-
+
  if (!_rn_ctx->tts_wrapper) {
  Napi::Error::New(env, "Vocoder not initialized")
  .ThrowAsJavaScriptException();
  return env.Undefined();
  }
-
+
  auto result_data = _rn_ctx->tts_wrapper->getFormattedAudioCompletion(_rn_ctx, speaker_json, text);
  Napi::Object result = Napi::Object::New(env);
  result.Set("prompt", Napi::String::New(env, result_data.prompt));
@@ -1406,13 +1407,13 @@ LlamaContext::GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info) {
  return env.Undefined();
  }
  auto text = info[0].ToString().Utf8Value();
-
+
  if (!_rn_ctx->tts_wrapper) {
  Napi::Error::New(env, "Vocoder not initialized")
  .ThrowAsJavaScriptException();
  return env.Undefined();
  }
-
+
  auto result = _rn_ctx->tts_wrapper->getAudioCompletionGuideTokens(_rn_ctx, text);
  auto tokens = Napi::Int32Array::New(env, result.size());
  memcpy(tokens.Data(), result.data(), result.size() * sizeof(int32_t));
@@ -1448,7 +1449,7 @@ Napi::Value LlamaContext::DecodeAudioTokens(const Napi::CallbackInfo &info) {
  .ThrowAsJavaScriptException();
  return env.Undefined();
  }
-
+
  auto *worker = new DecodeAudioTokenWorker(info, _rn_ctx, tokens);
  worker->Queue();
  return worker->Promise();
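(Aside: the Tokenize, Detokenize, Rerank, and DecodeAudioTokens hunks above all end the same way: construct a worker, Queue() it, and return worker->Promise(). That is the usual promise-backed Napi::AsyncWorker arrangement, in which Execute() runs off the main thread and SetError() surfaces as a promise rejection. Below is a generic, self-contained sketch of that arrangement; the EchoWorker class and echo export are hypothetical and assume nothing about this package beyond the pattern itself.)

```cpp
// promise_worker_example.cc -- illustrative only, not this package's code.
#include <napi.h>
#include <string>

class EchoWorker : public Napi::AsyncWorker {
 public:
  EchoWorker(Napi::Env env, std::string input)
      : Napi::AsyncWorker(env),
        _deferred(Napi::Promise::Deferred::New(env)),
        _input(std::move(input)) {}

  Napi::Promise Promise() { return _deferred.Promise(); }

  // Runs on a worker thread: no Napi:: value creation allowed here.
  void Execute() override {
    if (_input.empty()) {
      SetError("Input must not be empty");   // surfaces as a promise rejection
      return;
    }
    _result = _input + "!";
  }

  // Back on the main thread: convert the result and settle the promise.
  void OnOK() override { _deferred.Resolve(Napi::String::New(Env(), _result)); }
  void OnError(const Napi::Error &e) override { _deferred.Reject(e.Value()); }

 private:
  Napi::Promise::Deferred _deferred;
  std::string _input, _result;
};

Napi::Value Echo(const Napi::CallbackInfo &info) {
  auto *worker = new EchoWorker(info.Env(), info[0].ToString().Utf8Value());
  worker->Queue();               // AsyncWorker deletes itself after OnOK/OnError
  return worker->Promise();
}

Napi::Object Init(Napi::Env env, Napi::Object exports) {
  exports.Set("echo", Napi::Function::New(env, Echo));
  return exports;
}

NODE_API_MODULE(promise_worker_example, Init)
```

Queue() hands the work to libuv's thread pool, and the default AsyncWorker lifecycle deletes the instance after OnOK/OnError, which is why the bare `new` with no matching `delete` is idiomatic in this pattern.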