@fugood/llama.node 0.4.2 → 0.4.4
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +2 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +26 -8
- package/src/LlamaCompletionWorker.h +4 -0
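Beyond rebuilding the prebuilt binaries for every platform target, this release extends the completion worker to report why generation stopped: an end-of-generation token (stopped_eos), a matched stop word (stopped_words, plus the matched stopping_word), or any other limit (stopped_limited). It also fixes the handling of the reasoning_content and content fields parsed from the chat output.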
package/bin/darwin/arm64/llama-node.node
CHANGED
Binary file

package/bin/darwin/x64/llama-node.node
CHANGED
Binary file

package/bin/linux/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux/x64/llama-node.node
CHANGED
Binary file

package/bin/linux-cuda/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux-cuda/x64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/node.lib
CHANGED
Binary file

package/bin/win32/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/x64/node.lib
CHANGED
Binary file

package/bin/win32-vulkan/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32-vulkan/arm64/node.lib
CHANGED
Binary file

package/bin/win32-vulkan/x64/llama-node.node
CHANGED
Binary file

package/bin/win32-vulkan/x64/node.lib
CHANGED
Binary file

package/lib/binding.ts
CHANGED

package/package.json
CHANGED
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -507,6 +507,8 @@ void LlamaCompletionWorker::Execute() {
     _result.tokens_predicted += 1;
     n_input = 1;
     if (_has_callback) {
+      // TODO: When we got possible stop words (startsWith)
+      // we should avoid calling the callback, wait for the next token
       const char *c_token = strdup(token.c_str());
       _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
                                      const char *value) {
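The TODO above describes deferring the streaming callback while the tail of the generated text could still grow into a stop word. A minimal sketch of such a check, with a hypothetical helper name that is not part of this package:

#include <string>
#include <vector>

// Returns true while `tail` (the not-yet-emitted end of the generated text)
// is a strict prefix of some stop word; in that case the next token could
// still complete the stop word, so emitting `tail` now would leak part of it
// to the callback.
static bool is_possible_stop_prefix(const std::string &tail,
                                    const std::vector<std::string> &stop_words) {
  for (const auto &word : stop_words) {
    if (word.size() > tail.size() &&
        word.compare(0, tail.size(), tail) == 0) {
      return true;
    }
  }
  return false;
}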
@@ -518,6 +520,8 @@ void LlamaCompletionWorker::Execute() {
     }
     // is it an end of generation?
     if (llama_vocab_is_eog(vocab, new_token_id)) {
+      _result.stopped_eos = true;
+      // TODO: EOS token should be cut
       break;
     }
     // check for stop words
@@ -525,10 +529,16 @@ void LlamaCompletionWorker::Execute() {
       const size_t stop_pos =
           findStoppingStrings(_result.text, token.size(), _stop_words);
       if (stop_pos != std::string::npos) {
+        _result.stopped_words = true;
+        _result.stopping_word = _result.text.substr(stop_pos, token.size());
+        _result.text = _result.text.substr(0, stop_pos - 1);
         break;
       }
     }
   }
+  if (!_result.stopped_eos && !_result.stopped_words) {
+    _result.stopped_limited = true;
+  }
   const auto t_main_end = ggml_time_us();
   _sess->get_mutex().unlock();
   if (_onComplete) {
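To make the two substr calls above concrete: assuming findStoppingStrings returns the index in _result.text where the matched stop word begins, here is a self-contained walk-through with illustrative values:

#include <cassert>
#include <string>

int main() {
  std::string text = "Hello world\nUser:"; // accumulated _result.text
  std::string token = "User:";             // final token, completing the stop word
  size_t stop_pos = text.find("User:");    // 12; stands in for findStoppingStrings()

  // stopping_word takes token.size() characters starting at the match
  std::string stopping_word = text.substr(stop_pos, token.size());
  // the kept text ends at stop_pos - 1, so the character immediately before
  // the stop word (here the '\n') is trimmed along with it
  text = text.substr(0, stop_pos - 1);

  assert(stopping_word == "User:");
  assert(text == "Hello world");
  return 0;
}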
@@ -549,18 +559,26 @@ void LlamaCompletionWorker::OnOK() {
              Napi::Boolean::New(env, _result.context_full));
   result.Set("text",
              Napi::String::New(env, _result.text.c_str()));
+  result.Set("stopped_eos",
+             Napi::Boolean::New(env, _result.stopped_eos));
+  result.Set("stopped_words",
+             Napi::Boolean::New(env, _result.stopped_words));
+  result.Set("stopping_word",
+             Napi::String::New(env, _result.stopping_word.c_str()));
+  result.Set("stopped_limited",
+             Napi::Boolean::New(env, _result.stopped_limited));
 
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
-  std::string
-  std::string
+  std::string reasoning_content = "";
+  std::string content;
   if (!_stop) {
     try {
       common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
       if (!message.reasoning_content.empty()) {
-        reasoning_content =
+        reasoning_content = message.reasoning_content;
       }
       if (!message.content.empty()) {
-        content =
+        content = message.content;
       }
       for (size_t i = 0; i < message.tool_calls.size(); i++) {
         const auto &tc = message.tool_calls[i];
@@ -582,11 +600,11 @@ void LlamaCompletionWorker::OnOK() {
   if (tool_calls.Length() > 0) {
     result.Set("tool_calls", tool_calls);
   }
-  if (reasoning_content) {
-    result.Set("reasoning_content", Napi::String::New(env, reasoning_content
+  if (!reasoning_content.empty()) {
+    result.Set("reasoning_content", Napi::String::New(env, reasoning_content.c_str()));
   }
-  if (content) {
-    result.Set("content", Napi::String::New(env, content
+  if (!content.empty()) {
+    result.Set("content", Napi::String::New(env, content.c_str()));
   }
 
   auto ctx = _sess->context();