@fugood/llama.node 1.0.4 → 1.0.5
This diff reflects the publicly available content of the two package versions as released to their public registries, and is provided for informational purposes only.
- package/lib/binding.ts +1 -0
- package/package.json +14 -14
- package/src/LlamaCompletionWorker.cpp +24 -4
- package/src/LlamaCompletionWorker.h +7 -1
- package/src/LlamaContext.cpp +2 -1
package/lib/binding.ts
CHANGED
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0.4",
+  "version": "1.0.5",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -70,19 +70,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.0.4",
-    "@fugood/node-llama-linux-arm64": "1.0.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-x64": "1.0.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-arm64": "1.0.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-darwin-x64": "1.0.4",
-    "@fugood/node-llama-darwin-arm64": "1.0.4"
+    "@fugood/node-llama-linux-x64": "1.0.5",
+    "@fugood/node-llama-linux-x64-vulkan": "1.0.5",
+    "@fugood/node-llama-linux-x64-cuda": "1.0.5",
+    "@fugood/node-llama-linux-arm64": "1.0.5",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.0.5",
+    "@fugood/node-llama-linux-arm64-cuda": "1.0.5",
+    "@fugood/node-llama-win32-x64": "1.0.5",
+    "@fugood/node-llama-win32-x64-vulkan": "1.0.5",
+    "@fugood/node-llama-win32-x64-cuda": "1.0.5",
+    "@fugood/node-llama-win32-arm64": "1.0.5",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.0.5",
+    "@fugood/node-llama-darwin-x64": "1.0.5",
+    "@fugood/node-llama-darwin-arm64": "1.0.5"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -32,12 +32,15 @@ LlamaCompletionWorker::LlamaCompletionWorker(
     bool thinking_forced_open,
     std::string reasoning_format,
     const std::vector<std::string> &media_paths,
-    const std::vector<llama_token> &guide_tokens)
+    const std::vector<llama_token> &guide_tokens,
+    bool has_vocoder,
+    tts_type tts_type_val)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
       _thinking_forced_open(thinking_forced_open),
       _reasoning_format(reasoning_format),
-      _media_paths(media_paths), _guide_tokens(guide_tokens) {
+      _media_paths(media_paths), _guide_tokens(guide_tokens),
+      _has_vocoder(has_vocoder), _tts_type(tts_type_val) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -153,8 +156,7 @@ void LlamaCompletionWorker::Execute() {
     // For multimodal input, n_past might already be set
     // Only decode text tokens if we have any input left
     if (n_input > 0) {
-      int ret =
-          llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
+      int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
       if (ret < 0) {
         SetError("Failed to decode token, code: " + std::to_string(ret));
         break;
@@ -171,6 +173,15 @@ void LlamaCompletionWorker::Execute() {
     }
     _next_token_uses_guide_token = (new_token_id == 198);
     common_sampler_accept(sampling.get(), new_token_id, true);
+
+    // Collect audio tokens for TTS if vocoder is enabled
+    if (_has_vocoder) {
+      if ((_tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
+          (new_token_id >= 151672 && new_token_id <= 155772)) {
+        _result.audio_tokens.push_back(new_token_id);
+      }
+    }
+
     // prepare the next batch
     embd->emplace_back(new_token_id);
     auto token = common_token_to_piece(ctx, new_token_id);
@@ -291,6 +302,15 @@ void LlamaCompletionWorker::OnOK() {
     result.Set("content", Napi::String::New(env, content.c_str()));
   }
 
+  // Add audio_tokens if vocoder is enabled and we have audio tokens
+  if (_has_vocoder && !_result.audio_tokens.empty()) {
+    auto audio_tokens = Napi::Array::New(env, _result.audio_tokens.size());
+    for (size_t i = 0; i < _result.audio_tokens.size(); i++) {
+      audio_tokens.Set(i, Napi::Number::New(env, _result.audio_tokens[i]));
+    }
+    result.Set("audio_tokens", audio_tokens);
+  }
+
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
 
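In practical terms, these worker changes mean a completion result can now carry an audio_tokens array when the context has a vocoder attached. The TypeScript sketch below shows how a consumer might read that field; the loadModel/completion call shape and option names are assumptions for illustration only, and only the content and audio_tokens fields come from the OnOK code above.

// Hypothetical usage sketch (API and option names assumed, not taken from this diff);
// only `content` and `audio_tokens` mirror the fields set in OnOK above.
import { loadModel } from '@fugood/llama.node'

async function run() {
  // Assumed options; a real TTS setup would also attach a vocoder model.
  const context = await loadModel({ model: './outetts-model.gguf' })

  const result = (await context.completion({ prompt: 'Hello world' })) as {
    content?: string
    audio_tokens?: number[] // OuteTTS token IDs collected during sampling
  }

  if (result.audio_tokens?.length) {
    // Raw token IDs in the 151672..155772 range (per the diff),
    // intended for the vocoder / TTS decoding step.
    console.log(`got ${result.audio_tokens.length} audio tokens`)
  } else {
    console.log(result.content)
  }
}

run()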
package/src/LlamaCompletionWorker.h
CHANGED

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "common.hpp"
+#include "tts_utils.h"
 #include <atomic>
 #include <functional>
 #include <napi.h>
@@ -23,7 +24,9 @@ public:
       bool thinking_forced_open,
       std::string reasoning_format,
       const std::vector<std::string> &media_paths = {},
-      const std::vector<llama_token> &guide_tokens = {});
+      const std::vector<llama_token> &guide_tokens = {},
+      bool has_vocoder = false,
+      tts_type tts_type_val = UNKNOWN);
 
   ~LlamaCompletionWorker();
 
@@ -52,6 +55,8 @@ private:
   bool _stop = false;
   Napi::ThreadSafeFunction _tsfn;
   bool _next_token_uses_guide_token = true;
+  bool _has_vocoder;
+  tts_type _tts_type;
   struct {
     size_t tokens_evaluated = 0;
     size_t tokens_predicted = 0;
@@ -62,5 +67,6 @@ private:
     bool stopped_words = false;
     std::string stopping_word;
     bool stopped_limited = false;
+    std::vector<llama_token> audio_tokens;
   } _result;
 };
package/src/LlamaContext.cpp
CHANGED

@@ -917,7 +917,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
 
   auto *worker =
       new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
-                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens);
+                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens,
+                                _has_vocoder, _tts_type);
   worker->Queue();
   _wip = worker;
   worker->OnComplete([this]() { _wip = nullptr; });
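Taken together, the thread through these files is: LlamaContext forwards its vocoder state (_has_vocoder, _tts_type) into the completion worker, the worker collects OuteTTS-range token IDs while sampling, and OnOK surfaces them to JavaScript as audio_tokens. Because the new header parameters default to has_vocoder = false and tts_type_val = UNKNOWN, existing callers that omit them keep the previous behavior. The TypeScript sketch below simply mirrors the native gating condition for reference; the enum strings and helper name are illustrative, not part of the package API.

// Mirror of the native check added in Execute() (illustrative only):
// a sampled token counts as an audio token when a vocoder is attached,
// the TTS type is OuteTTS v0.2/v0.3, and the ID is in the codebook range.
type TtsType = 'UNKNOWN' | 'OUTETTS_V0_2' | 'OUTETTS_V0_3'

function isOuteTtsAudioToken(
  tokenId: number,
  hasVocoder: boolean,
  ttsType: TtsType,
): boolean {
  if (!hasVocoder) return false
  if (ttsType !== 'OUTETTS_V0_2' && ttsType !== 'OUTETTS_V0_3') return false
  return tokenId >= 151672 && tokenId <= 155772
}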