@fugood/llama.node 1.0.4 → 1.0.6

package/lib/binding.ts CHANGED
@@ -131,6 +131,7 @@ export type LlamaCompletionResult = {
   tokens_evaluated: number
   truncated: boolean
   context_full: boolean
+  audio_tokens?: Array<number>
   timings: {
     prompt_n: number
     prompt_ms: number
@@ -230,7 +231,7 @@ export interface LlamaContext {
    * @param path Path to the vocoder model
    * @returns Promise resolving to true if loading was successful
    */
-  initVocoder(options: { path: string }): Promise<boolean>
+  initVocoder(options: { path: string, n_batch?: number }): Promise<boolean>
 
   /**
    * Unload the vocoder model
package/lib/index.ts CHANGED
@@ -286,7 +286,7 @@ class LlamaContextWrapper {
     return this.ctx.getMultimodalSupport()
   }
 
-  initVocoder(options: { path: string }): Promise<boolean> {
+  initVocoder(options: { path: string, n_batch?: number }): Promise<boolean> {
     return this.ctx.initVocoder(options)
   }
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0.4",
+  "version": "1.0.6",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -70,19 +70,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.0.4",
-    "@fugood/node-llama-linux-arm64": "1.0.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-x64": "1.0.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-arm64": "1.0.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-darwin-x64": "1.0.4",
-    "@fugood/node-llama-darwin-arm64": "1.0.4"
+    "@fugood/node-llama-linux-x64": "1.0.6",
+    "@fugood/node-llama-linux-x64-vulkan": "1.0.6",
+    "@fugood/node-llama-linux-x64-cuda": "1.0.6",
+    "@fugood/node-llama-linux-arm64": "1.0.6",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.0.6",
+    "@fugood/node-llama-linux-arm64-cuda": "1.0.6",
+    "@fugood/node-llama-win32-x64": "1.0.6",
+    "@fugood/node-llama-win32-x64-vulkan": "1.0.6",
+    "@fugood/node-llama-win32-x64-cuda": "1.0.6",
+    "@fugood/node-llama-win32-arm64": "1.0.6",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.0.6",
+    "@fugood/node-llama-darwin-x64": "1.0.6",
+    "@fugood/node-llama-darwin-arm64": "1.0.6"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -1,5 +1,6 @@
 #include "LlamaCompletionWorker.h"
 #include "LlamaContext.h"
+#include <limits>
 
 size_t findStoppingStrings(const std::string &text,
                            const size_t last_token_size,
@@ -32,12 +33,15 @@ LlamaCompletionWorker::LlamaCompletionWorker(
     bool thinking_forced_open,
     std::string reasoning_format,
     const std::vector<std::string> &media_paths,
-    const std::vector<llama_token> &guide_tokens)
+    const std::vector<llama_token> &guide_tokens,
+    bool has_vocoder,
+    tts_type tts_type_val)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
       _thinking_forced_open(thinking_forced_open),
       _reasoning_format(reasoning_format),
-      _media_paths(media_paths), _guide_tokens(guide_tokens) {
+      _media_paths(media_paths), _guide_tokens(guide_tokens),
+      _has_vocoder(has_vocoder), _tts_type(tts_type_val) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -121,11 +125,11 @@ void LlamaCompletionWorker::Execute() {
     _sess->set_tokens(std::move(prompt_tokens));
   }
 
-  const int max_len = _params.n_predict < 0 ? 0 : _params.n_predict;
+  const int max_len = _params.n_predict < 0 ? std::numeric_limits<int>::max() : _params.n_predict;
   _sess->tokens_ptr()->reserve(_sess->tokens_ptr()->size() + max_len);
 
   auto embd = _sess->tokens_ptr();
-  for (int i = 0; i < max_len || _stop; i++) {
+  for (int i = 0; (i < max_len || _stop) && !_params.vocab_only; i++) {
     // check if we need to remove some tokens
     if (embd->size() >= _params.n_ctx) {
       if (!_params.ctx_shift) {
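The max_len change above alters what a negative n_predict means: 1.0.4 clamped it to 0, while 1.0.6 maps it to INT_MAX, i.e. effectively "no explicit token cap", so generation ends on stop words, EOS, or a full context; the loop also gains a !_params.vocab_only guard. A hedged JS-side sketch, reusing the ctx from the earlier sketch and assuming the completion options expose n_predict and stop under those names (the prompt and stop strings are illustrative):

  // n_predict: -1 is now treated as "unlimited" rather than 0 (new behavior in 1.0.6)
  const result = await ctx.completion({
    prompt: 'Q: Name three llama species.\nA:',
    n_predict: -1,
    stop: ['\nQ:'], // generation still ends on stop words, EOS, or a full context
  })
  console.log(result.tokens_evaluated, result.truncated, result.context_full)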
@@ -153,8 +157,7 @@ void LlamaCompletionWorker::Execute() {
     // For multimodal input, n_past might already be set
     // Only decode text tokens if we have any input left
     if (n_input > 0) {
-      int ret =
-          llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
+      int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
       if (ret < 0) {
         SetError("Failed to decode token, code: " + std::to_string(ret));
         break;
@@ -171,6 +174,15 @@ void LlamaCompletionWorker::Execute() {
     }
     _next_token_uses_guide_token = (new_token_id == 198);
     common_sampler_accept(sampling.get(), new_token_id, true);
+
+    // Collect audio tokens for TTS if vocoder is enabled
+    if (_has_vocoder) {
+      if ((_tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
+          (new_token_id >= 151672 && new_token_id <= 155772)) {
+        _result.audio_tokens.push_back(new_token_id);
+      }
+    }
+
     // prepare the next batch
     embd->emplace_back(new_token_id);
     auto token = common_token_to_piece(ctx, new_token_id);
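The block added above only records raw vocabulary ids in the OuteTTS audio range (151672–155772) for OUTETTS_V0_2/V0_3 models; it does not convert them to codec codes. A hypothetical post-processing sketch on the JS side, given a completion result like the ones above and assuming (as in llama.cpp's TTS example) that each code is the token id minus the first audio token id; the constant name here is made up:

  const AUDIO_TOKEN_BASE = 151672 // first id of the range checked above (assumption)
  const codes = (result.audio_tokens ?? []).map((id) => id - AUDIO_TOKEN_BASE)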
@@ -291,6 +303,15 @@ void LlamaCompletionWorker::OnOK() {
     result.Set("content", Napi::String::New(env, content.c_str()));
   }
 
+  // Add audio_tokens if vocoder is enabled and we have audio tokens
+  if (_has_vocoder && !_result.audio_tokens.empty()) {
+    auto audio_tokens = Napi::Array::New(env, _result.audio_tokens.size());
+    for (size_t i = 0; i < _result.audio_tokens.size(); i++) {
+      audio_tokens.Set(i, Napi::Number::New(env, _result.audio_tokens[i]));
+    }
+    result.Set("audio_tokens", audio_tokens);
+  }
+
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
 
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "common.hpp"
+#include "tts_utils.h"
 #include <atomic>
 #include <functional>
 #include <napi.h>
@@ -23,7 +24,9 @@ public:
     bool thinking_forced_open,
     std::string reasoning_format,
     const std::vector<std::string> &media_paths = {},
-    const std::vector<llama_token> &guide_tokens = {});
+    const std::vector<llama_token> &guide_tokens = {},
+    bool has_vocoder = false,
+    tts_type tts_type_val = UNKNOWN);
 
   ~LlamaCompletionWorker();
 
@@ -52,6 +55,8 @@ private:
   bool _stop = false;
   Napi::ThreadSafeFunction _tsfn;
   bool _next_token_uses_guide_token = true;
+  bool _has_vocoder;
+  tts_type _tts_type;
   struct {
     size_t tokens_evaluated = 0;
     size_t tokens_predicted = 0;
@@ -62,5 +67,6 @@ private:
     bool stopped_words = false;
     std::string stopping_word;
     bool stopped_limited = false;
+    std::vector<llama_token> audio_tokens;
   } _result;
 };
@@ -917,7 +917,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
 
   auto *worker =
       new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
-                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens);
+                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens,
+                                _has_vocoder, _tts_type);
   worker->Queue();
   _wip = worker;
   worker->OnComplete([this]() { _wip = nullptr; });
@@ -1290,14 +1291,16 @@ tts_type LlamaContext::getTTSType(Napi::Env env, nlohmann::json speaker) {
   return OUTETTS_V0_2;
 }
 
-// initVocoder(path: string): boolean
+// initVocoder(params?: object): boolean
 Napi::Value LlamaContext::InitVocoder(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsObject()) {
-    Napi::TypeError::New(env, "Object is expected for vocoder path")
+    Napi::TypeError::New(env, "Object is expected for vocoder options")
         .ThrowAsJavaScriptException();
   }
-  auto vocoder_path = info[0].As<Napi::Object>().Get("path").ToString().Utf8Value();
+  auto options = info[0].As<Napi::Object>();
+  auto vocoder_path = options.Get("path").ToString().Utf8Value();
+  auto n_batch = get_option<int32_t>(options, "n_batch", _sess->params().n_batch);
   if (vocoder_path.empty()) {
     Napi::TypeError::New(env, "vocoder path is required")
         .ThrowAsJavaScriptException();
@@ -1313,6 +1316,7 @@ Napi::Value LlamaContext::InitVocoder(const Napi::CallbackInfo &info) {
   _vocoder.params.model.path = vocoder_path;
   _vocoder.params.embedding = true;
   _vocoder.params.ctx_shift = false;
+  _vocoder.params.n_batch = n_batch;
   _vocoder.params.n_ubatch = _vocoder.params.n_batch;
   common_init_result result = common_init_from_params(_vocoder.params);
   if (result.model == nullptr || result.context == nullptr) {