@fugood/llama.node 1.0.4 → 1.0.6

package/lib/binding.ts CHANGED
@@ -131,6 +131,7 @@ export type LlamaCompletionResult = {
   tokens_evaluated: number
   truncated: boolean
   context_full: boolean
+  audio_tokens?: Array<number>
   timings: {
     prompt_n: number
     prompt_ms: number
@@ -230,7 +231,7 @@ export interface LlamaContext {
    * @param path Path to the vocoder model
    * @returns Promise resolving to true if loading was successful
    */
-  initVocoder(options: { path: string }): Promise<boolean>
+  initVocoder(options: { path: string, n_batch?: number }): Promise<boolean>
 
   /**
    * Unload the vocoder model
package/lib/index.ts CHANGED
@@ -286,7 +286,7 @@ class LlamaContextWrapper {
     return this.ctx.getMultimodalSupport()
   }
 
-  initVocoder(options: { path: string }): Promise<boolean> {
+  initVocoder(options: { path: string, n_batch?: number }): Promise<boolean> {
     return this.ctx.initVocoder(options)
   }
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0.4",
+  "version": "1.0.6",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -70,19 +70,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.0.4",
-    "@fugood/node-llama-linux-arm64": "1.0.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-x64": "1.0.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-arm64": "1.0.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-darwin-x64": "1.0.4",
-    "@fugood/node-llama-darwin-arm64": "1.0.4"
+    "@fugood/node-llama-linux-x64": "1.0.6",
+    "@fugood/node-llama-linux-x64-vulkan": "1.0.6",
+    "@fugood/node-llama-linux-x64-cuda": "1.0.6",
+    "@fugood/node-llama-linux-arm64": "1.0.6",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.0.6",
+    "@fugood/node-llama-linux-arm64-cuda": "1.0.6",
+    "@fugood/node-llama-win32-x64": "1.0.6",
+    "@fugood/node-llama-win32-x64-vulkan": "1.0.6",
+    "@fugood/node-llama-win32-x64-cuda": "1.0.6",
+    "@fugood/node-llama-win32-arm64": "1.0.6",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.0.6",
+    "@fugood/node-llama-darwin-x64": "1.0.6",
+    "@fugood/node-llama-darwin-arm64": "1.0.6"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -1,5 +1,6 @@
 #include "LlamaCompletionWorker.h"
 #include "LlamaContext.h"
+#include <limits>
 
 size_t findStoppingStrings(const std::string &text,
                            const size_t last_token_size,
@@ -32,12 +33,15 @@ LlamaCompletionWorker::LlamaCompletionWorker(
     bool thinking_forced_open,
     std::string reasoning_format,
     const std::vector<std::string> &media_paths,
-    const std::vector<llama_token> &guide_tokens)
+    const std::vector<llama_token> &guide_tokens,
+    bool has_vocoder,
+    tts_type tts_type_val)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
       _thinking_forced_open(thinking_forced_open),
       _reasoning_format(reasoning_format),
-      _media_paths(media_paths), _guide_tokens(guide_tokens) {
+      _media_paths(media_paths), _guide_tokens(guide_tokens),
+      _has_vocoder(has_vocoder), _tts_type(tts_type_val) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -121,11 +125,11 @@ void LlamaCompletionWorker::Execute() {
     _sess->set_tokens(std::move(prompt_tokens));
   }
 
-  const int max_len = _params.n_predict < 0 ? 0 : _params.n_predict;
+  const int max_len = _params.n_predict < 0 ? std::numeric_limits<int>::max() : _params.n_predict;
   _sess->tokens_ptr()->reserve(_sess->tokens_ptr()->size() + max_len);
 
   auto embd = _sess->tokens_ptr();
-  for (int i = 0; i < max_len || _stop; i++) {
+  for (int i = 0; (i < max_len || _stop) && !_params.vocab_only; i++) {
     // check if we need to remove some tokens
     if (embd->size() >= _params.n_ctx) {
       if (!_params.ctx_shift) {
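The max_len change above alters what a negative n_predict means: 1.0.4 clamped it to 0, while 1.0.6 maps it to INT_MAX, i.e. effectively "no explicit token cap", so generation ends on stop words, EOS, or a full context; the loop also gains a !_params.vocab_only guard. A hedged JS-side sketch, reusing the ctx from the earlier sketch and assuming the completion options expose n_predict and stop under those names (the prompt and stop strings are illustrative):

  // n_predict: -1 is now treated as "unlimited" rather than 0 (new behavior in 1.0.6)
  const result = await ctx.completion({
    prompt: 'Q: Name three llama species.\nA:',
    n_predict: -1,
    stop: ['\nQ:'], // generation still ends on stop words, EOS, or a full context
  })
  console.log(result.tokens_evaluated, result.truncated, result.context_full)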
@@ -153,8 +157,7 @@ void LlamaCompletionWorker::Execute() {
     // For multimodal input, n_past might already be set
     // Only decode text tokens if we have any input left
     if (n_input > 0) {
-      int ret =
-          llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
+      int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
       if (ret < 0) {
         SetError("Failed to decode token, code: " + std::to_string(ret));
         break;
@@ -171,6 +174,15 @@ void LlamaCompletionWorker::Execute() {
     }
     _next_token_uses_guide_token = (new_token_id == 198);
     common_sampler_accept(sampling.get(), new_token_id, true);
+
+    // Collect audio tokens for TTS if vocoder is enabled
+    if (_has_vocoder) {
+      if ((_tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
+          (new_token_id >= 151672 && new_token_id <= 155772)) {
+        _result.audio_tokens.push_back(new_token_id);
+      }
+    }
+
     // prepare the next batch
     embd->emplace_back(new_token_id);
     auto token = common_token_to_piece(ctx, new_token_id);
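The block added above only records raw vocabulary ids in the OuteTTS audio range (151672–155772) for OUTETTS_V0_2/V0_3 models; it does not convert them to codec codes. A hypothetical post-processing sketch on the JS side, given a completion result like the ones above and assuming (as in llama.cpp's TTS example) that each code is the token id minus the first audio token id; the constant name here is made up:

  const AUDIO_TOKEN_BASE = 151672 // first id of the range checked above (assumption)
  const codes = (result.audio_tokens ?? []).map((id) => id - AUDIO_TOKEN_BASE)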
@@ -291,6 +303,15 @@ void LlamaCompletionWorker::OnOK() {
     result.Set("content", Napi::String::New(env, content.c_str()));
   }
 
+  // Add audio_tokens if vocoder is enabled and we have audio tokens
+  if (_has_vocoder && !_result.audio_tokens.empty()) {
+    auto audio_tokens = Napi::Array::New(env, _result.audio_tokens.size());
+    for (size_t i = 0; i < _result.audio_tokens.size(); i++) {
+      audio_tokens.Set(i, Napi::Number::New(env, _result.audio_tokens[i]));
+    }
+    result.Set("audio_tokens", audio_tokens);
+  }
+
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
 
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "common.hpp"
+#include "tts_utils.h"
 #include <atomic>
 #include <functional>
 #include <napi.h>
@@ -23,7 +24,9 @@ public:
     bool thinking_forced_open,
     std::string reasoning_format,
     const std::vector<std::string> &media_paths = {},
-    const std::vector<llama_token> &guide_tokens = {});
+    const std::vector<llama_token> &guide_tokens = {},
+    bool has_vocoder = false,
+    tts_type tts_type_val = UNKNOWN);
 
   ~LlamaCompletionWorker();
 
@@ -52,6 +55,8 @@ private:
   bool _stop = false;
   Napi::ThreadSafeFunction _tsfn;
   bool _next_token_uses_guide_token = true;
+  bool _has_vocoder;
+  tts_type _tts_type;
   struct {
     size_t tokens_evaluated = 0;
     size_t tokens_predicted = 0;
@@ -62,5 +67,6 @@ private:
     bool stopped_words = false;
     std::string stopping_word;
     bool stopped_limited = false;
+    std::vector<llama_token> audio_tokens;
   } _result;
 };
@@ -917,7 +917,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
 
   auto *worker =
       new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
-                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens);
+                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens,
+                                _has_vocoder, _tts_type);
   worker->Queue();
   _wip = worker;
   worker->OnComplete([this]() { _wip = nullptr; });
@@ -1290,14 +1291,16 @@ tts_type LlamaContext::getTTSType(Napi::Env env, nlohmann::json speaker) {
   return OUTETTS_V0_2;
 }
 
-// initVocoder(path: string): boolean
+// initVocoder(params?: object): boolean
 Napi::Value LlamaContext::InitVocoder(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsObject()) {
-    Napi::TypeError::New(env, "Object is expected for vocoder path")
+    Napi::TypeError::New(env, "Object is expected for vocoder options")
         .ThrowAsJavaScriptException();
   }
-  auto vocoder_path = info[0].As<Napi::Object>().Get("path").ToString().Utf8Value();
+  auto options = info[0].As<Napi::Object>();
+  auto vocoder_path = options.Get("path").ToString().Utf8Value();
+  auto n_batch = get_option<int32_t>(options, "n_batch", _sess->params().n_batch);
   if (vocoder_path.empty()) {
     Napi::TypeError::New(env, "vocoder path is required")
         .ThrowAsJavaScriptException();
@@ -1313,6 +1316,7 @@ Napi::Value LlamaContext::InitVocoder(const Napi::CallbackInfo &info) {
   _vocoder.params.model.path = vocoder_path;
   _vocoder.params.embedding = true;
   _vocoder.params.ctx_shift = false;
+  _vocoder.params.n_batch = n_batch;
   _vocoder.params.n_ubatch = _vocoder.params.n_batch;
   common_init_result result = common_init_from_params(_vocoder.params);
   if (result.model == nullptr || result.context == nullptr) {