@fugood/llama.node 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/binding.ts CHANGED
@@ -131,6 +131,7 @@ export type LlamaCompletionResult = {
  tokens_evaluated: number
  truncated: boolean
  context_full: boolean
+ audio_tokens?: Array<number>
  timings: {
    prompt_n: number
    prompt_ms: number
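
The only API-visible change in this release: completion results may now carry raw TTS audio tokens. A minimal consumption sketch in TypeScript, assuming the package's loadModel/completion entry points (only the audio_tokens field itself is confirmed by this diff):

  import { loadModel } from '@fugood/llama.node'

  // Sketch only: `loadModel`/`completion` shapes are assumed from the
  // package docs; `audio_tokens` is the field added in this diff.
  const context = await loadModel({ model: './outetts-0.3.gguf' })
  const result = await context.completion({ prompt: 'Hello world' })

  // Optional field: only populated when a vocoder is attached and the
  // model emits tokens in its audio range.
  if (result.audio_tokens?.length) {
    console.log(`collected ${result.audio_tokens.length} audio tokens`)
  }
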
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "@fugood/llama.node",
    "access": "public",
-   "version": "1.0.4",
+   "version": "1.0.5",
    "description": "An another Node binding of llama.cpp",
    "main": "lib/index.js",
    "scripts": {
@@ -70,19 +70,19 @@
      "CMakeLists.txt"
    ],
    "optionalDependencies": {
-     "@fugood/node-llama-linux-x64": "1.0.4",
-     "@fugood/node-llama-linux-x64-vulkan": "1.0.4",
-     "@fugood/node-llama-linux-x64-cuda": "1.0.4",
-     "@fugood/node-llama-linux-arm64": "1.0.4",
-     "@fugood/node-llama-linux-arm64-vulkan": "1.0.4",
-     "@fugood/node-llama-linux-arm64-cuda": "1.0.4",
-     "@fugood/node-llama-win32-x64": "1.0.4",
-     "@fugood/node-llama-win32-x64-vulkan": "1.0.4",
-     "@fugood/node-llama-win32-x64-cuda": "1.0.4",
-     "@fugood/node-llama-win32-arm64": "1.0.4",
-     "@fugood/node-llama-win32-arm64-vulkan": "1.0.4",
-     "@fugood/node-llama-darwin-x64": "1.0.4",
-     "@fugood/node-llama-darwin-arm64": "1.0.4"
+     "@fugood/node-llama-linux-x64": "1.0.5",
+     "@fugood/node-llama-linux-x64-vulkan": "1.0.5",
+     "@fugood/node-llama-linux-x64-cuda": "1.0.5",
+     "@fugood/node-llama-linux-arm64": "1.0.5",
+     "@fugood/node-llama-linux-arm64-vulkan": "1.0.5",
+     "@fugood/node-llama-linux-arm64-cuda": "1.0.5",
+     "@fugood/node-llama-win32-x64": "1.0.5",
+     "@fugood/node-llama-win32-x64-vulkan": "1.0.5",
+     "@fugood/node-llama-win32-x64-cuda": "1.0.5",
+     "@fugood/node-llama-win32-arm64": "1.0.5",
+     "@fugood/node-llama-win32-arm64-vulkan": "1.0.5",
+     "@fugood/node-llama-darwin-x64": "1.0.5",
+     "@fugood/node-llama-darwin-arm64": "1.0.5"
    },
    "devDependencies": {
      "@babel/preset-env": "^7.24.4",
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -32,12 +32,15 @@ LlamaCompletionWorker::LlamaCompletionWorker(
      bool thinking_forced_open,
      std::string reasoning_format,
      const std::vector<std::string> &media_paths,
-     const std::vector<llama_token> &guide_tokens)
+     const std::vector<llama_token> &guide_tokens,
+     bool has_vocoder,
+     tts_type tts_type_val)
      : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
        _params(params), _stop_words(stop_words), _chat_format(chat_format),
        _thinking_forced_open(thinking_forced_open),
        _reasoning_format(reasoning_format),
-       _media_paths(media_paths), _guide_tokens(guide_tokens) {
+       _media_paths(media_paths), _guide_tokens(guide_tokens),
+       _has_vocoder(has_vocoder), _tts_type(tts_type_val) {
    if (!callback.IsEmpty()) {
      _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                            "LlamaCompletionCallback", 0, 1);
@@ -153,8 +156,7 @@ void LlamaCompletionWorker::Execute() {
    // For multimodal input, n_past might already be set
    // Only decode text tokens if we have any input left
    if (n_input > 0) {
-     int ret =
-         llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
+     int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
      if (ret < 0) {
        SetError("Failed to decode token, code: " + std::to_string(ret));
        break;
@@ -171,6 +173,15 @@ void LlamaCompletionWorker::Execute() {
    }
    _next_token_uses_guide_token = (new_token_id == 198);
    common_sampler_accept(sampling.get(), new_token_id, true);
+
+   // Collect audio tokens for TTS if vocoder is enabled
+   if (_has_vocoder) {
+     if ((_tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
+         (new_token_id >= 151672 && new_token_id <= 155772)) {
+       _result.audio_tokens.push_back(new_token_id);
+     }
+   }
+
    // prepare the next batch
    embd->emplace_back(new_token_id);
    auto token = common_token_to_piece(ctx, new_token_id);
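
The magic numbers above deserve a note: 151672 through 155772 is the region of the OuteTTS vocabulary reserved for audio tokens, so the worker filters sampled tokens down to the ones a vocoder can consume. If you post-process the collected IDs on the JS side, the usual step is to rebase them to 0-based codebook indices; a small TypeScript sketch (the offset mirrors the range check above, the helper name is illustrative):

  // Rebase raw vocabulary IDs from the OuteTTS audio range to 0-based
  // codes; 151672 is the lower bound of the range checked above.
  const OUTETTS_AUDIO_TOKEN_START = 151672

  function toAudioCodes(audioTokens: number[]): number[] {
    return audioTokens.map((id) => id - OUTETTS_AUDIO_TOKEN_START)
  }
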
@@ -291,6 +302,15 @@ void LlamaCompletionWorker::OnOK() {
      result.Set("content", Napi::String::New(env, content.c_str()));
    }

+   // Add audio_tokens if vocoder is enabled and we have audio tokens
+   if (_has_vocoder && !_result.audio_tokens.empty()) {
+     auto audio_tokens = Napi::Array::New(env, _result.audio_tokens.size());
+     for (size_t i = 0; i < _result.audio_tokens.size(); i++) {
+       audio_tokens.Set(i, Napi::Number::New(env, _result.audio_tokens[i]));
+     }
+     result.Set("audio_tokens", audio_tokens);
+   }
+
    auto ctx = _sess->context();
    const auto timings_token = llama_perf_context(ctx);
package/src/LlamaCompletionWorker.h CHANGED
@@ -1,6 +1,7 @@
  #pragma once

  #include "common.hpp"
+ #include "tts_utils.h"
  #include <atomic>
  #include <functional>
  #include <napi.h>
@@ -23,7 +24,9 @@ public:
      bool thinking_forced_open,
      std::string reasoning_format,
      const std::vector<std::string> &media_paths = {},
-     const std::vector<llama_token> &guide_tokens = {});
+     const std::vector<llama_token> &guide_tokens = {},
+     bool has_vocoder = false,
+     tts_type tts_type_val = UNKNOWN);

  ~LlamaCompletionWorker();

@@ -52,6 +55,8 @@ private:
    bool _stop = false;
    Napi::ThreadSafeFunction _tsfn;
    bool _next_token_uses_guide_token = true;
+   bool _has_vocoder;
+   tts_type _tts_type;
    struct {
      size_t tokens_evaluated = 0;
      size_t tokens_predicted = 0;
@@ -62,5 +67,6 @@ private:
      bool stopped_words = false;
      std::string stopping_word;
      bool stopped_limited = false;
+     std::vector<llama_token> audio_tokens;
    } _result;
  };
package/src/LlamaContext.cpp CHANGED
@@ -917,7 +917,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {

    auto *worker =
        new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
-                                 chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens);
+                                 chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens,
+                                 _has_vocoder, _tts_type);
    worker->Queue();
    _wip = worker;
    worker->OnComplete([this]() { _wip = nullptr; });
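
Taken together, the plumbing means a context with a vocoder attached forwards that state into every completion worker, which in turn reports collected audio tokens back to JavaScript. A speculative end-to-end flow, where initVocoder is a hypothetical method name (only the audio_tokens result field is confirmed by this diff):

  import { loadModel } from '@fugood/llama.node'

  const context = await loadModel({ model: './outetts-0.3.gguf' })

  // Hypothetical: attach a WavTokenizer-style vocoder so the native
  // side sets _has_vocoder; the real method name may differ.
  await context.initVocoder({ path: './wavtokenizer.gguf' })

  const result = await context.completion({ prompt: 'Text to speak' })
  // With a vocoder attached, OuteTTS audio tokens arrive here.
  const audioTokens = result.audio_tokens ?? []
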