@fugood/llama.node 1.0.4 → 1.0.5
This diff reflects the publicly available content of the two package versions as released to their public registries, and is provided for informational purposes only.
- package/lib/binding.ts +1 -0
- package/package.json +14 -14
- package/src/LlamaCompletionWorker.cpp +24 -4
- package/src/LlamaCompletionWorker.h +7 -1
- package/src/LlamaContext.cpp +2 -1
package/lib/binding.ts
CHANGED
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0.4",
+  "version": "1.0.5",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -70,19 +70,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.0.4",
-    "@fugood/node-llama-linux-arm64": "1.0.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-x64": "1.0.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-arm64": "1.0.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-darwin-x64": "1.0.4",
-    "@fugood/node-llama-darwin-arm64": "1.0.4"
+    "@fugood/node-llama-linux-x64": "1.0.5",
+    "@fugood/node-llama-linux-x64-vulkan": "1.0.5",
+    "@fugood/node-llama-linux-x64-cuda": "1.0.5",
+    "@fugood/node-llama-linux-arm64": "1.0.5",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.0.5",
+    "@fugood/node-llama-linux-arm64-cuda": "1.0.5",
+    "@fugood/node-llama-win32-x64": "1.0.5",
+    "@fugood/node-llama-win32-x64-vulkan": "1.0.5",
+    "@fugood/node-llama-win32-x64-cuda": "1.0.5",
+    "@fugood/node-llama-win32-arm64": "1.0.5",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.0.5",
+    "@fugood/node-llama-darwin-x64": "1.0.5",
+    "@fugood/node-llama-darwin-arm64": "1.0.5"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -32,12 +32,15 @@ LlamaCompletionWorker::LlamaCompletionWorker(
     bool thinking_forced_open,
     std::string reasoning_format,
     const std::vector<std::string> &media_paths,
-    const std::vector<llama_token> &guide_tokens)
+    const std::vector<llama_token> &guide_tokens,
+    bool has_vocoder,
+    tts_type tts_type_val)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
       _thinking_forced_open(thinking_forced_open),
       _reasoning_format(reasoning_format),
-      _media_paths(media_paths), _guide_tokens(guide_tokens) {
+      _media_paths(media_paths), _guide_tokens(guide_tokens),
+      _has_vocoder(has_vocoder), _tts_type(tts_type_val) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -153,8 +156,7 @@ void LlamaCompletionWorker::Execute() {
     // For multimodal input, n_past might already be set
     // Only decode text tokens if we have any input left
     if (n_input > 0) {
-      int ret =
-          llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
+      int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
       if (ret < 0) {
         SetError("Failed to decode token, code: " + std::to_string(ret));
         break;
@@ -171,6 +173,15 @@ void LlamaCompletionWorker::Execute() {
     }
     _next_token_uses_guide_token = (new_token_id == 198);
     common_sampler_accept(sampling.get(), new_token_id, true);
+
+    // Collect audio tokens for TTS if vocoder is enabled
+    if (_has_vocoder) {
+      if ((_tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
+          (new_token_id >= 151672 && new_token_id <= 155772)) {
+        _result.audio_tokens.push_back(new_token_id);
+      }
+    }
+
     // prepare the next batch
     embd->emplace_back(new_token_id);
     auto token = common_token_to_piece(ctx, new_token_id);
@@ -291,6 +302,15 @@ void LlamaCompletionWorker::OnOK() {
     result.Set("content", Napi::String::New(env, content.c_str()));
   }
 
+  // Add audio_tokens if vocoder is enabled and we have audio tokens
+  if (_has_vocoder && !_result.audio_tokens.empty()) {
+    auto audio_tokens = Napi::Array::New(env, _result.audio_tokens.size());
+    for (size_t i = 0; i < _result.audio_tokens.size(); i++) {
+      audio_tokens.Set(i, Napi::Number::New(env, _result.audio_tokens[i]));
+    }
+    result.Set("audio_tokens", audio_tokens);
+  }
+
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
 
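In practical terms, these worker changes mean a completion result can now carry an audio_tokens array when the context has a vocoder attached. The TypeScript sketch below shows how a consumer might read that field; the loadModel/completion call shape and option names are assumptions for illustration only, and only the content and audio_tokens fields come from the OnOK code above.

// Hypothetical usage sketch (API and option names assumed, not taken from this diff);
// only `content` and `audio_tokens` mirror the fields set in OnOK above.
import { loadModel } from '@fugood/llama.node'

async function run() {
  // Assumed options; a real TTS setup would also attach a vocoder model.
  const context = await loadModel({ model: './outetts-model.gguf' })

  const result = (await context.completion({ prompt: 'Hello world' })) as {
    content?: string
    audio_tokens?: number[] // OuteTTS token IDs collected during sampling
  }

  if (result.audio_tokens?.length) {
    // Raw token IDs in the 151672..155772 range (per the diff),
    // intended for the vocoder / TTS decoding step.
    console.log(`got ${result.audio_tokens.length} audio tokens`)
  } else {
    console.log(result.content)
  }
}

run()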
package/src/LlamaCompletionWorker.h
CHANGED

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "common.hpp"
+#include "tts_utils.h"
 #include <atomic>
 #include <functional>
 #include <napi.h>
@@ -23,7 +24,9 @@ public:
       bool thinking_forced_open,
       std::string reasoning_format,
       const std::vector<std::string> &media_paths = {},
-      const std::vector<llama_token> &guide_tokens = {});
+      const std::vector<llama_token> &guide_tokens = {},
+      bool has_vocoder = false,
+      tts_type tts_type_val = UNKNOWN);
 
   ~LlamaCompletionWorker();
 
@@ -52,6 +55,8 @@ private:
   bool _stop = false;
   Napi::ThreadSafeFunction _tsfn;
   bool _next_token_uses_guide_token = true;
+  bool _has_vocoder;
+  tts_type _tts_type;
   struct {
     size_t tokens_evaluated = 0;
     size_t tokens_predicted = 0;
@@ -62,5 +67,6 @@ private:
     bool stopped_words = false;
     std::string stopping_word;
     bool stopped_limited = false;
+    std::vector<llama_token> audio_tokens;
   } _result;
 };
package/src/LlamaContext.cpp
CHANGED

@@ -917,7 +917,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
 
   auto *worker =
       new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
-                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens);
+                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens,
+                                _has_vocoder, _tts_type);
   worker->Queue();
   _wip = worker;
   worker->OnComplete([this]() { _wip = nullptr; });
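Taken together, the thread through these files is: LlamaContext forwards its vocoder state (_has_vocoder, _tts_type) into the completion worker, the worker collects OuteTTS-range token IDs while sampling, and OnOK surfaces them to JavaScript as audio_tokens. Because the new header parameters default to has_vocoder = false and tts_type_val = UNKNOWN, existing callers that omit them keep the previous behavior. The TypeScript sketch below simply mirrors the native gating condition for reference; the enum strings and helper name are illustrative, not part of the package API.

// Mirror of the native check added in Execute() (illustrative only):
// a sampled token counts as an audio token when a vocoder is attached,
// the TTS type is OuteTTS v0.2/v0.3, and the ID is in the codebook range.
type TtsType = 'UNKNOWN' | 'OUTETTS_V0_2' | 'OUTETTS_V0_3'

function isOuteTtsAudioToken(
  tokenId: number,
  hasVocoder: boolean,
  ttsType: TtsType,
): boolean {
  if (!hasVocoder) return false
  if (ttsType !== 'OUTETTS_V0_2' && ttsType !== 'OUTETTS_V0_3') return false
  return tokenId >= 151672 && tokenId <= 155772
}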