@fugood/llama.node 1.0.4 → 1.0.6
This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/lib/binding.ts +2 -1
- package/lib/index.ts +1 -1
- package/package.json +14 -14
- package/src/LlamaCompletionWorker.cpp +27 -6
- package/src/LlamaCompletionWorker.h +7 -1
- package/src/LlamaContext.cpp +8 -4
package/lib/binding.ts
CHANGED
@@ -131,6 +131,7 @@ export type LlamaCompletionResult = {
   tokens_evaluated: number
   truncated: boolean
   context_full: boolean
+  audio_tokens?: Array<number>
   timings: {
     prompt_n: number
     prompt_ms: number
@@ -230,7 +231,7 @@ export interface LlamaContext {
   * @param path Path to the vocoder model
   * @returns Promise resolving to true if loading was successful
   */
-  initVocoder(options: { path: string }): Promise<boolean>
+  initVocoder(options: { path: string, n_batch?: number }): Promise<boolean>

  /**
   * Unload the vocoder model

package/lib/index.ts
CHANGED
@@ -286,7 +286,7 @@ class LlamaContextWrapper {
     return this.ctx.getMultimodalSupport()
   }

-  initVocoder(options: { path: string }): Promise<boolean> {
+  initVocoder(options: { path: string, n_batch?: number }): Promise<boolean> {
     return this.ctx.initVocoder(options)
   }

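The wrapper change is a straight pass-through, so only the call site changes. A minimal sketch of the new surface, assuming the wrapper's existing loadModel()/completion() calls (those calls and the model file names are illustrative; only initVocoder({ path, n_batch }) and the optional audio_tokens result field come from this diff):

import { loadModel } from '@fugood/llama.node'

async function demo() {
  // Assumed loader call; adjust to the actual wrapper API.
  const ctx = await loadModel({ model: './OuteTTS-0.2-500M-Q4_K_M.gguf' })

  // New in 1.0.6: an optional n_batch can be forwarded to the vocoder context.
  const ok = await ctx.initVocoder({ path: './WavTokenizer-Large-75-Q5_K_M.gguf', n_batch: 512 })
  if (!ok) throw new Error('vocoder failed to load')

  // New in 1.0.6: with a vocoder loaded, OuteTTS audio codes sampled during
  // completion are also returned on the result as audio_tokens.
  const result = await ctx.completion({ prompt: 'Hello world', n_predict: 512 })
  if (result.audio_tokens) {
    console.log(`collected ${result.audio_tokens.length} audio tokens`)
  }
}

demo().catch(console.error)
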
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0.4",
+  "version": "1.0.6",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -70,19 +70,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.0.4",
-    "@fugood/node-llama-linux-arm64": "1.0.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-x64": "1.0.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.0.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.0.4",
-    "@fugood/node-llama-win32-arm64": "1.0.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0.4",
-    "@fugood/node-llama-darwin-x64": "1.0.4",
-    "@fugood/node-llama-darwin-arm64": "1.0.4"
+    "@fugood/node-llama-linux-x64": "1.0.6",
+    "@fugood/node-llama-linux-x64-vulkan": "1.0.6",
+    "@fugood/node-llama-linux-x64-cuda": "1.0.6",
+    "@fugood/node-llama-linux-arm64": "1.0.6",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.0.6",
+    "@fugood/node-llama-linux-arm64-cuda": "1.0.6",
+    "@fugood/node-llama-win32-x64": "1.0.6",
+    "@fugood/node-llama-win32-x64-vulkan": "1.0.6",
+    "@fugood/node-llama-win32-x64-cuda": "1.0.6",
+    "@fugood/node-llama-win32-arm64": "1.0.6",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.0.6",
+    "@fugood/node-llama-darwin-x64": "1.0.6",
+    "@fugood/node-llama-darwin-arm64": "1.0.6"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",

package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -1,5 +1,6 @@
 #include "LlamaCompletionWorker.h"
 #include "LlamaContext.h"
+#include <limits>

 size_t findStoppingStrings(const std::string &text,
                            const size_t last_token_size,
@@ -32,12 +33,15 @@ LlamaCompletionWorker::LlamaCompletionWorker(
     bool thinking_forced_open,
     std::string reasoning_format,
     const std::vector<std::string> &media_paths,
-    const std::vector<llama_token> &guide_tokens
+    const std::vector<llama_token> &guide_tokens,
+    bool has_vocoder,
+    tts_type tts_type_val)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
       _thinking_forced_open(thinking_forced_open),
       _reasoning_format(reasoning_format),
-      _media_paths(media_paths), _guide_tokens(guide_tokens)
+      _media_paths(media_paths), _guide_tokens(guide_tokens),
+      _has_vocoder(has_vocoder), _tts_type(tts_type_val) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -121,11 +125,11 @@ void LlamaCompletionWorker::Execute() {
     _sess->set_tokens(std::move(prompt_tokens));
   }

-  const int max_len = _params.n_predict < 0 ?
+  const int max_len = _params.n_predict < 0 ? std::numeric_limits<int>::max() : _params.n_predict;
   _sess->tokens_ptr()->reserve(_sess->tokens_ptr()->size() + max_len);

   auto embd = _sess->tokens_ptr();
-  for (int i = 0; i < max_len || _stop; i++) {
+  for (int i = 0; (i < max_len || _stop) && !_params.vocab_only; i++) {
     // check if we need to remove some tokens
     if (embd->size() >= _params.n_ctx) {
       if (!_params.ctx_shift) {
@@ -153,8 +157,7 @@ void LlamaCompletionWorker::Execute() {
     // For multimodal input, n_past might already be set
     // Only decode text tokens if we have any input left
     if (n_input > 0) {
-      int ret =
-          llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
+      int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
       if (ret < 0) {
         SetError("Failed to decode token, code: " + std::to_string(ret));
         break;
@@ -171,6 +174,15 @@ void LlamaCompletionWorker::Execute() {
     }
     _next_token_uses_guide_token = (new_token_id == 198);
     common_sampler_accept(sampling.get(), new_token_id, true);
+
+    // Collect audio tokens for TTS if vocoder is enabled
+    if (_has_vocoder) {
+      if ((_tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
+          (new_token_id >= 151672 && new_token_id <= 155772)) {
+        _result.audio_tokens.push_back(new_token_id);
+      }
+    }
+
     // prepare the next batch
     embd->emplace_back(new_token_id);
     auto token = common_token_to_piece(ctx, new_token_id);
@@ -291,6 +303,15 @@ void LlamaCompletionWorker::OnOK() {
     result.Set("content", Napi::String::New(env, content.c_str()));
   }

+  // Add audio_tokens if vocoder is enabled and we have audio tokens
+  if (_has_vocoder && !_result.audio_tokens.empty()) {
+    auto audio_tokens = Napi::Array::New(env, _result.audio_tokens.size());
+    for (size_t i = 0; i < _result.audio_tokens.size(); i++) {
+      audio_tokens.Set(i, Napi::Number::New(env, _result.audio_tokens[i]));
+    }
+    result.Set("audio_tokens", audio_tokens);
+  }
+
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);

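For context, the worker only treats a sampled token as an audio code when a vocoder is loaded and the TTS type is OuteTTS v0.2/v0.3, keeping IDs in the 151672–155772 range; everything else still flows through the normal text path. A rough TypeScript equivalent of that filter (illustrative only, not part of the binding's API):

// Mirrors the per-token check added in LlamaCompletionWorker::Execute().
const OUTETTS_AUDIO_TOKEN_MIN = 151672
const OUTETTS_AUDIO_TOKEN_MAX = 155772

function collectAudioTokens(sampledTokens: number[], hasVocoder: boolean): number[] {
  if (!hasVocoder) return []
  return sampledTokens.filter(
    (id) => id >= OUTETTS_AUDIO_TOKEN_MIN && id <= OUTETTS_AUDIO_TOKEN_MAX,
  )
}

// Only IDs inside the OuteTTS range survive:
console.log(collectAudioTokens([198, 151700, 152000, 42], true)) // [151700, 152000]
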
package/src/LlamaCompletionWorker.h
CHANGED

@@ -1,6 +1,7 @@
 #pragma once

 #include "common.hpp"
+#include "tts_utils.h"
 #include <atomic>
 #include <functional>
 #include <napi.h>
@@ -23,7 +24,9 @@ public:
       bool thinking_forced_open,
       std::string reasoning_format,
       const std::vector<std::string> &media_paths = {},
-      const std::vector<llama_token> &guide_tokens = {}
+      const std::vector<llama_token> &guide_tokens = {},
+      bool has_vocoder = false,
+      tts_type tts_type_val = UNKNOWN);

   ~LlamaCompletionWorker();

@@ -52,6 +55,8 @@ private:
   bool _stop = false;
   Napi::ThreadSafeFunction _tsfn;
   bool _next_token_uses_guide_token = true;
+  bool _has_vocoder;
+  tts_type _tts_type;
   struct {
     size_t tokens_evaluated = 0;
     size_t tokens_predicted = 0;
@@ -62,5 +67,6 @@ private:
     bool stopped_words = false;
     std::string stopping_word;
     bool stopped_limited = false;
+    std::vector<llama_token> audio_tokens;
   } _result;
 };

package/src/LlamaContext.cpp
CHANGED
@@ -917,7 +917,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {

   auto *worker =
       new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
-                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens
+                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens,
+                                _has_vocoder, _tts_type);
   worker->Queue();
   _wip = worker;
   worker->OnComplete([this]() { _wip = nullptr; });
@@ -1290,14 +1291,16 @@ tts_type LlamaContext::getTTSType(Napi::Env env, nlohmann::json speaker) {
     return OUTETTS_V0_2;
   }

-// initVocoder(
+// initVocoder(params?: object): boolean
 Napi::Value LlamaContext::InitVocoder(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsObject()) {
-    Napi::TypeError::New(env, "Object is expected for vocoder
+    Napi::TypeError::New(env, "Object is expected for vocoder options")
         .ThrowAsJavaScriptException();
   }
-  auto
+  auto options = info[0].As<Napi::Object>();
+  auto vocoder_path = options.Get("path").ToString().Utf8Value();
+  auto n_batch = get_option<int32_t>(options, "n_batch", _sess->params().n_batch);
   if (vocoder_path.empty()) {
     Napi::TypeError::New(env, "vocoder path is required")
         .ThrowAsJavaScriptException();
@@ -1313,6 +1316,7 @@ Napi::Value LlamaContext::InitVocoder(const Napi::CallbackInfo &info) {
   _vocoder.params.model.path = vocoder_path;
   _vocoder.params.embedding = true;
   _vocoder.params.ctx_shift = false;
+  _vocoder.params.n_batch = n_batch;
   _vocoder.params.n_ubatch = _vocoder.params.n_batch;
   common_init_result result = common_init_from_params(_vocoder.params);
   if (result.model == nullptr || result.context == nullptr) {