@fugood/llama.node 0.4.7 → 0.6.0
This diff shows the published contents of these package versions as they appear in their respective public registries. It is provided for informational purposes only.
- package/CMakeLists.txt +4 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/lib/binding.ts +66 -6
- package/lib/index.js +59 -17
- package/lib/index.ts +74 -23
- package/package.json +1 -1
- package/src/DecodeAudioTokenWorker.cpp +40 -0
- package/src/DecodeAudioTokenWorker.h +22 -0
- package/src/EmbeddingWorker.cpp +7 -5
- package/src/LlamaCompletionWorker.cpp +68 -54
- package/src/LlamaCompletionWorker.h +7 -8
- package/src/LlamaContext.cpp +551 -235
- package/src/LlamaContext.h +26 -4
- package/src/LoadSessionWorker.cpp +4 -2
- package/src/SaveSessionWorker.cpp +10 -6
- package/src/TokenizeWorker.cpp +23 -14
- package/src/TokenizeWorker.h +2 -2
- package/src/addons.cc +8 -11
- package/src/common.hpp +129 -126
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
- package/src/tts_utils.cpp +342 -0
- package/src/tts_utils.h +62 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
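Taken together, the new DecodeAudioTokenWorker, tts_utils, and mtmd-audio sources plus the lib/binding.ts and lib/index.ts updates add audio/TTS output and broader multimodal support to the binding. The sketch below is a hypothetical usage flow, not code from this package: the native method names (initVocoder, getFormattedAudioCompletion, getAudioCompletionGuideTokens, decodeAudioTokens, releaseVocoder) come from the LlamaContext.cpp diff further down, while the loadModel entry point, the option shapes, and the audio_tokens result field are assumptions about the JS wrapper.

```ts
// Hypothetical end-to-end TTS flow; only the native method names are taken from the diff.
import { loadModel } from '@fugood/llama.node' // assumed entry point

async function synthesize(text: string): Promise<Float32Array> {
  // Load an OuteTTS-style text model; the option shape here is assumed.
  const context = await loadModel({ model: 'outetts-0.2.gguf' })

  // Load the WavTokenizer-style vocoder next to the text model.
  context.initVocoder('wavtokenizer-dec.gguf')

  // Build the audio prompt and the per-word guide tokens for the text.
  const prompt = context.getFormattedAudioCompletion(null, text)
  const guideTokens = context.getAudioCompletionGuideTokens(text)

  // guide_tokens is read by Completion() in LlamaContext.cpp; the shape of the
  // completion result (audio_tokens) is an assumption about the JS wrapper.
  const result = await context.completion({ prompt, guide_tokens: guideTokens, n_predict: 4096 })

  // Decode the generated audio tokens back into PCM samples (Float32Array).
  const audio = await context.decodeAudioTokens(result.audio_tokens)

  context.releaseVocoder()
  return audio
}
```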
package/src/LlamaContext.cpp
CHANGED
@@ -1,9 +1,5 @@
-#include "ggml.h"
-#include "gguf.h"
-#include "llama-impl.h"
-#include "json.hpp"
-#include "json-schema-to-grammar.h"
 #include "LlamaContext.h"
+#include "DecodeAudioTokenWorker.h"
 #include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
 #include "EmbeddingWorker.h"
@@ -11,33 +7,42 @@
 #include "LoadSessionWorker.h"
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
+#include "ggml.h"
+#include "gguf.h"
+#include "json-schema-to-grammar.h"
+#include "json.hpp"
+#include "llama-impl.h"

+#include <atomic>
 #include <mutex>
 #include <queue>
-#include <atomic>

 // Helper function for formatted strings (for console logs)
-template<typename
-static std::string format_string(const std::string&
-
-
-
-
-
-
+template <typename... Args>
+static std::string format_string(const std::string &format, Args... args) {
+  int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) +
+               1; // +1 for null terminator
+  if (size_s <= 0) {
+    return "Error formatting string";
+  }
+  auto size = static_cast<size_t>(size_s);
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), args...);
+  return std::string(buf.get(),
+                     buf.get() + size - 1); // -1 to exclude null terminator
 }

 using json = nlohmann::ordered_json;

 // loadModelInfo(path: string): object
-Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo&
+Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
   struct gguf_init_params params = {
-
-
+      /*.no_alloc = */ false,
+      /*.ctx = */ NULL,
   };
   std::string path = info[0].ToString().Utf8Value();
-
+
   // Convert Napi::Array to vector<string>
   std::vector<std::string> skip;
   if (info.Length() > 1 && info[1].IsArray()) {
@@ -47,7 +52,7 @@ Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
     }
   }

-  struct gguf_context *
+  struct gguf_context *ctx = gguf_init_from_file(path.c_str(), params);

   Napi::Object metadata = Napi::Object::New(env);
   if (std::find(skip.begin(), skip.end(), "version") == skip.end()) {
@@ -57,7 +62,8 @@ Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
     metadata.Set("alignment", Napi::Number::New(env, gguf_get_alignment(ctx)));
   }
   if (std::find(skip.begin(), skip.end(), "data_offset") == skip.end()) {
-    metadata.Set("data_offset",
+    metadata.Set("data_offset",
+                 Napi::Number::New(env, gguf_get_data_offset(ctx)));
   }

   // kv
@@ -65,7 +71,7 @@ Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
   const int n_kv = gguf_get_n_kv(ctx);

   for (int i = 0; i < n_kv; ++i) {
-    const char *
+    const char *key = gguf_get_key(ctx, i);
     if (std::find(skip.begin(), skip.end(), key) != skip.end()) {
       continue;
     }
@@ -135,6 +141,27 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
            static_cast<napi_property_attributes>(napi_enumerable)),
        StaticMethod<&LlamaContext::ToggleNativeLog>(
            "toggleNativeLog",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetMultimodalSupport>(
+           "getMultimodalSupport",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::InitVocoder>(
+           "initVocoder",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::ReleaseVocoder>(
+           "releaseVocoder",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::IsVocoderEnabled>(
+           "isVocoderEnabled",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetFormattedAudioCompletion>(
+           "getFormattedAudioCompletion",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetAudioCompletionGuideTokens>(
+           "getAudioCompletionGuideTokens",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::DecodeAudioTokens>(
+           "decodeAudioTokens",
            static_cast<napi_property_attributes>(napi_enumerable))});
   Napi::FunctionReference *constructor = new Napi::FunctionReference();
   *constructor = Napi::Persistent(func);
@@ -145,19 +172,13 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
 }

 const std::vector<ggml_type> kv_cache_types = {
-
-
-
-    GGML_TYPE_Q8_0,
-    GGML_TYPE_Q4_0,
-    GGML_TYPE_Q4_1,
-    GGML_TYPE_IQ4_NL,
-    GGML_TYPE_Q5_0,
-    GGML_TYPE_Q5_1,
+    GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16,
+    GGML_TYPE_Q8_0, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
+    GGML_TYPE_IQ4_NL, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
 };

-static ggml_type kv_cache_type_from_str(const std::string &
-  for (const auto &
+static ggml_type kv_cache_type_from_str(const std::string &s) {
+  for (const auto &type : kv_cache_types) {
     if (ggml_type_name(type) == s) {
       return type;
     }
@@ -165,12 +186,17 @@ static ggml_type kv_cache_type_from_str(const std::string & s) {
   throw std::runtime_error("Unsupported cache type: " + s);
 }

-static int32_t pooling_type_from_str(const std::string &
-  if (s == "none")
-
-  if (s == "
-
-  if (s == "
+static int32_t pooling_type_from_str(const std::string &s) {
+  if (s == "none")
+    return LLAMA_POOLING_TYPE_NONE;
+  if (s == "mean")
+    return LLAMA_POOLING_TYPE_MEAN;
+  if (s == "cls")
+    return LLAMA_POOLING_TYPE_CLS;
+  if (s == "last")
+    return LLAMA_POOLING_TYPE_LAST;
+  if (s == "rank")
+    return LLAMA_POOLING_TYPE_RANK;
   return LLAMA_POOLING_TYPE_UNSPECIFIED;
 }

@@ -197,7 +223,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)

   params.chat_template = get_option<std::string>(options, "chat_template", "");

-  std::string reasoning_format =
+  std::string reasoning_format =
+      get_option<std::string>(options, "reasoning_format", "none");
   if (reasoning_format == "deepseek") {
     params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
   } else {
@@ -213,16 +240,17 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     params.n_ubatch = params.n_batch;
   }
   params.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
-  params.pooling_type = (enum llama_pooling_type)
-
-  );
+  params.pooling_type = (enum llama_pooling_type)pooling_type_from_str(
+      get_option<std::string>(options, "pooling_type", "").c_str());

   params.cpuparams.n_threads =
       get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
   params.flash_attn = get_option<bool>(options, "flash_attn", false);
-  params.cache_type_k = kv_cache_type_from_str(
-
+  params.cache_type_k = kv_cache_type_from_str(
+      get_option<std::string>(options, "cache_type_k", "f16").c_str());
+  params.cache_type_v = kv_cache_type_from_str(
+      get_option<std::string>(options, "cache_type_v", "f16").c_str());
   params.ctx_shift = get_option<bool>(options, "ctx_shift", true);

   params.use_mlock = get_option<bool>(options, "use_mlock", false);
@@ -293,8 +321,9 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
   return Napi::String::New(info.Env(), _info);
 }

-bool validateModelChatTemplate(const struct llama_model *
-
+bool validateModelChatTemplate(const struct llama_model *model,
+                               const bool use_jinja, const char *name) {
+  const char *tmpl = llama_model_chat_template(model, name);
   if (tmpl == nullptr) {
     return false;
   }
@@ -320,68 +349,68 @@ extern "C" void cleanup_logging();
 void LlamaContext::ToggleNativeLog(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
   bool enable = info[0].ToBoolean().Value();
-
+
   if (enable) {
     if (!info[1].IsFunction()) {
-      Napi::TypeError::New(env, "Callback function required")
+      Napi::TypeError::New(env, "Callback function required")
+          .ThrowAsJavaScriptException();
       return;
     }
-
+
     // First clean up existing thread-safe function if any
     if (g_logging_enabled) {
      g_tsfn.Release();
      g_logging_enabled = false;
    }
-
+
    // Create thread-safe function that can be called from any thread
-    g_tsfn = Napi::ThreadSafeFunction::New(
-
-
-
-
-        1,
-        [](Napi::Env) {
-          // Finalizer callback - nothing needed here
-        }
-    );
+    g_tsfn = Napi::ThreadSafeFunction::New(env, info[1].As<Napi::Function>(),
+                                           "LLAMA Logger", 0, 1, [](Napi::Env) {
+                                             // Finalizer callback - nothing
+                                             // needed here
+                                           });

    g_logging_enabled = true;
-
+
    // Set up log callback
-    llama_log_set(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    llama_log_set(
+        [](ggml_log_level level, const char *text, void *user_data) {
+          // First call the default logger
+          llama_log_callback_default(level, text, user_data);
+
+          if (!g_logging_enabled)
+            return;
+
+          // Determine log level string
+          std::string level_str = "";
+          if (level == GGML_LOG_LEVEL_ERROR) {
+            level_str = "error";
+          } else if (level == GGML_LOG_LEVEL_INFO) {
+            level_str = "info";
+          } else if (level == GGML_LOG_LEVEL_WARN) {
+            level_str = "warn";
+          }
+
+          // Create a heap-allocated copy of the data
+          auto *data = new LogMessage{level_str, text};
+
+          // Queue callback to be executed on the JavaScript thread
+          auto status = g_tsfn.BlockingCall(
+              data,
+              [](Napi::Env env, Napi::Function jsCallback, LogMessage *data) {
+                // This code runs on the JavaScript thread
+                jsCallback.Call({Napi::String::New(env, data->level),
+                                 Napi::String::New(env, data->text)});
+                delete data;
+              });
+
+          // If the call failed (e.g., runtime is shutting down), clean up the
+          // data
+          if (status != napi_ok) {
+            delete data;
+          }
+        },
+        nullptr);
  } else {
    // Disable logging
    if (g_logging_enabled) {
@@ -419,22 +448,47 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   Napi::Object minja = Napi::Object::New(info.Env());
   minja.Set("default", validateModelChatTemplate(model, true, ""));
   Napi::Object defaultCaps = Napi::Object::New(info.Env());
-  defaultCaps.Set(
-
-
-  defaultCaps.Set(
-
-
+  defaultCaps.Set(
+      "tools",
+      _templates.get()->template_default->original_caps().supports_tools);
+  defaultCaps.Set(
+      "toolCalls",
+      _templates.get()->template_default->original_caps().supports_tool_calls);
+  defaultCaps.Set("toolResponses", _templates.get()
+                                       ->template_default->original_caps()
+                                       .supports_tool_responses);
+  defaultCaps.Set(
+      "systemRole",
+      _templates.get()->template_default->original_caps().supports_system_role);
+  defaultCaps.Set("parallelToolCalls", _templates.get()
+                                           ->template_default->original_caps()
+                                           .supports_parallel_tool_calls);
+  defaultCaps.Set("toolCallId", _templates.get()
+                                    ->template_default->original_caps()
+                                    .supports_tool_call_id);
   minja.Set("defaultCaps", defaultCaps);
   minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
   if (_templates.get()->template_tool_use) {
     Napi::Object toolUseCaps = Napi::Object::New(info.Env());
-    toolUseCaps.Set(
-
-
-    toolUseCaps.Set("
-
-
+    toolUseCaps.Set(
+        "tools",
+        _templates.get()->template_tool_use->original_caps().supports_tools);
+    toolUseCaps.Set("toolCalls", _templates.get()
+                                     ->template_tool_use->original_caps()
+                                     .supports_tool_calls);
+    toolUseCaps.Set("toolResponses", _templates.get()
+                                         ->template_tool_use->original_caps()
+                                         .supports_tool_responses);
+    toolUseCaps.Set("systemRole", _templates.get()
+                                      ->template_tool_use->original_caps()
+                                      .supports_system_role);
+    toolUseCaps.Set("parallelToolCalls",
+                    _templates.get()
+                        ->template_tool_use->original_caps()
+                        .supports_parallel_tool_calls);
+    toolUseCaps.Set("toolCallId", _templates.get()
+                                      ->template_tool_use->original_caps()
+                                      .supports_tool_call_id);
     minja.Set("toolUseCaps", toolUseCaps);
   }
   chatTemplates.Set("minja", minja);
@@ -443,20 +497,17 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   details.Set("metadata", metadata);

   // Deprecated: use chatTemplates.llamaChat instead
-  details.Set("isChatTemplateSupported",
+  details.Set("isChatTemplateSupported",
+              validateModelChatTemplate(_sess->model(), false, ""));
   return details;
 }

 common_chat_params getFormattedChatWithJinja(
-
-
-
-
-
-    const std::string &tools,
-    const bool &parallel_tool_calls,
-    const std::string &tool_choice
-) {
+    const std::shared_ptr<LlamaSession> &sess,
+    const common_chat_templates_ptr &templates, const std::string &messages,
+    const std::string &chat_template, const std::string &json_schema,
+    const std::string &tools, const bool &parallel_tool_calls,
+    const std::string &tool_choice) {
   common_chat_templates_inputs inputs;
   inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
   auto useTools = !tools.empty();
@@ -470,23 +521,22 @@ common_chat_params getFormattedChatWithJinja(
   if (!json_schema.empty()) {
     inputs.json_schema = json::parse(json_schema);
   }
-  inputs.extract_reasoning =
+  inputs.extract_reasoning =
+      sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;

   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-
-
+    auto tmps = common_chat_templates_init(sess->model(), chat_template);
+    return common_chat_templates_apply(tmps.get(), inputs);
   } else {
-
+    return common_chat_templates_apply(templates.get(), inputs);
   }
 }

-std::string getFormattedChat(
-
-
-
-    const std::string &chat_template
-) {
+std::string getFormattedChat(const struct llama_model *model,
+                             const common_chat_templates_ptr &templates,
+                             const std::string &messages,
+                             const std::string &chat_template) {
   common_chat_templates_inputs inputs;
   inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
   inputs.use_jinja = false;
@@ -503,7 +553,8 @@ std::string getFormattedChat(
 // getFormattedChat(
 //   messages: [{ role: string, content: string }],
 //   chat_template: string,
-//   params: { jinja: boolean, json_schema: string, tools: string,
+//   params: { jinja: boolean, json_schema: string, tools: string,
+//   parallel_tool_calls: boolean, tool_choice: string }
 // ): object | string
 Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
@@ -514,32 +565,42 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";

   auto has_params = info.Length() >= 2;
-  auto params =
+  auto params =
+      has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);

   if (get_option<bool>(params, "jinja", false)) {
     std::string json_schema_str = "";
     if (!is_nil(params.Get("response_format"))) {
       auto response_format = params.Get("response_format").As<Napi::Object>();
-      auto response_format_type =
-
-
-
-
-
+      auto response_format_type =
+          get_option<std::string>(response_format, "type", "text");
+      if (response_format_type == "json_schema" &&
+          response_format.Has("json_schema")) {
+        auto json_schema =
+            response_format.Get("json_schema").As<Napi::Object>();
+        json_schema_str =
+            json_schema.Has("schema")
+                ? json_stringify(json_schema.Get("schema").As<Napi::Object>())
+                : "{}";
       } else if (response_format_type == "json_object") {
-        json_schema_str =
-
-
+        json_schema_str =
+            response_format.Has("schema")
+                ? json_stringify(
+                      response_format.Get("schema").As<Napi::Object>())
+                : "{}";
       }
     }
-    auto tools_str = params.Has("tools")
-
-
-    auto parallel_tool_calls =
+    auto tools_str = params.Has("tools")
+                         ? json_stringify(params.Get("tools").As<Napi::Array>())
+                         : "";
+    auto parallel_tool_calls =
+        get_option<bool>(params, "parallel_tool_calls", false);
    auto tool_choice = get_option<std::string>(params, "tool_choice", "");

-    auto chatParams = getFormattedChatWithJinja(
-
+    auto chatParams = getFormattedChatWithJinja(
+        _sess, _templates, messages, chat_template, json_schema_str, tools_str,
+        parallel_tool_calls, tool_choice);
+
    Napi::Object result = Napi::Object::New(env);
    result.Set("prompt", chatParams.prompt);
    // chat_format: int
@@ -551,30 +612,33 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
    // grammar_triggers: [{ value: string, token: number }]
    Napi::Array grammar_triggers = Napi::Array::New(env);
    for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
-
-
-
-
-
-
+      const auto &trigger = chatParams.grammar_triggers[i];
+      Napi::Object triggerObj = Napi::Object::New(env);
+      triggerObj.Set("type", Napi::Number::New(env, trigger.type));
+      triggerObj.Set("value", Napi::String::New(env, trigger.value));
+      triggerObj.Set("token", Napi::Number::New(env, trigger.token));
+      grammar_triggers.Set(i, triggerObj);
    }
    result.Set("grammar_triggers", grammar_triggers);
    // preserved_tokens: string[]
    Napi::Array preserved_tokens = Napi::Array::New(env);
    for (size_t i = 0; i < chatParams.preserved_tokens.size(); i++) {
-
+      preserved_tokens.Set(
+          i, Napi::String::New(env, chatParams.preserved_tokens[i].c_str()));
    }
    result.Set("preserved_tokens", preserved_tokens);
    // additional_stops: string[]
    Napi::Array additional_stops = Napi::Array::New(env);
    for (size_t i = 0; i < chatParams.additional_stops.size(); i++) {
-
+      additional_stops.Set(
+          i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
    }
    result.Set("additional_stops", additional_stops);

    return result;
  } else {
-    auto formatted =
+    auto formatted =
+        getFormattedChat(_sess->model(), _templates, messages, chat_template);
    return Napi::String::New(env, formatted);
  }
 }
@@ -607,22 +671,24 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
    }
  }

-  // Process
-  std::vector<std::string>
-  if (options.Has("
-    if (options.Get("
-      auto
-      for (size_t i = 0; i <
-
+  // Process media_paths parameter
+  std::vector<std::string> media_paths;
+  if (options.Has("media_paths")) {
+    if (options.Get("media_paths").IsArray()) {
+      auto media_paths_array = options.Get("media_paths").As<Napi::Array>();
+      for (size_t i = 0; i < media_paths_array.Length(); i++) {
+        media_paths.push_back(media_paths_array.Get(i).ToString().Utf8Value());
      }
-    } else if (options.Get("
-
+    } else if (options.Get("media_paths").IsString()) {
+      media_paths.push_back(options.Get("media_paths").ToString().Utf8Value());
    }
  }

-  // Check if multimodal is enabled when
-  if (!
-    Napi::Error::New(env, "Multimodal support must be enabled via
+  // Check if multimodal is enabled when media_paths are provided
+  if (!media_paths.empty() && !(_has_multimodal && _mtmd_ctx != nullptr)) {
+    Napi::Error::New(env, "Multimodal support must be enabled via "
+                          "initMultimodal to use media_paths")
+        .ThrowAsJavaScriptException();
    return env.Undefined();
  }

@@ -638,16 +704,20 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  std::string json_schema_str = "";
  if (options.Has("response_format")) {
    auto response_format = options.Get("response_format").As<Napi::Object>();
-    auto response_format_type =
-
+    auto response_format_type =
+        get_option<std::string>(response_format, "type", "text");
+    if (response_format_type == "json_schema" &&
+        response_format.Has("json_schema")) {
      auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
-      json_schema_str =
-
-
+      json_schema_str =
+          json_schema.Has("schema")
+              ? json_stringify(json_schema.Get("schema").As<Napi::Object>())
+              : "{}";
    } else if (response_format_type == "json_object") {
-      json_schema_str =
-
-
+      json_schema_str =
+          response_format.Has("schema")
+              ? json_stringify(response_format.Get("schema").As<Napi::Object>())
+              : "{}";
    }
  }

@@ -656,7 +726,9 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
    auto preserved_tokens = options.Get("preserved_tokens").As<Napi::Array>();
    for (size_t i = 0; i < preserved_tokens.Length(); i++) {
      auto token = preserved_tokens.Get(i).ToString().Utf8Value();
-      auto ids =
+      auto ids =
+          common_tokenize(_sess->context(), token, /* add_special= */ false,
+                          /* parse_special= */ true);
      if (ids.size() == 1) {
        params.sampling.preserved_tokens.insert(ids[0]);
      }
@@ -669,15 +741,22 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
    for (size_t i = 0; i < grammar_triggers.Length(); i++) {
      auto trigger_obj = grammar_triggers.Get(i).As<Napi::Object>();

-      auto type = static_cast<common_grammar_trigger_type>(
+      auto type = static_cast<common_grammar_trigger_type>(
+          trigger_obj.Get("type").ToNumber().Int32Value());
      auto word = trigger_obj.Get("value").ToString().Utf8Value();

      if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
-        auto ids =
+        auto ids =
+            common_tokenize(_sess->context(), word, /* add_special= */ false,
+                            /* parse_special= */ true);
        if (ids.size() == 1) {
          auto token = ids[0];
-          if (std::find(params.sampling.preserved_tokens.begin(),
-
+          if (std::find(params.sampling.preserved_tokens.begin(),
+                        params.sampling.preserved_tokens.end(),
+                        (llama_token)token) ==
+              params.sampling.preserved_tokens.end()) {
+            throw std::runtime_error(
+                "Grammar trigger word should be marked as preserved token");
          }
          common_grammar_trigger trigger;
          trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
@@ -685,14 +764,16 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
          trigger.token = token;
          params.sampling.grammar_triggers.push_back(std::move(trigger));
        } else {
-          params.sampling.grammar_triggers.push_back(
+          params.sampling.grammar_triggers.push_back(
+              {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
        }
      } else {
        common_grammar_trigger trigger;
        trigger.type = type;
        trigger.value = word;
        if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
-          auto token =
+          auto token =
+              (llama_token)trigger_obj.Get("token").ToNumber().Int32Value();
          trigger.token = token;
        }
        params.sampling.grammar_triggers.push_back(std::move(trigger));
@@ -702,7 +783,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {

  // Handle grammar_lazy from options
  if (options.Has("grammar_lazy")) {
-    params.sampling.grammar_lazy =
+    params.sampling.grammar_lazy =
+        options.Get("grammar_lazy").ToBoolean().Value();
  }

  if (options.Has("messages") && options.Get("messages").IsArray()) {
@@ -710,29 +792,27 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
    auto chat_template = get_option<std::string>(options, "chat_template", "");
    auto jinja = get_option<bool>(options, "jinja", false);
    if (jinja) {
-      auto tools_str =
-
-
-
-      auto
+      auto tools_str =
+          options.Has("tools")
+              ? json_stringify(options.Get("tools").As<Napi::Array>())
+              : "";
+      auto parallel_tool_calls =
+          get_option<bool>(options, "parallel_tool_calls", false);
+      auto tool_choice =
+          get_option<std::string>(options, "tool_choice", "none");

      auto chatParams = getFormattedChatWithJinja(
-
-
-
-          chat_template,
-          json_schema_str,
-          tools_str,
-          parallel_tool_calls,
-          tool_choice
-      );
-
+          _sess, _templates, json_stringify(messages), chat_template,
+          json_schema_str, tools_str, parallel_tool_calls, tool_choice);
+
      params.prompt = chatParams.prompt;

      chat_format = chatParams.format;

-      for (const auto &
-        auto ids =
+      for (const auto &token : chatParams.preserved_tokens) {
+        auto ids =
+            common_tokenize(_sess->context(), token, /* add_special= */ false,
+                            /* parse_special= */ true);
        if (ids.size() == 1) {
          params.sampling.preserved_tokens.insert(ids[0]);
        }
@@ -742,22 +822,18 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
        // grammar param always wins jinja template & json_schema
        params.sampling.grammar = chatParams.grammar;
        params.sampling.grammar_lazy = chatParams.grammar_lazy;
-        for (const auto &
+        for (const auto &trigger : chatParams.grammar_triggers) {
          params.sampling.grammar_triggers.push_back(trigger);
        }
        has_grammar_set = true;
      }
-
-      for (const auto &
+
+      for (const auto &stop : chatParams.additional_stops) {
        stop_words.push_back(stop);
      }
    } else {
      auto formatted = getFormattedChat(
-
-          _templates,
-          json_stringify(messages),
-          chat_template
-      );
+          _sess->model(), _templates, json_stringify(messages), chat_template);
      params.prompt = formatted;
    }
  } else {
@@ -769,7 +845,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  }

  if (!has_grammar_set && !json_schema_str.empty()) {
-    params.sampling.grammar =
+    params.sampling.grammar =
+        json_schema_to_grammar(json::parse(json_schema_str));
  }

  params.n_predict = get_option<int32_t>(options, "n_predict", -1);
@@ -791,16 +868,32 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  params.sampling.penalty_present =
      get_option<float>(options, "penalty_present", 0.00f);
  params.sampling.typ_p = get_option<float>(options, "typical_p", 1.00f);
-  params.sampling.xtc_threshold =
-
-  params.sampling.
+  params.sampling.xtc_threshold =
+      get_option<float>(options, "xtc_threshold", 0.00f);
+  params.sampling.xtc_probability =
+      get_option<float>(options, "xtc_probability", 0.10f);
+  params.sampling.dry_multiplier =
+      get_option<float>(options, "dry_multiplier", 1.75f);
  params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
-  params.sampling.dry_allowed_length =
-
-  params.sampling.
+  params.sampling.dry_allowed_length =
+      get_option<float>(options, "dry_allowed_length", -1);
+  params.sampling.dry_penalty_last_n =
+      get_option<float>(options, "dry_penalty_last_n", 0);
+  params.sampling.top_n_sigma =
+      get_option<float>(options, "top_n_sigma", -1.0f);
  params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
  params.n_keep = get_option<int32_t>(options, "n_keep", 0);
-  params.sampling.seed =
+  params.sampling.seed =
+      get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
+
+  // guide_tokens
+  std::vector<llama_token> guide_tokens;
+  if (options.Has("guide_tokens")) {
+    auto guide_tokens_array = options.Get("guide_tokens").As<Napi::Array>();
+    for (size_t i = 0; i < guide_tokens_array.Length(); i++) {
+      guide_tokens.push_back(guide_tokens_array.Get(i).ToNumber().Int32Value());
+    }
+  }

  Napi::Function callback;
  if (info.Length() >= 2) {
@@ -808,7 +901,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  }

  auto *worker =
-      new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
+                                chat_format, media_paths, guide_tokens);
  worker->Queue();
  _wip = worker;
  worker->OnComplete([this]() { _wip = nullptr; });
@@ -833,14 +927,14 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
        .ThrowAsJavaScriptException();
  }
  auto text = info[0].ToString().Utf8Value();
-  std::vector<std::string>
+  std::vector<std::string> media_paths;
  if (info.Length() >= 2 && info[1].IsArray()) {
-    auto
-    for (size_t i = 0; i <
-
+    auto media_paths_array = info[1].As<Napi::Array>();
+    for (size_t i = 0; i < media_paths_array.Length(); i++) {
+      media_paths.push_back(media_paths_array.Get(i).ToString().Utf8Value());
    }
  }
-  auto *worker = new TokenizeWorker(info, _sess, text,
+  auto *worker = new TokenizeWorker(info, _sess, text, media_paths);
  worker->Queue();
  return worker->Promise();
 }
@@ -962,7 +1056,8 @@ void LlamaContext::RemoveLoraAdapters(const Napi::CallbackInfo &info) {

 // getLoadedLoraAdapters(): Promise<{ count, lora_adapters: [{ path: string,
 // scaled: number }] }>
-Napi::Value
+Napi::Value
+LlamaContext::GetLoadedLoraAdapters(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  Napi::Array lora_adapters = Napi::Array::New(env, _lora.size());
  for (size_t i = 0; i < _lora.size(); i++) {
@@ -980,18 +1075,18 @@ Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
  if (_wip != nullptr) {
    _wip->SetStop();
  }
-
+
  if (_sess == nullptr) {
    auto promise = Napi::Promise::Deferred(env);
    promise.Resolve(env.Undefined());
    return promise.Promise();
  }
-
+
  // Clear the mtmd context reference in the session
  if (_mtmd_ctx != nullptr) {
    _sess->set_mtmd_ctx(nullptr);
  }
-
+
  auto *worker = new DisposeWorker(info, std::move(_sess));
  worker->Queue();
  return worker->Promise();
@@ -1019,7 +1114,8 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() < 1 || !info[0].IsObject()) {
-    Napi::TypeError::New(env, "Object expected for mmproj path")
+    Napi::TypeError::New(env, "Object expected for mmproj path")
+        .ThrowAsJavaScriptException();
  }

  auto options = info[0].As<Napi::Object>();
@@ -1027,7 +1123,8 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
  auto use_gpu = options.Get("use_gpu").ToBoolean().Value();

  if (mmproj_path.empty()) {
-    Napi::TypeError::New(env, "mmproj path is required")
+    Napi::TypeError::New(env, "mmproj path is required")
+        .ThrowAsJavaScriptException();
  }

  console_log(env, "Initializing multimodal with mmproj path: " + mmproj_path);
@@ -1052,33 +1149,56 @@ Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
  mtmd_params.n_threads = _sess->params().cpuparams.n_threads;
  mtmd_params.verbosity = (ggml_log_level)GGML_LOG_LEVEL_INFO;

-  console_log(env, format_string(
-
+  console_log(env, format_string(
+                       "Initializing mtmd context with threads=%d, use_gpu=%d",
+                       mtmd_params.n_threads, mtmd_params.use_gpu ? 1 : 0));

  _mtmd_ctx = mtmd_init_from_file(mmproj_path.c_str(), model, mtmd_params);
  if (_mtmd_ctx == nullptr) {
-    Napi::Error::New(env, "Failed to initialize multimodal context")
+    Napi::Error::New(env, "Failed to initialize multimodal context")
+        .ThrowAsJavaScriptException();
    return Napi::Boolean::New(env, false);
  }

  _has_multimodal = true;
-
+
  // Share the mtmd context with the session
  _sess->set_mtmd_ctx(_mtmd_ctx);

  // Check if the model uses M-RoPE or non-causal attention
  bool uses_mrope = mtmd_decode_use_mrope(_mtmd_ctx);
  bool uses_non_causal = mtmd_decode_use_non_causal(_mtmd_ctx);
-  console_log(
-
+  console_log(
+      env, format_string(
+               "Model multimodal properties: uses_mrope=%d, uses_non_causal=%d",
+               uses_mrope ? 1 : 0, uses_non_causal ? 1 : 0));

-  console_log(env, "Multimodal context initialized successfully with mmproj: " +
+  console_log(env, "Multimodal context initialized successfully with mmproj: " +
+                       mmproj_path);
  return Napi::Boolean::New(env, true);
 }

 // isMultimodalEnabled(): boolean
 Napi::Value LlamaContext::IsMultimodalEnabled(const Napi::CallbackInfo &info) {
-  return Napi::Boolean::New(info.Env(),
+  return Napi::Boolean::New(info.Env(),
+                            _has_multimodal && _mtmd_ctx != nullptr);
+}
+
+// getMultimodalSupport(): Promise<{ vision: boolean, audio: boolean }>
+Napi::Value LlamaContext::GetMultimodalSupport(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  auto result = Napi::Object::New(env);
+
+  if (_has_multimodal && _mtmd_ctx != nullptr) {
+    result.Set("vision",
+               Napi::Boolean::New(env, mtmd_support_vision(_mtmd_ctx)));
+    result.Set("audio", Napi::Boolean::New(env, mtmd_support_audio(_mtmd_ctx)));
+  } else {
+    result.Set("vision", Napi::Boolean::New(env, false));
+    result.Set("audio", Napi::Boolean::New(env, false));
+  }
+
+  return result;
 }

 // releaseMultimodal(): void
@@ -1088,10 +1208,206 @@ void LlamaContext::ReleaseMultimodal(const Napi::CallbackInfo &info) {
    if (_sess != nullptr) {
      _sess->set_mtmd_ctx(nullptr);
    }
-
+
    // Free the mtmd context
    mtmd_free(_mtmd_ctx);
    _mtmd_ctx = nullptr;
    _has_multimodal = false;
  }
 }
+
+tts_type LlamaContext::getTTSType(Napi::Env env, nlohmann::json speaker) {
+  if (speaker.is_object() && speaker.contains("version")) {
+    std::string version = speaker["version"].get<std::string>();
+    if (version == "0.2") {
+      return OUTETTS_V0_2;
+    } else if (version == "0.3") {
+      return OUTETTS_V0_3;
+    } else {
+      Napi::Error::New(env, format_string("Unsupported speaker version '%s'\n",
+                                          version.c_str()))
+          .ThrowAsJavaScriptException();
+      return UNKNOWN;
+    }
+  }
+  if (_tts_type != UNKNOWN) {
+    return _tts_type;
+  }
+  const char *chat_template =
+      llama_model_chat_template(_sess->model(), nullptr);
+  if (chat_template && std::string(chat_template) == "outetts-0.3") {
+    return OUTETTS_V0_3;
+  }
+  return OUTETTS_V0_2;
+}
+
+// initVocoder(path: string): boolean
+Napi::Value LlamaContext::InitVocoder(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected for vocoder path")
+        .ThrowAsJavaScriptException();
+  }
+  auto vocoder_path = info[0].ToString().Utf8Value();
+  if (vocoder_path.empty()) {
+    Napi::TypeError::New(env, "vocoder path is required")
+        .ThrowAsJavaScriptException();
+  }
+  if (_has_vocoder) {
+    Napi::Error::New(env, "Vocoder already initialized")
+        .ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+  _tts_type = getTTSType(env);
+  _vocoder.params = _sess->params();
+  _vocoder.params.warmup = false;
+  _vocoder.params.model.path = vocoder_path;
+  _vocoder.params.embedding = true;
+  _vocoder.params.ctx_shift = false;
+  _vocoder.params.n_ubatch = _vocoder.params.n_batch;
+  common_init_result result = common_init_from_params(_vocoder.params);
+  if (result.model == nullptr || result.context == nullptr) {
+    Napi::Error::New(env, "Failed to initialize vocoder")
+        .ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+  _vocoder.model = std::move(result.model);
+  _vocoder.context = std::move(result.context);
+  _has_vocoder = true;
+  return Napi::Boolean::New(env, true);
+}
+
+// releaseVocoder(): void
+void LlamaContext::ReleaseVocoder(const Napi::CallbackInfo &info) {
+  if (_has_vocoder) {
+    _vocoder.model.reset();
+    _vocoder.context.reset();
+    _has_vocoder = false;
+  }
+}
+
+// isVocoderEnabled(): boolean
+Napi::Value LlamaContext::IsVocoderEnabled(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  return Napi::Boolean::New(env, _has_vocoder);
+}
+
+// getFormattedAudioCompletion(speaker: string|null, text: string): string
+Napi::Value
+LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 2 || !info[1].IsString()) {
+    Napi::TypeError::New(env, "text parameter is required for audio completion")
+        .ThrowAsJavaScriptException();
+  }
+  auto text = info[1].ToString().Utf8Value();
+  auto speaker_json = info[0].IsString() ? info[0].ToString().Utf8Value() : "";
+  nlohmann::json speaker =
+      speaker_json.empty() ? nullptr : nlohmann::json::parse(speaker_json);
+  const tts_type type = getTTSType(env, speaker);
+  std::string audio_text = DEFAULT_AUDIO_TEXT;
+  std::string audio_data = DEFAULT_AUDIO_DATA;
+  if (type == OUTETTS_V0_3) {
+    audio_text = std::regex_replace(audio_text, std::regex(R"(<\|text_sep\|>)"),
+                                    "<|space|>");
+    audio_data =
+        std::regex_replace(audio_data, std::regex(R"(<\|code_start\|>)"), "");
+    audio_data = std::regex_replace(audio_data, std::regex(R"(<\|code_end\|>)"),
+                                    "<|space|>");
+  }
+  if (!speaker_json.empty()) {
+    audio_text = audio_text_from_speaker(speaker, type);
+    audio_data = audio_data_from_speaker(speaker, type);
+  }
+  return Napi::String::New(env, "<|im_start|>\n" + audio_text +
+                                    process_text(text, type) +
+                                    "<|text_end|>\n" + audio_data + "\n");
+}
+
+// getAudioCompletionGuideTokens(text: string): Int32Array
+Napi::Value
+LlamaContext::GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env,
+                         "String expected for audio completion guide tokens")
+        .ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+  auto text = info[0].ToString().Utf8Value();
+  const tts_type type = getTTSType(env);
+  auto clean_text = process_text(text, type);
+  const std::string &delimiter =
+      (type == OUTETTS_V0_3 ? "<|space|>" : "<|text_sep|>");
+  const llama_vocab *vocab = llama_model_get_vocab(_sess->model());
+
+  std::vector<int32_t> result;
+  size_t start = 0;
+  size_t end = clean_text.find(delimiter);
+
+  // first token is always a newline, as it was not previously added
+  result.push_back(common_tokenize(vocab, "\n", false, true)[0]);
+
+  while (end != std::string::npos) {
+    std::string current_word = clean_text.substr(start, end - start);
+    auto tmp = common_tokenize(vocab, current_word, false, true);
+    result.push_back(tmp[0]);
+    start = end + delimiter.length();
+    end = clean_text.find(delimiter, start);
+  }
+
+  // Add the last part
+  std::string current_word = clean_text.substr(start);
+  auto tmp = common_tokenize(vocab, current_word, false, true);
+  if (tmp.size() > 0) {
+    result.push_back(tmp[0]);
+  }
+  auto tokens = Napi::Int32Array::New(env, result.size());
+  memcpy(tokens.Data(), result.data(), result.size() * sizeof(int32_t));
+  return tokens;
+}
+
+// decodeAudioTokens(tokens: number[]|Int32Array): Float32Array
+Napi::Value LlamaContext::DecodeAudioTokens(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1) {
+    Napi::TypeError::New(env, "Tokens parameter is required")
+        .ThrowAsJavaScriptException();
+  }
+  std::vector<int32_t> tokens;
+  if (info[0].IsTypedArray()) {
+    auto js_tokens = info[0].As<Napi::Int32Array>();
+    tokens.resize(js_tokens.ElementLength());
+    memcpy(tokens.data(), js_tokens.Data(),
+           js_tokens.ElementLength() * sizeof(int32_t));
+  } else if (info[0].IsArray()) {
+    auto js_tokens = info[0].As<Napi::Array>();
+    for (size_t i = 0; i < js_tokens.Length(); i++) {
+      tokens.push_back(js_tokens.Get(i).ToNumber().Int32Value());
+    }
+  } else {
+    Napi::TypeError::New(env, "Tokens must be an number array or a Int32Array")
+        .ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+  tts_type type = getTTSType(env);
+  if (type == UNKNOWN) {
+    Napi::Error::New(env, "Unsupported audio tokens")
+        .ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+  if (type == OUTETTS_V0_3 || type == OUTETTS_V0_2) {
+    tokens.erase(
+        std::remove_if(tokens.begin(), tokens.end(),
+                       [](llama_token t) { return t < 151672 || t > 155772; }),
+        tokens.end());
+    for (auto &token : tokens) {
+      token -= 151672;
+    }
+  }
+  auto worker = new DecodeAudioTokenWorker(
+      info, _vocoder.model.get(), _vocoder.context.get(),
+      _sess->params().cpuparams.n_threads, tokens);
+  worker->Queue();
+  return worker->Promise();
+}