@fugood/llama.node 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +7 -3
- package/lib/binding.js +1 -1
- package/lib/binding.ts +40 -14
- package/lib/index.js +4 -1
- package/lib/index.ts +13 -9
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +10 -10
- package/src/LlamaCompletionWorker.cpp +33 -33
- package/src/LlamaContext.cpp +53 -16
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
- package/src/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/src/llama.cpp/common/chat-parser.h +10 -0
- package/src/llama.cpp/common/chat.cpp +461 -87
- package/src/llama.cpp/common/chat.h +6 -0
- package/src/llama.cpp/common/common.cpp +8 -1
- package/src/llama.cpp/common/common.h +12 -5
- package/src/llama.cpp/common/json-partial.cpp +19 -2
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -0
- package/src/llama.cpp/common/json-schema-to-grammar.h +2 -0
- package/src/llama.cpp/common/sampling.cpp +60 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -38
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +15 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +16 -14
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +49 -48
- package/src/llama.cpp/src/llama-grammar.cpp +17 -9
- package/src/llama.cpp/src/llama-impl.cpp +3 -3
- package/src/llama.cpp/src/llama-sampling.cpp +3 -6
- package/src/llama.cpp/src/llama-vocab.cpp +1 -0
package/src/LlamaContext.cpp
CHANGED
@@ -105,6 +105,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
        InstanceMethod<&LlamaContext::GetModelInfo>(
            "getModelInfo",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetUsedDevices>(
+           "getUsedDevices",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::GetFormattedChat>(
            "getFormattedChat",
            static_cast<napi_property_attributes>(napi_enumerable)),
@@ -306,6 +309,19 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
+  // Parse devices array
+  if (options.Has("devices") && options.Get("devices").IsArray()) {
+    auto devices_array = options.Get("devices").As<Napi::Array>();
+    for (size_t i = 0; i < devices_array.Length(); i++) {
+      auto device_name = devices_array.Get(i).ToString().Utf8Value();
+      auto * dev = ggml_backend_dev_by_name(device_name.c_str());
+      if (dev) {
+        params.devices.push_back(dev);
+      }
+      // Skip invalid device names silently
+    }
+  }
+
   std::vector<common_adapter_lora_info> lora;
   auto lora_path = get_option<std::string>(options, "lora", "");
   auto lora_scaled = get_option<float>(options, "lora_scaled", 1.0f);
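The hunk above adds a `devices` option to context creation: each array entry is matched against a ggml backend device name via `ggml_backend_dev_by_name`, matches are pushed into `params.devices`, and unknown names are skipped silently. A minimal TypeScript sketch of how a caller might use it; `loadModel` and the device names here are illustrative assumptions, not confirmed by this diff:

    import { loadModel } from '@fugood/llama.node'

    // Hypothetical usage: pin offload to specific ggml backend devices.
    // Names must match ggml_backend_dev_name() output exactly; entries
    // that match no device are ignored without error.
    const context = await loadModel({
      model: './models/model.gguf',  // illustrative path
      devices: ['Vulkan0', 'CPU'],   // illustrative device names
    })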
@@ -376,6 +392,18 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     _rn_ctx = nullptr;
     Napi::TypeError::New(env, "Failed to load model").ThrowAsJavaScriptException();
   }
+  _rn_ctx->attachThreadpoolsIfAvailable();
+
+  // Collect used devices from the loaded model
+  if (_rn_ctx->llama_init.model) {
+    const auto &model_devices = _rn_ctx->llama_init.model->devices;
+    for (auto dev : model_devices) {
+      const char *dev_name = ggml_backend_dev_name(dev);
+      if (dev_name != nullptr) {
+        _used_devices.push_back(std::string(dev_name));
+      }
+    }
+  }
 
   // Release progress callback after model is loaded
   if (has_progress_callback) {
@@ -386,7 +414,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   if (!lora.empty()) {
     _rn_ctx->applyLoraAdapters(lora);
   }
-
+
   _info = common_params_get_system_info(params);
 }
 
@@ -582,6 +610,15 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   return details;
 }
 
+// getUsedDevices(): string[]
+Napi::Value LlamaContext::GetUsedDevices(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  Napi::Array devices = Napi::Array::New(env, _used_devices.size());
+  for (size_t i = 0; i < _used_devices.size(); i++) {
+    devices[i] = Napi::String::New(env, _used_devices[i]);
+  }
+  return devices;
+}
 
 
 // getFormattedChat(
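Together with the `getUsedDevices` registration in the first hunk and the device collection in the constructor, this exposes the device names recorded at model load as a string array. A hedged usage sketch, reusing the assumed `context` from the sketch above:

    // getUsedDevices() returns the backend device names the loaded model
    // actually ended up on, collected from llama_init.model->devices.
    const used: string[] = context.getUsedDevices()
    console.log('model devices:', used)  // e.g. ['Vulkan0', 'CPU'] (illustrative)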
@@ -636,7 +673,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto enable_thinking = get_option<bool>(params, "enable_thinking", false);
   auto add_generation_prompt = get_option<bool>(params, "add_generation_prompt", true);
   auto now_str = get_option<std::string>(params, "now", "");
-
+
   std::map<std::string, std::string> chat_template_kwargs;
   if (params.Has("chat_template_kwargs") && params.Get("chat_template_kwargs").IsObject()) {
     auto kwargs_obj = params.Get("chat_template_kwargs").As<Napi::Object>();
@@ -873,7 +910,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   auto enable_thinking = get_option<bool>(options, "enable_thinking", true);
   auto add_generation_prompt = get_option<bool>(options, "add_generation_prompt", true);
   auto now_str = get_option<std::string>(options, "now", "");
-
+
   std::map<std::string, std::string> chat_template_kwargs;
   if (options.Has("chat_template_kwargs") && options.Get("chat_template_kwargs").IsObject()) {
     auto kwargs_obj = options.Get("chat_template_kwargs").As<Napi::Object>();
@@ -886,7 +923,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   common_chat_params chatParams;
-
+
   try {
     chatParams = _rn_ctx->getFormattedChatWithJinja(
         json_stringify(messages), chat_template,
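The three hunks above change only blank lines, but their context documents the chat options parsed natively: `enable_thinking` (default false for getFormattedChat, true for completion), `add_generation_prompt`, `now`, and a `chat_template_kwargs` string map. A hedged completion sketch; the JS option names are assumed to mirror the C++ `get_option` keys:

    // Hypothetical call shape; option names mirror the native keys above.
    const out = await context.completion({
      messages: [{ role: 'user', content: 'Hello!' }],
      enable_thinking: true,
      add_generation_prompt: true,
      chat_template_kwargs: { custom_flag: 'on' },  // illustrative key
    })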
@@ -1043,7 +1080,7 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
   }
   auto text = info[0].ToString().Utf8Value();
   std::vector<std::string> media_paths;
-
+
   if (info.Length() >= 2 && info[1].IsArray()) {
     // Direct array format: tokenize(text, [media_paths])
     auto media_paths_array = info[1].As<Napi::Array>();
@@ -1051,7 +1088,7 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
       media_paths.push_back(media_paths_array.Get(i).ToString().Utf8Value());
     }
   }
-
+
   auto *worker = new TokenizeWorker(info, _rn_ctx, text, media_paths);
   worker->Queue();
   return worker->Promise();
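These Tokenize hunks are also whitespace-only; the surrounding context shows the call shape: `tokenize(text, [media_paths])` queues a `TokenizeWorker` and returns a promise. An illustrative sketch under the same assumptions as above:

    // Hypothetical: tokenize text, optionally with media paths for
    // multimodal models; resolves through the queued TokenizeWorker.
    const result = await context.tokenize('Describe this image', ['./photo.jpg'])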
@@ -1072,7 +1109,7 @@ Napi::Value LlamaContext::Detokenize(const Napi::CallbackInfo &info) {
   for (size_t i = 0; i < tokens.Length(); i++) {
     token_ids.push_back(tokens.Get(i).ToNumber().Int32Value());
   }
-
+
   auto *worker = new DetokenizeWorker(info, _rn_ctx, token_ids);
   worker->Queue();
   return worker->Promise();
@@ -1112,16 +1149,16 @@ Napi::Value LlamaContext::Rerank(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Context is disposed")
         .ThrowAsJavaScriptException();
   }
-
+
   auto query = info[0].ToString().Utf8Value();
   auto documents_array = info[1].As<Napi::Array>();
-
+
   // Convert documents array to vector
   std::vector<std::string> documents;
   for (size_t i = 0; i < documents_array.Length(); i++) {
     documents.push_back(documents_array.Get(i).ToString().Utf8Value());
   }
-
+
   auto options = Napi::Object::New(env);
   if (info.Length() >= 3 && info[2].IsObject()) {
     options = info[2].As<Napi::Object>();
@@ -1130,7 +1167,7 @@ Napi::Value LlamaContext::Rerank(const Napi::CallbackInfo &info) {
   common_params rerankParams;
   rerankParams.embedding = true;
   rerankParams.embd_normalize = get_option<int32_t>(options, "normalize", -1);
-
+
   auto *worker = new RerankWorker(info, _rn_ctx, query, documents, rerankParams);
   worker->Queue();
   return worker->Promise();
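The Rerank hunks likewise touch only blank lines; their context shows `rerank(query, documents, options?)` with an optional `normalize` option mapped to `rerankParams.embd_normalize` (default -1). A hedged sketch:

    // Hypothetical usage; 'normalize' maps to embd_normalize natively.
    const scores = await context.rerank(
      'What is the capital of France?',
      ['Paris is the capital of France.', 'Berlin is in Germany.'],
      { normalize: -1 },
    )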
@@ -1379,13 +1416,13 @@ LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
   }
   auto text = info[1].ToString().Utf8Value();
   auto speaker_json = info[0].IsString() ? info[0].ToString().Utf8Value() : "";
-
+
   if (!_rn_ctx->tts_wrapper) {
     Napi::Error::New(env, "Vocoder not initialized")
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto result_data = _rn_ctx->tts_wrapper->getFormattedAudioCompletion(_rn_ctx, speaker_json, text);
   Napi::Object result = Napi::Object::New(env);
   result.Set("prompt", Napi::String::New(env, result_data.prompt));
@@ -1406,13 +1443,13 @@ LlamaContext::GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info) {
     return env.Undefined();
   }
   auto text = info[0].ToString().Utf8Value();
-
+
   if (!_rn_ctx->tts_wrapper) {
     Napi::Error::New(env, "Vocoder not initialized")
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto result = _rn_ctx->tts_wrapper->getAudioCompletionGuideTokens(_rn_ctx, text);
   auto tokens = Napi::Int32Array::New(env, result.size());
   memcpy(tokens.Data(), result.data(), result.size() * sizeof(int32_t));
@@ -1448,7 +1485,7 @@ Napi::Value LlamaContext::DecodeAudioTokens(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto *worker = new DecodeAudioTokenWorker(info, _rn_ctx, tokens);
   worker->Queue();
   return worker->Promise();
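The audio hunks are whitespace-only as well; their context outlines the TTS flow: a vocoder must be initialized (otherwise "Vocoder not initialized" is thrown), `getFormattedAudioCompletion(speaker_json, text)` returns a prompt, guide tokens come from `getAudioCompletionGuideTokens(text)`, and `decodeAudioTokens(tokens)` resolves through a worker. A hedged sketch of that sequence, with JS method names assumed to mirror the native ones:

    // Hypothetical TTS flow; audioTokens would come from a TTS completion
    // run (not shown here).
    const { prompt } = context.getFormattedAudioCompletion(null, 'Hello world')
    const guide = context.getAudioCompletionGuideTokens('Hello world')
    const audioTokens: number[] = []  // placeholder for real TTS output
    const audio = await context.decodeAudioTokens(audioTokens)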
package/src/LlamaContext.h
CHANGED
@@ -31,6 +31,7 @@ public:
 private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
   Napi::Value GetModelInfo(const Napi::CallbackInfo &info);
+  Napi::Value GetUsedDevices(const Napi::CallbackInfo &info);
   Napi::Value GetFormattedChat(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
@@ -69,6 +70,7 @@ private:
   void CancelRequest(const Napi::CallbackInfo &info);
 
   std::string _info;
+  std::vector<std::string> _used_devices;
   Napi::Object _meta;
   LlamaCompletionWorker *_wip = nullptr;
 