@fugood/llama.node 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/CMakeLists.txt +7 -3
  2. package/lib/binding.js +1 -1
  3. package/lib/binding.ts +40 -14
  4. package/lib/index.js +4 -1
  5. package/lib/index.ts +13 -9
  6. package/package.json +14 -14
  7. package/scripts/llama.cpp.patch +10 -10
  8. package/src/LlamaCompletionWorker.cpp +33 -33
  9. package/src/LlamaContext.cpp +53 -16
  10. package/src/LlamaContext.h +2 -0
  11. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  12. package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
  13. package/src/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
  14. package/src/llama.cpp/common/chat-parser.h +10 -0
  15. package/src/llama.cpp/common/chat.cpp +461 -87
  16. package/src/llama.cpp/common/chat.h +6 -0
  17. package/src/llama.cpp/common/common.cpp +8 -1
  18. package/src/llama.cpp/common/common.h +12 -5
  19. package/src/llama.cpp/common/json-partial.cpp +19 -2
  20. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -0
  21. package/src/llama.cpp/common/json-schema-to-grammar.h +2 -0
  22. package/src/llama.cpp/common/sampling.cpp +60 -6
  23. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -38
  24. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
  25. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +15 -5
  26. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -3
  27. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +16 -14
  28. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +49 -48
  29. package/src/llama.cpp/src/llama-grammar.cpp +17 -9
  30. package/src/llama.cpp/src/llama-impl.cpp +3 -3
  31. package/src/llama.cpp/src/llama-sampling.cpp +3 -6
  32. package/src/llama.cpp/src/llama-vocab.cpp +1 -0
package/src/LlamaContext.cpp

@@ -105,6 +105,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
       InstanceMethod<&LlamaContext::GetModelInfo>(
           "getModelInfo",
           static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::GetUsedDevices>(
+          "getUsedDevices",
+          static_cast<napi_property_attributes>(napi_enumerable)),
       InstanceMethod<&LlamaContext::GetFormattedChat>(
           "getFormattedChat",
           static_cast<napi_property_attributes>(napi_enumerable)),
@@ -306,6 +309,19 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
+  // Parse devices array
+  if (options.Has("devices") && options.Get("devices").IsArray()) {
+    auto devices_array = options.Get("devices").As<Napi::Array>();
+    for (size_t i = 0; i < devices_array.Length(); i++) {
+      auto device_name = devices_array.Get(i).ToString().Utf8Value();
+      auto * dev = ggml_backend_dev_by_name(device_name.c_str());
+      if (dev) {
+        params.devices.push_back(dev);
+      }
+      // Skip invalid device names silently
+    }
+  }
+
   std::vector<common_adapter_lora_info> lora;
   auto lora_path = get_option<std::string>(options, "lora", "");
   auto lora_scaled = get_option<float>(options, "lora_scaled", 1.0f);
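This hunk adds a `devices` option to the native constructor: each entry in the array is looked up with ggml_backend_dev_by_name and, when found, appended to params.devices; names that do not resolve are skipped silently. Below is a minimal TypeScript sketch of how the option might be passed from the JS side; the loadModel entry point, the other option names, and the example device strings are assumptions about the wrapper layer, not something this diff confirms.

// Sketch only: assumes loadModel() is the package's exported initializer and
// that it forwards the `devices` string array unchanged to the native layer.
import { loadModel } from '@fugood/llama.node'

const context = await loadModel({
  model: './model.gguf',          // hypothetical model path
  // ggml backend device names (e.g. 'Vulkan0', 'CUDA0', 'CPU' - assumed values);
  // names the backend cannot resolve are ignored by the native side.
  devices: ['Vulkan0', 'CPU'],
})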
@@ -376,6 +392,18 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     _rn_ctx = nullptr;
     Napi::TypeError::New(env, "Failed to load model").ThrowAsJavaScriptException();
   }
+  _rn_ctx->attachThreadpoolsIfAvailable();
+
+  // Collect used devices from the loaded model
+  if (_rn_ctx->llama_init.model) {
+    const auto &model_devices = _rn_ctx->llama_init.model->devices;
+    for (auto dev : model_devices) {
+      const char *dev_name = ggml_backend_dev_name(dev);
+      if (dev_name != nullptr) {
+        _used_devices.push_back(std::string(dev_name));
+      }
+    }
+  }
 
   // Release progress callback after model is loaded
   if (has_progress_callback) {
@@ -386,7 +414,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   if (!lora.empty()) {
     _rn_ctx->applyLoraAdapters(lora);
   }
-
+
   _info = common_params_get_system_info(params);
 }
 
@@ -582,6 +610,15 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   return details;
 }
 
+// getUsedDevices(): string[]
+Napi::Value LlamaContext::GetUsedDevices(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  Napi::Array devices = Napi::Array::New(env, _used_devices.size());
+  for (size_t i = 0; i < _used_devices.size(); i++) {
+    devices[i] = Napi::String::New(env, _used_devices[i]);
+  }
+  return devices;
+}
 
 
 // getFormattedChat(
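GetUsedDevices simply copies the _used_devices names collected in the constructor (the @@ -376,6 +392,18 hunk above) into a JS string array, so callers can see which backend devices the model actually ended up on. Continuing the sketch above, and assuming the TypeScript wrapper exposes the native method under the same name:

// Sketch only: the wrapper surface lives in lib/binding.ts and lib/index.ts,
// which are not shown in this excerpt, so the call shape is an assumption.
const used: string[] = context.getUsedDevices()
console.log('model loaded on:', used.join(', '))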
@@ -636,7 +673,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto enable_thinking = get_option<bool>(params, "enable_thinking", false);
   auto add_generation_prompt = get_option<bool>(params, "add_generation_prompt", true);
   auto now_str = get_option<std::string>(params, "now", "");
-
+
   std::map<std::string, std::string> chat_template_kwargs;
   if (params.Has("chat_template_kwargs") && params.Get("chat_template_kwargs").IsObject()) {
     auto kwargs_obj = params.Get("chat_template_kwargs").As<Napi::Object>();

@@ -873,7 +910,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   auto enable_thinking = get_option<bool>(options, "enable_thinking", true);
   auto add_generation_prompt = get_option<bool>(options, "add_generation_prompt", true);
   auto now_str = get_option<std::string>(options, "now", "");
-
+
   std::map<std::string, std::string> chat_template_kwargs;
   if (options.Has("chat_template_kwargs") && options.Get("chat_template_kwargs").IsObject()) {
     auto kwargs_obj = options.Get("chat_template_kwargs").As<Napi::Object>();

@@ -886,7 +923,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   common_chat_params chatParams;
-
+
   try {
     chatParams = _rn_ctx->getFormattedChatWithJinja(
         json_stringify(messages), chat_template,
@@ -1043,7 +1080,7 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
   }
   auto text = info[0].ToString().Utf8Value();
   std::vector<std::string> media_paths;
-
+
   if (info.Length() >= 2 && info[1].IsArray()) {
     // Direct array format: tokenize(text, [media_paths])
     auto media_paths_array = info[1].As<Napi::Array>();

@@ -1051,7 +1088,7 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
       media_paths.push_back(media_paths_array.Get(i).ToString().Utf8Value());
     }
   }
-
+
   auto *worker = new TokenizeWorker(info, _rn_ctx, text, media_paths);
   worker->Queue();
   return worker->Promise();

@@ -1072,7 +1109,7 @@ Napi::Value LlamaContext::Detokenize(const Napi::CallbackInfo &info) {
   for (size_t i = 0; i < tokens.Length(); i++) {
     token_ids.push_back(tokens.Get(i).ToNumber().Int32Value());
   }
-
+
   auto *worker = new DetokenizeWorker(info, _rn_ctx, token_ids);
   worker->Queue();
   return worker->Promise();
@@ -1112,16 +1149,16 @@ Napi::Value LlamaContext::Rerank(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Context is disposed")
         .ThrowAsJavaScriptException();
   }
-
+
   auto query = info[0].ToString().Utf8Value();
   auto documents_array = info[1].As<Napi::Array>();
-
+
   // Convert documents array to vector
   std::vector<std::string> documents;
   for (size_t i = 0; i < documents_array.Length(); i++) {
     documents.push_back(documents_array.Get(i).ToString().Utf8Value());
   }
-
+
   auto options = Napi::Object::New(env);
   if (info.Length() >= 3 && info[2].IsObject()) {
     options = info[2].As<Napi::Object>();

@@ -1130,7 +1167,7 @@ Napi::Value LlamaContext::Rerank(const Napi::CallbackInfo &info) {
   common_params rerankParams;
   rerankParams.embedding = true;
   rerankParams.embd_normalize = get_option<int32_t>(options, "normalize", -1);
-
+
   auto *worker = new RerankWorker(info, _rn_ctx, query, documents, rerankParams);
   worker->Queue();
   return worker->Promise();
@@ -1379,13 +1416,13 @@ LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
   }
   auto text = info[1].ToString().Utf8Value();
   auto speaker_json = info[0].IsString() ? info[0].ToString().Utf8Value() : "";
-
+
   if (!_rn_ctx->tts_wrapper) {
     Napi::Error::New(env, "Vocoder not initialized")
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto result_data = _rn_ctx->tts_wrapper->getFormattedAudioCompletion(_rn_ctx, speaker_json, text);
   Napi::Object result = Napi::Object::New(env);
   result.Set("prompt", Napi::String::New(env, result_data.prompt));

@@ -1406,13 +1443,13 @@ LlamaContext::GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info) {
     return env.Undefined();
   }
   auto text = info[0].ToString().Utf8Value();
-
+
   if (!_rn_ctx->tts_wrapper) {
     Napi::Error::New(env, "Vocoder not initialized")
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto result = _rn_ctx->tts_wrapper->getAudioCompletionGuideTokens(_rn_ctx, text);
   auto tokens = Napi::Int32Array::New(env, result.size());
   memcpy(tokens.Data(), result.data(), result.size() * sizeof(int32_t));
@@ -1448,7 +1485,7 @@ Napi::Value LlamaContext::DecodeAudioTokens(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-
+
   auto *worker = new DecodeAudioTokenWorker(info, _rn_ctx, tokens);
   worker->Queue();
   return worker->Promise();
package/src/LlamaContext.h

@@ -31,6 +31,7 @@ public:
 private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
   Napi::Value GetModelInfo(const Napi::CallbackInfo &info);
+  Napi::Value GetUsedDevices(const Napi::CallbackInfo &info);
   Napi::Value GetFormattedChat(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);

@@ -69,6 +70,7 @@ private:
   void CancelRequest(const Napi::CallbackInfo &info);
 
   std::string _info;
+  std::vector<std::string> _used_devices;
   Napi::Object _meta;
   LlamaCompletionWorker *_wip = nullptr;
 
package/src/llama.cpp/common/CMakeLists.txt

@@ -50,6 +50,8 @@ add_library(${TARGET} STATIC
     base64.hpp
    chat-parser.cpp
    chat-parser.h
+    chat-parser-xml-toolcall.h
+    chat-parser-xml-toolcall.cpp
    chat.cpp
    chat.h
    common.cpp