@fugood/llama.node 0.3.8 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.js +2 -2
  18. package/lib/binding.ts +52 -8
  19. package/lib/index.ts +3 -1
  20. package/package.json +8 -1
  21. package/src/LlamaCompletionWorker.cpp +33 -6
  22. package/src/LlamaCompletionWorker.h +3 -1
  23. package/src/LlamaContext.cpp +387 -28
  24. package/src/LlamaContext.h +5 -0
  25. package/src/common.hpp +19 -2
  26. package/src/llama.cpp/.github/workflows/build.yml +289 -107
  27. package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
  28. package/src/llama.cpp/.github/workflows/docker.yml +2 -1
  29. package/src/llama.cpp/.github/workflows/server.yml +25 -2
  30. package/src/llama.cpp/CMakeLists.txt +10 -19
  31. package/src/llama.cpp/cmake/build-info.cmake +1 -1
  32. package/src/llama.cpp/common/CMakeLists.txt +32 -0
  33. package/src/llama.cpp/common/arg.cpp +66 -16
  34. package/src/llama.cpp/common/chat-template.hpp +515 -0
  35. package/src/llama.cpp/common/chat.cpp +966 -0
  36. package/src/llama.cpp/common/chat.hpp +52 -0
  37. package/src/llama.cpp/common/common.cpp +159 -36
  38. package/src/llama.cpp/common/common.h +56 -14
  39. package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
  40. package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
  41. package/src/llama.cpp/common/llguidance.cpp +270 -0
  42. package/src/llama.cpp/common/log.cpp +1 -10
  43. package/src/llama.cpp/common/log.h +10 -0
  44. package/src/llama.cpp/common/minja.hpp +2868 -0
  45. package/src/llama.cpp/common/sampling.cpp +22 -1
  46. package/src/llama.cpp/common/sampling.h +3 -0
  47. package/src/llama.cpp/docs/build.md +54 -9
  48. package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
  49. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
  50. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  51. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
  52. package/src/llama.cpp/examples/llava/clip.cpp +133 -14
  53. package/src/llama.cpp/examples/llava/clip.h +2 -0
  54. package/src/llama.cpp/examples/llava/llava.cpp +22 -8
  55. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
  56. package/src/llama.cpp/examples/main/main.cpp +26 -25
  57. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
  58. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
  59. package/src/llama.cpp/examples/run/run.cpp +224 -69
  60. package/src/llama.cpp/examples/server/server.cpp +252 -81
  61. package/src/llama.cpp/examples/server/utils.hpp +73 -21
  62. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
  63. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
  65. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  66. package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
  67. package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
  68. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
  69. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
  71. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
  73. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
  74. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
  76. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
  77. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
  78. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
  79. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  81. package/src/llama.cpp/ggml/src/ggml.c +23 -13
  82. package/src/llama.cpp/include/llama.h +14 -1
  83. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  84. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  85. package/src/llama.cpp/src/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/src/llama-arch.cpp +7 -2
  87. package/src/llama.cpp/src/llama-arch.h +3 -1
  88. package/src/llama.cpp/src/llama-chat.cpp +11 -2
  89. package/src/llama.cpp/src/llama-chat.h +1 -0
  90. package/src/llama.cpp/src/llama-grammar.cpp +86 -6
  91. package/src/llama.cpp/src/llama-grammar.h +22 -1
  92. package/src/llama.cpp/src/llama-mmap.cpp +1 -0
  93. package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
  94. package/src/llama.cpp/src/llama-model.cpp +76 -6
  95. package/src/llama.cpp/src/llama-sampling.cpp +47 -4
  96. package/src/llama.cpp/src/llama-vocab.cpp +10 -4
  97. package/src/llama.cpp/src/llama.cpp +181 -123
  98. package/src/llama.cpp/tests/CMakeLists.txt +4 -0
  99. package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
  100. package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
  101. package/src/llama.cpp/tests/test-chat.cpp +607 -0
  102. package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
  103. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
  104. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
  105. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/LlamaContext.cpp CHANGED
@@ -1,6 +1,8 @@
 #include "ggml.h"
 #include "gguf.h"
 #include "llama-impl.h"
+#include "json.hpp"
+#include "json-schema-to-grammar.h"
 #include "LlamaContext.h"
 #include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
@@ -10,6 +12,8 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+using json = nlohmann::ordered_json;
+
 // loadModelInfo(path: string): object
 Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
   Napi::Env env = info.Env();
@@ -103,6 +107,15 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
        InstanceMethod<&LlamaContext::LoadSession>(
            "loadSession",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::ApplyLoraAdapters>(
+           "applyLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::RemoveLoraAdapters>(
+           "removeLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetLoadedLoraAdapters>(
+           "getLoadedLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::Release>(
            "release", static_cast<napi_property_attributes>(napi_enumerable)),
        StaticMethod<&LlamaContext::ModelInfo>(
@@ -167,6 +180,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     params.warmup = false;
   }
 
+  params.chat_template = get_option<std::string>(options, "chat_template", "");
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -202,8 +217,52 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
         .ThrowAsJavaScriptException();
   }
 
+  auto ctx = sess->context();
+  auto model = sess->model();
+
+  std::vector<common_adapter_lora_info> lora;
+  auto lora_path = get_option<std::string>(options, "lora", "");
+  auto lora_scaled = get_option<float>(options, "lora_scaled", 1.0f);
+  if (lora_path != "") {
+    common_adapter_lora_info la;
+    la.path = lora_path;
+    la.scale = lora_scaled;
+    la.ptr = llama_adapter_lora_init(model, lora_path.c_str());
+    if (la.ptr == nullptr) {
+      Napi::TypeError::New(env, "Failed to load lora adapter")
+          .ThrowAsJavaScriptException();
+    }
+    lora.push_back(la);
+  }
+
+  if (options.Has("lora_list") && options.Get("lora_list").IsArray()) {
+    auto lora_list = options.Get("lora_list").As<Napi::Array>();
+    if (lora_list != nullptr) {
+      int lora_list_size = lora_list.Length();
+      for (int i = 0; i < lora_list_size; i++) {
+        auto lora_adapter = lora_list.Get(i).As<Napi::Object>();
+        auto path = lora_adapter.Get("path").ToString();
+        if (path != nullptr) {
+          common_adapter_lora_info la;
+          la.path = path;
+          la.scale = lora_adapter.Get("scaled").ToNumber().FloatValue();
+          la.ptr = llama_adapter_lora_init(model, path.Utf8Value().c_str());
+          if (la.ptr == nullptr) {
+            Napi::TypeError::New(env, "Failed to load lora adapter")
+                .ThrowAsJavaScriptException();
+          }
+          lora.push_back(la);
+        }
+      }
+    }
+  }
+  common_set_adapter_lora(ctx, lora);
+  _lora = lora;
+
   _sess = sess;
   _info = common_params_get_system_info(params);
+
+  _templates = common_chat_templates_from_model(model, params.chat_template);
 }
 
 // getSystemInfo(): string
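Taken together, the constructor now accepts a chat_template override plus lora, lora_scaled, and lora_list options, loads each adapter with llama_adapter_lora_init, applies the set via common_set_adapter_lora, and caches the model's chat templates in _templates. A minimal TypeScript sketch of how these options could be passed from JavaScript; the loadModel wrapper name and the file paths are assumptions, since lib/index.ts is not shown in this diff:

    import { loadModel } from '@fugood/llama.node' // assumed wrapper around the native LlamaContext

    const context = await loadModel({
      model: './model.gguf',          // placeholder path
      n_ctx: 4096,
      chat_template: '',              // optional override of the model's built-in template
      lora: './style-adapter.gguf',   // single adapter (lora / lora_scaled)
      lora_scaled: 0.8,
      lora_list: [                    // or several adapters at once
        { path: './extra-adapter.gguf', scaled: 1.0 },
      ],
    })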
@@ -211,17 +270,12 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
   return Napi::String::New(info.Env(), _info);
 }
 
-bool validateModelChatTemplate(const struct llama_model * model) {
-  std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
-  std::string template_key = "tokenizer.chat_template";
-  int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-  if (res >= 0) {
-    llama_chat_message chat[] = {{"user", "test"}};
-    const char * tmpl = llama_model_chat_template(model);
-    int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
-    return chat_res > 0;
-  }
-  return res > 0;
+bool validateModelChatTemplate(const struct llama_model * model, const bool use_jinja, const char * name) {
+  const char * tmpl = llama_model_chat_template(model, name);
+  if (tmpl == nullptr) {
+    return false;
+  }
+  return common_chat_verify_template(tmpl, use_jinja);
 }
 
 // getModelInfo(): object
@@ -235,29 +289,204 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   for (int i = 0; i < count; i++) {
     char key[256];
     llama_model_meta_key_by_index(model, i, key, sizeof(key));
-    char val[2048];
+    char val[4096];
     llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
 
     metadata.Set(key, val);
   }
   Napi::Object details = Napi::Object::New(info.Env());
   details.Set("desc", desc);
+  details.Set("nEmbd", llama_model_n_embd(model));
   details.Set("nParams", llama_model_n_params(model));
   details.Set("size", llama_model_size(model));
-  details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
+
+  Napi::Object chatTemplates = Napi::Object::New(info.Env());
+  chatTemplates.Set("llamaChat", validateModelChatTemplate(model, false, ""));
+  Napi::Object minja = Napi::Object::New(info.Env());
+  minja.Set("default", validateModelChatTemplate(model, true, ""));
+  Napi::Object defaultCaps = Napi::Object::New(info.Env());
+  defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
+  defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
+  defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
+  defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
+  defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
+  defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+  minja.Set("defaultCaps", defaultCaps);
+  Napi::Object toolUse = Napi::Object::New(info.Env());
+  toolUse.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
+  if (_templates.template_tool_use) {
+    Napi::Object toolUseCaps = Napi::Object::New(info.Env());
+    toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
+    toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
+    toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
+    toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
+    toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
+    toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+    toolUse.Set("toolUseCaps", toolUseCaps);
+  }
+  minja.Set("toolUse", toolUse);
+  chatTemplates.Set("minja", minja);
+  details.Set("chatTemplates", chatTemplates);
+
   details.Set("metadata", metadata);
   return details;
 }
 
-// getFormattedChat(messages: [{ role: string, content: string }]): string
+common_chat_params getFormattedChatWithJinja(
+    const struct llama_model * model,
+    const common_chat_templates &templates,
+    const std::string &messages,
+    const std::string &chat_template,
+    const std::string &json_schema,
+    const std::string &tools,
+    const bool &parallel_tool_calls,
+    const std::string &tool_choice
+) {
+  common_chat_inputs inputs;
+  inputs.messages = json::parse(messages);
+  auto useTools = !tools.empty();
+  if (useTools) {
+    inputs.tools = json::parse(tools);
+  }
+  inputs.parallel_tool_calls = parallel_tool_calls;
+  if (!tool_choice.empty()) {
+    inputs.tool_choice = tool_choice;
+  }
+  if (!json_schema.empty()) {
+    inputs.json_schema = json::parse(json_schema);
+  }
+  inputs.stream = true;
+
+  // If chat_template is provided, create new one and use it (probably slow)
+  if (!chat_template.empty()) {
+    auto tmp = common_chat_templates_from_model(model, chat_template);
+    const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
+    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+      inputs.parallel_tool_calls = false;
+    }
+    return common_chat_params_init(*template_ptr, inputs);
+  } else {
+    const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
+    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+      inputs.parallel_tool_calls = false;
+    }
+    return common_chat_params_init(*template_ptr, inputs);
+  }
+}
+
+std::string getFormattedChat(
+    const struct llama_model * model,
+    const common_chat_templates &templates,
+    const std::string &messages,
+    const std::string &chat_template
+) {
+  auto chat_json = json::parse(messages);
+
+  // Handle regular chat without tools
+  std::vector<common_chat_msg> chat_msgs;
+  for (const auto &msg : chat_json) {
+    chat_msgs.push_back({
+      msg["role"].get<std::string>(),
+      msg["content"].get<std::string>()
+    });
+  }
+
+  // If chat_template is provided, create new one and use it (probably slow)
+  if (!chat_template.empty()) {
+    auto tmp = common_chat_templates_from_model(model, chat_template);
+    return common_chat_apply_template(
+      *tmp.template_default,
+      chat_msgs,
+      true,
+      false
+    );
+  } else {
+    return common_chat_apply_template(
+      *templates.template_default,
+      chat_msgs,
+      true,
+      false
+    );
+  }
+}
+
+// getFormattedChat(
+//   messages: [{ role: string, content: string }],
+//   chat_template: string,
+//   params: { jinja: boolean, json_schema: string, tools: string, parallel_tool_calls: boolean, tool_choice: string }
+// ): object | string
 Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
   if (info.Length() < 1 || !info[0].IsArray()) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
-  auto messages = info[0].As<Napi::Array>();
-  auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
-  return Napi::String::New(env, formatted);
+  auto messages = json_stringify(info[0].As<Napi::Array>());
+  printf("messages: %s\n", messages.c_str());
+  auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";
+
+  auto has_params = info.Length() >= 2;
+  auto params = has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);
+
+  if (get_option<bool>(params, "jinja", false)) {
+    std::string json_schema_str = "";
+    if (!is_nil(params.Get("response_format"))) {
+      auto response_format = params.Get("response_format").As<Napi::Object>();
+      auto response_format_type = get_option<std::string>(response_format, "type", "text");
+      if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
+        auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
+        json_schema_str = json_schema.Has("schema") ?
+          json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
+          "{}";
+      } else if (response_format_type == "json_object") {
+        json_schema_str = response_format.Has("schema") ?
+          json_stringify(response_format.Get("schema").As<Napi::Object>()) :
+          "{}";
+      }
+    }
+    auto tools_str = params.Has("tools") ?
+      json_stringify(params.Get("tools").As<Napi::Array>()) :
+      "";
+    auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
+    auto tool_choice = get_option<std::string>(params, "tool_choice", "");
+
+    auto chatParams = getFormattedChatWithJinja(_sess->model(), _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
+
+    Napi::Object result = Napi::Object::New(env);
+    result.Set("prompt", chatParams.prompt.get<std::string>());
+    // chat_format: int
+    result.Set("chat_format", static_cast<int>(chatParams.format));
+    // grammar: string
+    result.Set("grammar", chatParams.grammar);
+    // grammar_lazy: boolean
+    result.Set("grammea_lazy", chatParams.grammar_lazy);
+    // grammar_triggers: [{ word: string, at_start: boolean }]
+    Napi::Array grammar_triggers = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
+      const auto & trigger = chatParams.grammar_triggers[i];
+      Napi::Object triggerObj = Napi::Object::New(env);
+      triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
+      triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
+      grammar_triggers.Set(i, triggerObj);
+    }
+    result.Set("grammar_triggers", grammar_triggers);
+    // preserved_tokens: string[]
+    Napi::Array preserved_tokens = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.preserved_tokens.size(); i++) {
+      preserved_tokens.Set(i, Napi::String::New(env, chatParams.preserved_tokens[i].c_str()));
+    }
+    result.Set("preserved_tokens", preserved_tokens);
+    // additional_stops: string[]
+    Napi::Array additional_stops = Napi::Array::New(env);
+    for (size_t i = 0; i < chatParams.additional_stops.size(); i++) {
+      additional_stops.Set(i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
+    }
+    result.Set("additional_stops", additional_stops);
+
+    return result;
+  } else {
+    auto formatted = getFormattedChat(_sess->model(), _templates, messages, chat_template);
+    return Napi::String::New(env, formatted);
+  }
 }
 
 // completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
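With this change, getFormattedChat keeps returning a plain prompt string in the llama-chat path, but returns a structured object (prompt, chat_format, grammar, grammar triggers, preserved tokens, additional stops) when the jinja flag is set. A hedged sketch of a call through the binding; the context object is assumed to expose the native method as registered above, and the message content is illustrative:

    const result = context.getFormattedChat(
      [{ role: 'user', content: 'List three fruits as JSON.' }],
      '',                                        // chat_template: '' -> use the template from the model
      {
        jinja: true,
        response_format: { type: 'json_object' },
        parallel_tool_calls: false,
        tool_choice: '',
      },
    )

    if (typeof result === 'string') {
      // jinja: false -> plain formatted prompt
    } else {
      // jinja: true -> note the native code spells the lazy-grammar key "grammea_lazy"
      const { prompt, chat_format, grammar, grammar_triggers, preserved_tokens, additional_stops } = result
    }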
@@ -280,11 +509,101 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
   auto options = info[0].As<Napi::Object>();
 
+  std::vector<std::string> stop_words;
+  if (options.Has("stop") && options.Get("stop").IsArray()) {
+    auto stop_words_array = options.Get("stop").As<Napi::Array>();
+    for (size_t i = 0; i < stop_words_array.Length(); i++) {
+      stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
+    }
+  }
+
+  int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
+
   common_params params = _sess->params();
+  auto grammar_from_params = get_option<std::string>(options, "grammar", "");
+  auto has_grammar_set = !grammar_from_params.empty();
+  if (has_grammar_set) {
+    params.sampling.grammar = grammar_from_params;
+  }
+
+  std::string json_schema_str = "";
+  if (options.Has("response_format")) {
+    auto response_format = options.Get("response_format").As<Napi::Object>();
+    auto response_format_type = get_option<std::string>(response_format, "type", "text");
+    if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
+      auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
+      json_schema_str = json_schema.Has("schema") ?
+        json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
+        "{}";
+    } else if (response_format_type == "json_object") {
+      json_schema_str = response_format.Has("schema") ?
+        json_stringify(response_format.Get("schema").As<Napi::Object>()) :
+        "{}";
+    }
+  }
+
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
-    auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
-    params.prompt = formatted;
+    auto chat_template = get_option<std::string>(options, "chat_template", "");
+    auto jinja = get_option<bool>(options, "jinja", false);
+    if (jinja) {
+      auto tools_str = options.Has("tools") ?
+        json_stringify(options.Get("tools").As<Napi::Array>()) :
+        "";
+      auto parallel_tool_calls = get_option<bool>(options, "parallel_tool_calls", false);
+      auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
+
+      auto chatParams = getFormattedChatWithJinja(
+        _sess->model(),
+        _templates,
+        json_stringify(messages),
+        chat_template,
+        json_schema_str,
+        tools_str,
+        parallel_tool_calls,
+        tool_choice
+      );
+
+      params.prompt = chatParams.prompt.get<std::string>();
+
+      chat_format = chatParams.format;
+
+      if (!has_grammar_set) {
+        // grammar param always wins jinja template & json_schema
+        params.sampling.grammar = chatParams.grammar;
+        params.sampling.grammar_lazy = chatParams.grammar_lazy;
+
+        for (const auto & trigger : chatParams.grammar_triggers) {
+          auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
+          if (ids.size() == 1) {
+            params.sampling.grammar_trigger_tokens.push_back(ids[0]);
+            params.sampling.preserved_tokens.insert(ids[0]);
+            continue;
+          }
+          params.sampling.grammar_trigger_words.push_back(trigger);
+        }
+        has_grammar_set = true;
+      }
+
+      for (const auto & token : chatParams.preserved_tokens) {
+        auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+        if (ids.size() == 1) {
+          params.sampling.preserved_tokens.insert(ids[0]);
+        }
+      }
+
+      for (const auto & stop : chatParams.additional_stops) {
+        stop_words.push_back(stop);
+      }
+    } else {
+      auto formatted = getFormattedChat(
+        _sess->model(),
+        _templates,
+        json_stringify(messages),
+        chat_template
+      );
+      params.prompt = formatted;
+    }
   } else {
     params.prompt = get_option<std::string>(options, "prompt", "");
   }
@@ -292,6 +611,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Prompt is required")
         .ThrowAsJavaScriptException();
   }
+
+  if (!has_grammar_set && !json_schema_str.empty()) {
+    params.sampling.grammar = json_schema_to_grammar(json::parse(json_schema_str));
+  }
+
   params.n_predict = get_option<int32_t>(options, "n_predict", -1);
   params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
   params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
@@ -318,16 +642,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
   params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
   params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
-  params.sampling.grammar = get_option<std::string>(options, "grammar", "");
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
   params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
-  std::vector<std::string> stop_words;
-  if (options.Has("stop") && options.Get("stop").IsArray()) {
-    auto stop_words_array = options.Get("stop").As<Napi::Array>();
-    for (size_t i = 0; i < stop_words_array.Length(); i++) {
-      stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
-    }
-  }
 
   Napi::Function callback;
   if (info.Length() >= 2) {
@@ -335,7 +651,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   auto *worker =
-      new LlamaCompletionWorker(info, _sess, callback, params, stop_words);
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
   worker->Queue();
   _wip = worker;
   worker->onComplete([this]() { _wip = nullptr; });
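On the completion path the same jinja machinery now builds the prompt, grammar, trigger tokens and extra stop words, and the resolved chat_format is handed to LlamaCompletionWorker. A hedged sketch of the options this maps to; the option names are taken from the native reads above, while the completion wrapper and the callback payload are assumptions, since lib/binding.ts is not shown here:

    const result = await context.completion(
      {
        messages: [
          { role: 'system', content: 'You are a helpful assistant.' },
          { role: 'user', content: 'What is the weather in Tokyo?' },
        ],
        jinja: true,                  // drive prompt + grammar from the Jinja template
        tools: [/* OpenAI-style tool definitions */],
        tool_choice: 'auto',
        parallel_tool_calls: false,
        response_format: { type: 'text' },
        grammar: '',                  // a non-empty grammar here wins over the jinja-derived one
        stop: ['</s>'],
        n_predict: 256,
        temperature: 0.8,
      },
      (data) => { /* streamed token callback; exact payload defined in lib/binding.ts */ },
    )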
@@ -451,6 +767,49 @@ Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
   return worker->Promise();
 }
 
+// applyLoraAdapters(lora_adapters: [{ path: string, scaled: number }]): void
+void LlamaContext::ApplyLoraAdapters(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  std::vector<common_adapter_lora_info> lora;
+  auto lora_adapters = info[0].As<Napi::Array>();
+  for (size_t i = 0; i < lora_adapters.Length(); i++) {
+    auto lora_adapter = lora_adapters.Get(i).As<Napi::Object>();
+    auto path = lora_adapter.Get("path").ToString().Utf8Value();
+    auto scaled = lora_adapter.Get("scaled").ToNumber().FloatValue();
+    common_adapter_lora_info la;
+    la.path = path;
+    la.scale = scaled;
+    la.ptr = llama_adapter_lora_init(_sess->model(), path.c_str());
+    if (la.ptr == nullptr) {
+      Napi::TypeError::New(env, "Failed to load lora adapter")
+          .ThrowAsJavaScriptException();
+    }
+    lora.push_back(la);
+  }
+  common_set_adapter_lora(_sess->context(), lora);
+  _lora = lora;
+}
+
+// removeLoraAdapters(): void
+void LlamaContext::RemoveLoraAdapters(const Napi::CallbackInfo &info) {
+  _lora.clear();
+  common_set_adapter_lora(_sess->context(), _lora);
+}
+
+// getLoadedLoraAdapters(): Promise<{ count, lora_adapters: [{ path: string,
+// scaled: number }] }>
+Napi::Value LlamaContext::GetLoadedLoraAdapters(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  Napi::Array lora_adapters = Napi::Array::New(env, _lora.size());
+  for (size_t i = 0; i < _lora.size(); i++) {
+    Napi::Object lora_adapter = Napi::Object::New(env);
+    lora_adapter.Set("path", _lora[i].path);
+    lora_adapter.Set("scaled", _lora[i].scale);
+    lora_adapters.Set(i, lora_adapter);
+  }
+  return lora_adapters;
+}
+
 // release(): Promise<void>
 Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   auto env = info.Env();
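The three new instance methods allow swapping LoRA adapters at runtime without recreating the context. A minimal sketch against the native methods registered earlier; note that despite the Promise in the comment, GetLoadedLoraAdapters returns the array synchronously, and the adapter paths below are placeholders:

    // Apply one or more adapters on the live context
    context.applyLoraAdapters([{ path: './adapter.gguf', scaled: 1.0 }])

    // Inspect what is currently loaded: [{ path, scaled }]
    const adapters = context.getLoadedLoraAdapters()

    // Detach all adapters again
    context.removeLoraAdapters()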
package/src/LlamaContext.h CHANGED
@@ -19,10 +19,15 @@ private:
   Napi::Value Embedding(const Napi::CallbackInfo &info);
   Napi::Value SaveSession(const Napi::CallbackInfo &info);
   Napi::Value LoadSession(const Napi::CallbackInfo &info);
+  void ApplyLoraAdapters(const Napi::CallbackInfo &info);
+  void RemoveLoraAdapters(const Napi::CallbackInfo &info);
+  Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
 
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
+  common_chat_templates _templates;
+  std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/common.hpp CHANGED
@@ -2,6 +2,8 @@
 
 #include "common/common.h"
 #include "common/sampling.h"
+#include "chat.hpp"
+#include "chat-template.hpp"
 #include "llama.h"
 #include <memory>
 #include <mutex>
@@ -15,11 +17,26 @@ typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
 
+static bool is_nil(const Napi::Value &value) {
+  return value.IsNull() || value.IsUndefined();
+}
+
+static std::string json_stringify(const Napi::Object &obj) {
+  Napi::Env env = obj.Env();
+  Napi::Object json = env.Global().Get("JSON").As<Napi::Object>();
+  Napi::Function stringify = json.Get("stringify").As<Napi::Function>();
+  return stringify.Call(json, { obj }).As<Napi::String>().ToString();
+}
+
+static void console_log(Napi::Env env, const std::string& message) {
+  Napi::Function consoleLog = env.Global().Get("console").As<Napi::Object>().Get("log").As<Napi::Function>();
+  consoleLog.Call({ Napi::String::New(env, message) });
+}
+
 template <typename T>
 constexpr T get_option(const Napi::Object &options, const std::string &name,
                        const T default_value) {
-  if (options.Has(name) && !options.Get(name).IsUndefined() &&
-      !options.Get(name).IsNull()) {
+  if (options.Has(name) && !is_nil(options.Get(name))) {
     if constexpr (std::is_same<T, std::string>::value) {
       return options.Get(name).ToString().operator T();
     } else if constexpr (std::is_same<T, int32_t>::value ||