@fugood/llama.node 0.3.9 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.js +2 -2
  18. package/lib/binding.ts +47 -8
  19. package/lib/index.js +21 -1
  20. package/lib/index.ts +31 -1
  21. package/package.json +12 -3
  22. package/src/LlamaCompletionWorker.cpp +33 -6
  23. package/src/LlamaCompletionWorker.h +3 -1
  24. package/src/LlamaContext.cpp +336 -28
  25. package/src/LlamaContext.h +2 -0
  26. package/src/common.hpp +19 -2
  27. package/src/llama.cpp/.github/workflows/build.yml +289 -107
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
  29. package/src/llama.cpp/.github/workflows/docker.yml +2 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +25 -2
  31. package/src/llama.cpp/CMakeLists.txt +10 -19
  32. package/src/llama.cpp/cmake/build-info.cmake +1 -1
  33. package/src/llama.cpp/common/CMakeLists.txt +32 -0
  34. package/src/llama.cpp/common/arg.cpp +66 -16
  35. package/src/llama.cpp/common/chat-template.hpp +515 -0
  36. package/src/llama.cpp/common/chat.cpp +966 -0
  37. package/src/llama.cpp/common/chat.hpp +52 -0
  38. package/src/llama.cpp/common/common.cpp +159 -36
  39. package/src/llama.cpp/common/common.h +56 -14
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
  41. package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
  42. package/src/llama.cpp/common/llguidance.cpp +270 -0
  43. package/src/llama.cpp/common/log.cpp +1 -10
  44. package/src/llama.cpp/common/log.h +10 -0
  45. package/src/llama.cpp/common/minja.hpp +2868 -0
  46. package/src/llama.cpp/common/sampling.cpp +22 -1
  47. package/src/llama.cpp/common/sampling.h +3 -0
  48. package/src/llama.cpp/docs/build.md +54 -9
  49. package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
  50. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
  51. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  52. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
  53. package/src/llama.cpp/examples/llava/clip.cpp +133 -14
  54. package/src/llama.cpp/examples/llava/clip.h +2 -0
  55. package/src/llama.cpp/examples/llava/llava.cpp +22 -8
  56. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
  57. package/src/llama.cpp/examples/main/main.cpp +26 -25
  58. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
  59. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
  60. package/src/llama.cpp/examples/run/run.cpp +224 -69
  61. package/src/llama.cpp/examples/server/server.cpp +252 -81
  62. package/src/llama.cpp/examples/server/utils.hpp +73 -21
  63. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
  64. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
  65. package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
  66. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  67. package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
  68. package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
  69. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
  71. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
  73. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
  74. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
  75. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
  77. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
  78. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
  81. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  82. package/src/llama.cpp/ggml/src/ggml.c +23 -13
  83. package/src/llama.cpp/include/llama.h +14 -1
  84. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  85. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  86. package/src/llama.cpp/src/CMakeLists.txt +1 -1
  87. package/src/llama.cpp/src/llama-arch.cpp +7 -2
  88. package/src/llama.cpp/src/llama-arch.h +3 -1
  89. package/src/llama.cpp/src/llama-chat.cpp +11 -2
  90. package/src/llama.cpp/src/llama-chat.h +1 -0
  91. package/src/llama.cpp/src/llama-grammar.cpp +86 -6
  92. package/src/llama.cpp/src/llama-grammar.h +22 -1
  93. package/src/llama.cpp/src/llama-mmap.cpp +1 -0
  94. package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
  95. package/src/llama.cpp/src/llama-model.cpp +76 -6
  96. package/src/llama.cpp/src/llama-sampling.cpp +47 -4
  97. package/src/llama.cpp/src/llama-vocab.cpp +10 -4
  98. package/src/llama.cpp/src/llama.cpp +181 -123
  99. package/src/llama.cpp/tests/CMakeLists.txt +4 -0
  100. package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
  101. package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
  102. package/src/llama.cpp/tests/test-chat.cpp +607 -0
  103. package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
  104. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
  105. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
  106. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
@@ -1,6 +1,8 @@
1
1
  #include "ggml.h"
2
2
  #include "gguf.h"
3
3
  #include "llama-impl.h"
4
+ #include "json.hpp"
5
+ #include "json-schema-to-grammar.h"
4
6
  #include "LlamaContext.h"
5
7
  #include "DetokenizeWorker.h"
6
8
  #include "DisposeWorker.h"
@@ -10,6 +12,8 @@
10
12
  #include "SaveSessionWorker.h"
11
13
  #include "TokenizeWorker.h"
12
14
 
15
+ using json = nlohmann::ordered_json;
16
+
13
17
  // loadModelInfo(path: string): object
14
18
  Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
15
19
  Napi::Env env = info.Env();
@@ -116,6 +120,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
116
120
  "release", static_cast<napi_property_attributes>(napi_enumerable)),
117
121
  StaticMethod<&LlamaContext::ModelInfo>(
118
122
  "loadModelInfo",
123
+ static_cast<napi_property_attributes>(napi_enumerable)),
124
+ StaticMethod<&LlamaContext::ToggleNativeLog>(
125
+ "toggleNativeLog",
119
126
  static_cast<napi_property_attributes>(napi_enumerable))});
120
127
  Napi::FunctionReference *constructor = new Napi::FunctionReference();
121
128
  *constructor = Napi::Persistent(func);
@@ -176,6 +183,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
176
183
  params.warmup = false;
177
184
  }
178
185
 
186
+ params.chat_template = get_option<std::string>(options, "chat_template", "");
187
+
179
188
  params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
180
189
  params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
181
190
  params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -255,6 +264,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
255
264
 
256
265
  _sess = sess;
257
266
  _info = common_params_get_system_info(params);
267
+
268
+ _templates = common_chat_templates_from_model(model, params.chat_template);
258
269
  }
259
270
 
260
271
  // getSystemInfo(): string
@@ -262,17 +273,52 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
262
273
  return Napi::String::New(info.Env(), _info);
263
274
  }
264
275
 
265
- bool validateModelChatTemplate(const struct llama_model * model) {
266
- std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
267
- std::string template_key = "tokenizer.chat_template";
268
- int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
269
- if (res >= 0) {
270
- llama_chat_message chat[] = {{"user", "test"}};
271
- const char * tmpl = llama_model_chat_template(model);
272
- int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
273
- return chat_res > 0;
274
- }
275
- return res > 0;
276
+ bool validateModelChatTemplate(const struct llama_model * model, const bool use_jinja, const char * name) {
277
+ const char * tmpl = llama_model_chat_template(model, name);
278
+ if (tmpl == nullptr) {
279
+ return false;
280
+ }
281
+ return common_chat_verify_template(tmpl, use_jinja);
282
+ }
283
+
284
+ static Napi::FunctionReference _log_callback;
285
+
286
+ // toggleNativeLog(enable: boolean, callback: (log: string) => void): void
287
+ void LlamaContext::ToggleNativeLog(const Napi::CallbackInfo &info) {
288
+ bool enable = info[0].ToBoolean().Value();
289
+ if (enable) {
290
+ _log_callback.Reset(info[1].As<Napi::Function>());
291
+
292
+ llama_log_set([](ggml_log_level level, const char * text, void * user_data) {
293
+ llama_log_callback_default(level, text, user_data);
294
+
295
+ std::string level_str = "";
296
+ if (level == GGML_LOG_LEVEL_ERROR) {
297
+ level_str = "error";
298
+ } else if (level == GGML_LOG_LEVEL_INFO) {
299
+ level_str = "info";
300
+ } else if (level == GGML_LOG_LEVEL_WARN) {
301
+ level_str = "warn";
302
+ }
303
+
304
+ if (_log_callback.IsEmpty()) {
305
+ return;
306
+ }
307
+ try {
308
+ Napi::Env env = _log_callback.Env();
309
+ Napi::HandleScope scope(env);
310
+ _log_callback.Call({
311
+ Napi::String::New(env, level_str),
312
+ Napi::String::New(env, text)
313
+ });
314
+ } catch (const std::exception &e) {
315
+ // printf("Error calling log callback: %s\n", e.what());
316
+ }
317
+ }, nullptr);
318
+ } else {
319
+ _log_callback.Reset();
320
+ llama_log_set(llama_log_callback_default, nullptr);
321
+ }
276
322
  }
277
323
 
278
324
  // getModelInfo(): object
@@ -286,7 +332,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
286
332
  for (int i = 0; i < count; i++) {
287
333
  char key[256];
288
334
  llama_model_meta_key_by_index(model, i, key, sizeof(key));
289
- char val[2048];
335
+ char val[4096];
290
336
  llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
291
337
 
292
338
  metadata.Set(key, val);
@@ -296,20 +342,195 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
296
342
  details.Set("nEmbd", llama_model_n_embd(model));
297
343
  details.Set("nParams", llama_model_n_params(model));
298
344
  details.Set("size", llama_model_size(model));
299
- details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
345
+
346
+ Napi::Object chatTemplates = Napi::Object::New(info.Env());
347
+ chatTemplates.Set("llamaChat", validateModelChatTemplate(model, false, ""));
348
+ Napi::Object minja = Napi::Object::New(info.Env());
349
+ minja.Set("default", validateModelChatTemplate(model, true, ""));
350
+ Napi::Object defaultCaps = Napi::Object::New(info.Env());
351
+ defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
352
+ defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
353
+ defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
354
+ defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
355
+ defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
356
+ defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
357
+ minja.Set("defaultCaps", defaultCaps);
358
+ minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
359
+ if (_templates.template_tool_use) {
360
+ Napi::Object toolUseCaps = Napi::Object::New(info.Env());
361
+ toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
362
+ toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
363
+ toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
364
+ toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
365
+ toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
366
+ toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
367
+ minja.Set("toolUseCaps", toolUseCaps);
368
+ }
369
+ chatTemplates.Set("minja", minja);
370
+ details.Set("chatTemplates", chatTemplates);
371
+
300
372
  details.Set("metadata", metadata);
373
+
374
+ // Deprecated: use chatTemplates.llamaChat instead
375
+ details.Set("isChatTemplateSupported", validateModelChatTemplate(_sess->model(), false, ""));
301
376
  return details;
302
377
  }
303
378
 
304
- // getFormattedChat(messages: [{ role: string, content: string }]): string
379
+ common_chat_params getFormattedChatWithJinja(
380
+ const struct llama_model * model,
381
+ const common_chat_templates &templates,
382
+ const std::string &messages,
383
+ const std::string &chat_template,
384
+ const std::string &json_schema,
385
+ const std::string &tools,
386
+ const bool &parallel_tool_calls,
387
+ const std::string &tool_choice
388
+ ) {
389
+ common_chat_inputs inputs;
390
+ inputs.messages = json::parse(messages);
391
+ auto useTools = !tools.empty();
392
+ if (useTools) {
393
+ inputs.tools = json::parse(tools);
394
+ }
395
+ inputs.parallel_tool_calls = parallel_tool_calls;
396
+ if (!tool_choice.empty()) {
397
+ inputs.tool_choice = tool_choice;
398
+ }
399
+ if (!json_schema.empty()) {
400
+ inputs.json_schema = json::parse(json_schema);
401
+ }
402
+ inputs.stream = true;
403
+
404
+ // If chat_template is provided, create new one and use it (probably slow)
405
+ if (!chat_template.empty()) {
406
+ auto tmp = common_chat_templates_from_model(model, chat_template);
407
+ const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
408
+ if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
409
+ inputs.parallel_tool_calls = false;
410
+ }
411
+ return common_chat_params_init(*template_ptr, inputs);
412
+ } else {
413
+ const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
414
+ if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
415
+ inputs.parallel_tool_calls = false;
416
+ }
417
+ return common_chat_params_init(*template_ptr, inputs);
418
+ }
419
+ }
420
+
421
+ std::string getFormattedChat(
422
+ const struct llama_model * model,
423
+ const common_chat_templates &templates,
424
+ const std::string &messages,
425
+ const std::string &chat_template
426
+ ) {
427
+ auto chat_json = json::parse(messages);
428
+
429
+ // Handle regular chat without tools
430
+ std::vector<common_chat_msg> chat_msgs;
431
+ for (const auto &msg : chat_json) {
432
+ chat_msgs.push_back({
433
+ msg["role"].get<std::string>(),
434
+ msg["content"].get<std::string>()
435
+ });
436
+ }
437
+
438
+ // If chat_template is provided, create new one and use it (probably slow)
439
+ if (!chat_template.empty()) {
440
+ auto tmp = common_chat_templates_from_model(model, chat_template);
441
+ return common_chat_apply_template(
442
+ *tmp.template_default,
443
+ chat_msgs,
444
+ true,
445
+ false
446
+ );
447
+ } else {
448
+ return common_chat_apply_template(
449
+ *templates.template_default,
450
+ chat_msgs,
451
+ true,
452
+ false
453
+ );
454
+ }
455
+ }
456
+
457
+ // getFormattedChat(
458
+ // messages: [{ role: string, content: string }],
459
+ // chat_template: string,
460
+ // params: { jinja: boolean, json_schema: string, tools: string, parallel_tool_calls: boolean, tool_choice: string }
461
+ // ): object | string
305
462
  Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
306
463
  Napi::Env env = info.Env();
307
464
  if (info.Length() < 1 || !info[0].IsArray()) {
308
465
  Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
309
466
  }
310
- auto messages = info[0].As<Napi::Array>();
311
- auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
312
- return Napi::String::New(env, formatted);
467
+ auto messages = json_stringify(info[0].As<Napi::Array>());
468
+ printf("messages: %s\n", messages.c_str());
469
+ auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";
470
+
471
+ auto has_params = info.Length() >= 2;
472
+ auto params = has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);
473
+
474
+ if (get_option<bool>(params, "jinja", false)) {
475
+ std::string json_schema_str = "";
476
+ if (!is_nil(params.Get("response_format"))) {
477
+ auto response_format = params.Get("response_format").As<Napi::Object>();
478
+ auto response_format_type = get_option<std::string>(response_format, "type", "text");
479
+ if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
480
+ auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
481
+ json_schema_str = json_schema.Has("schema") ?
482
+ json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
483
+ "{}";
484
+ } else if (response_format_type == "json_object") {
485
+ json_schema_str = response_format.Has("schema") ?
486
+ json_stringify(response_format.Get("schema").As<Napi::Object>()) :
487
+ "{}";
488
+ }
489
+ }
490
+ auto tools_str = params.Has("tools") ?
491
+ json_stringify(params.Get("tools").As<Napi::Array>()) :
492
+ "";
493
+ auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
494
+ auto tool_choice = get_option<std::string>(params, "tool_choice", "");
495
+
496
+ auto chatParams = getFormattedChatWithJinja(_sess->model(), _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
497
+
498
+ Napi::Object result = Napi::Object::New(env);
499
+ result.Set("prompt", chatParams.prompt.get<std::string>());
500
+ // chat_format: int
501
+ result.Set("chat_format", static_cast<int>(chatParams.format));
502
+ // grammar: string
503
+ result.Set("grammar", chatParams.grammar);
504
+ // grammar_lazy: boolean
505
+ result.Set("grammea_lazy", chatParams.grammar_lazy);
506
+ // grammar_triggers: [{ word: string, at_start: boolean }]
507
+ Napi::Array grammar_triggers = Napi::Array::New(env);
508
+ for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
509
+ const auto & trigger = chatParams.grammar_triggers[i];
510
+ Napi::Object triggerObj = Napi::Object::New(env);
511
+ triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
512
+ triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
513
+ grammar_triggers.Set(i, triggerObj);
514
+ }
515
+ result.Set("grammar_triggers", grammar_triggers);
516
+ // preserved_tokens: string[]
517
+ Napi::Array preserved_tokens = Napi::Array::New(env);
518
+ for (size_t i = 0; i < chatParams.preserved_tokens.size(); i++) {
519
+ preserved_tokens.Set(i, Napi::String::New(env, chatParams.preserved_tokens[i].c_str()));
520
+ }
521
+ result.Set("preserved_tokens", preserved_tokens);
522
+ // additional_stops: string[]
523
+ Napi::Array additional_stops = Napi::Array::New(env);
524
+ for (size_t i = 0; i < chatParams.additional_stops.size(); i++) {
525
+ additional_stops.Set(i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
526
+ }
527
+ result.Set("additional_stops", additional_stops);
528
+
529
+ return result;
530
+ } else {
531
+ auto formatted = getFormattedChat(_sess->model(), _templates, messages, chat_template);
532
+ return Napi::String::New(env, formatted);
533
+ }
313
534
  }
314
535
 
315
536
  // completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
@@ -332,11 +553,101 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
332
553
  }
333
554
  auto options = info[0].As<Napi::Object>();
334
555
 
556
+ std::vector<std::string> stop_words;
557
+ if (options.Has("stop") && options.Get("stop").IsArray()) {
558
+ auto stop_words_array = options.Get("stop").As<Napi::Array>();
559
+ for (size_t i = 0; i < stop_words_array.Length(); i++) {
560
+ stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
561
+ }
562
+ }
563
+
564
+ int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
565
+
335
566
  common_params params = _sess->params();
567
+ auto grammar_from_params = get_option<std::string>(options, "grammar", "");
568
+ auto has_grammar_set = !grammar_from_params.empty();
569
+ if (has_grammar_set) {
570
+ params.sampling.grammar = grammar_from_params;
571
+ }
572
+
573
+ std::string json_schema_str = "";
574
+ if (options.Has("response_format")) {
575
+ auto response_format = options.Get("response_format").As<Napi::Object>();
576
+ auto response_format_type = get_option<std::string>(response_format, "type", "text");
577
+ if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
578
+ auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
579
+ json_schema_str = json_schema.Has("schema") ?
580
+ json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
581
+ "{}";
582
+ } else if (response_format_type == "json_object") {
583
+ json_schema_str = response_format.Has("schema") ?
584
+ json_stringify(response_format.Get("schema").As<Napi::Object>()) :
585
+ "{}";
586
+ }
587
+ }
588
+
336
589
  if (options.Has("messages") && options.Get("messages").IsArray()) {
337
590
  auto messages = options.Get("messages").As<Napi::Array>();
338
- auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
339
- params.prompt = formatted;
591
+ auto chat_template = get_option<std::string>(options, "chat_template", "");
592
+ auto jinja = get_option<bool>(options, "jinja", false);
593
+ if (jinja) {
594
+ auto tools_str = options.Has("tools") ?
595
+ json_stringify(options.Get("tools").As<Napi::Array>()) :
596
+ "";
597
+ auto parallel_tool_calls = get_option<bool>(options, "parallel_tool_calls", false);
598
+ auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
599
+
600
+ auto chatParams = getFormattedChatWithJinja(
601
+ _sess->model(),
602
+ _templates,
603
+ json_stringify(messages),
604
+ chat_template,
605
+ json_schema_str,
606
+ tools_str,
607
+ parallel_tool_calls,
608
+ tool_choice
609
+ );
610
+
611
+ params.prompt = chatParams.prompt.get<std::string>();
612
+
613
+ chat_format = chatParams.format;
614
+
615
+ if (!has_grammar_set) {
616
+ // grammar param always wins jinja template & json_schema
617
+ params.sampling.grammar = chatParams.grammar;
618
+ params.sampling.grammar_lazy = chatParams.grammar_lazy;
619
+
620
+ for (const auto & trigger : chatParams.grammar_triggers) {
621
+ auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
622
+ if (ids.size() == 1) {
623
+ params.sampling.grammar_trigger_tokens.push_back(ids[0]);
624
+ params.sampling.preserved_tokens.insert(ids[0]);
625
+ continue;
626
+ }
627
+ params.sampling.grammar_trigger_words.push_back(trigger);
628
+ }
629
+ has_grammar_set = true;
630
+ }
631
+
632
+ for (const auto & token : chatParams.preserved_tokens) {
633
+ auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
634
+ if (ids.size() == 1) {
635
+ params.sampling.preserved_tokens.insert(ids[0]);
636
+ }
637
+ }
638
+
639
+ for (const auto & stop : chatParams.additional_stops) {
640
+ stop_words.push_back(stop);
641
+ }
642
+ } else {
643
+ auto formatted = getFormattedChat(
644
+ _sess->model(),
645
+ _templates,
646
+ json_stringify(messages),
647
+ chat_template
648
+ );
649
+ params.prompt = formatted;
650
+ }
340
651
  } else {
341
652
  params.prompt = get_option<std::string>(options, "prompt", "");
342
653
  }
@@ -344,6 +655,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
344
655
  Napi::TypeError::New(env, "Prompt is required")
345
656
  .ThrowAsJavaScriptException();
346
657
  }
658
+
659
+ if (!has_grammar_set && !json_schema_str.empty()) {
660
+ params.sampling.grammar = json_schema_to_grammar(json::parse(json_schema_str));
661
+ }
662
+
347
663
  params.n_predict = get_option<int32_t>(options, "n_predict", -1);
348
664
  params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
349
665
  params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
@@ -370,16 +686,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
370
686
  params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
371
687
  params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
372
688
  params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
373
- params.sampling.grammar = get_option<std::string>(options, "grammar", "");
374
689
  params.n_keep = get_option<int32_t>(options, "n_keep", 0);
375
690
  params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
376
- std::vector<std::string> stop_words;
377
- if (options.Has("stop") && options.Get("stop").IsArray()) {
378
- auto stop_words_array = options.Get("stop").As<Napi::Array>();
379
- for (size_t i = 0; i < stop_words_array.Length(); i++) {
380
- stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
381
- }
382
- }
383
691
 
384
692
  Napi::Function callback;
385
693
  if (info.Length() >= 2) {
@@ -387,7 +695,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
387
695
  }
388
696
 
389
697
  auto *worker =
390
- new LlamaCompletionWorker(info, _sess, callback, params, stop_words);
698
+ new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
391
699
  worker->Queue();
392
700
  _wip = worker;
393
701
  worker->onComplete([this]() { _wip = nullptr; });
@@ -5,6 +5,7 @@ class LlamaCompletionWorker;
5
5
  class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
6
6
  public:
7
7
  LlamaContext(const Napi::CallbackInfo &info);
8
+ static void ToggleNativeLog(const Napi::CallbackInfo &info);
8
9
  static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
9
10
  static void Init(Napi::Env env, Napi::Object &exports);
10
11
 
@@ -27,6 +28,7 @@ private:
27
28
  std::string _info;
28
29
  Napi::Object _meta;
29
30
  LlamaSessionPtr _sess = nullptr;
31
+ common_chat_templates _templates;
30
32
  std::vector<common_adapter_lora_info> _lora;
31
33
  LlamaCompletionWorker *_wip = nullptr;
32
34
  };
package/src/common.hpp CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  #include "common/common.h"
4
4
  #include "common/sampling.h"
5
+ #include "chat.hpp"
6
+ #include "chat-template.hpp"
5
7
  #include "llama.h"
6
8
  #include <memory>
7
9
  #include <mutex>
@@ -15,11 +17,26 @@ typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
15
17
  LlamaCppSampling;
16
18
  typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
17
19
 
20
+ static bool is_nil(const Napi::Value &value) {
21
+ return value.IsNull() || value.IsUndefined();
22
+ }
23
+
24
+ static std::string json_stringify(const Napi::Object &obj) {
25
+ Napi::Env env = obj.Env();
26
+ Napi::Object json = env.Global().Get("JSON").As<Napi::Object>();
27
+ Napi::Function stringify = json.Get("stringify").As<Napi::Function>();
28
+ return stringify.Call(json, { obj }).As<Napi::String>().ToString();
29
+ }
30
+
31
+ static void console_log(Napi::Env env, const std::string& message) {
32
+ Napi::Function consoleLog = env.Global().Get("console").As<Napi::Object>().Get("log").As<Napi::Function>();
33
+ consoleLog.Call({ Napi::String::New(env, message) });
34
+ }
35
+
18
36
  template <typename T>
19
37
  constexpr T get_option(const Napi::Object &options, const std::string &name,
20
38
  const T default_value) {
21
- if (options.Has(name) && !options.Get(name).IsUndefined() &&
22
- !options.Get(name).IsNull()) {
39
+ if (options.Has(name) && !is_nil(options.Get(name))) {
23
40
  if constexpr (std::is_same<T, std::string>::value) {
24
41
  return options.Get(name).ToString().operator T();
25
42
  } else if constexpr (std::is_same<T, int32_t>::value ||