@fugood/llama.node 0.3.9 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.js +2 -2
  18. package/lib/binding.ts +46 -8
  19. package/lib/index.ts +3 -1
  20. package/package.json +8 -1
  21. package/src/LlamaCompletionWorker.cpp +33 -6
  22. package/src/LlamaCompletionWorker.h +3 -1
  23. package/src/LlamaContext.cpp +292 -28
  24. package/src/LlamaContext.h +1 -0
  25. package/src/common.hpp +19 -2
  26. package/src/llama.cpp/.github/workflows/build.yml +289 -107
  27. package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
  28. package/src/llama.cpp/.github/workflows/docker.yml +2 -1
  29. package/src/llama.cpp/.github/workflows/server.yml +25 -2
  30. package/src/llama.cpp/CMakeLists.txt +10 -19
  31. package/src/llama.cpp/cmake/build-info.cmake +1 -1
  32. package/src/llama.cpp/common/CMakeLists.txt +32 -0
  33. package/src/llama.cpp/common/arg.cpp +66 -16
  34. package/src/llama.cpp/common/chat-template.hpp +515 -0
  35. package/src/llama.cpp/common/chat.cpp +966 -0
  36. package/src/llama.cpp/common/chat.hpp +52 -0
  37. package/src/llama.cpp/common/common.cpp +159 -36
  38. package/src/llama.cpp/common/common.h +56 -14
  39. package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
  40. package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
  41. package/src/llama.cpp/common/llguidance.cpp +270 -0
  42. package/src/llama.cpp/common/log.cpp +1 -10
  43. package/src/llama.cpp/common/log.h +10 -0
  44. package/src/llama.cpp/common/minja.hpp +2868 -0
  45. package/src/llama.cpp/common/sampling.cpp +22 -1
  46. package/src/llama.cpp/common/sampling.h +3 -0
  47. package/src/llama.cpp/docs/build.md +54 -9
  48. package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
  49. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
  50. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  51. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
  52. package/src/llama.cpp/examples/llava/clip.cpp +133 -14
  53. package/src/llama.cpp/examples/llava/clip.h +2 -0
  54. package/src/llama.cpp/examples/llava/llava.cpp +22 -8
  55. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
  56. package/src/llama.cpp/examples/main/main.cpp +26 -25
  57. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
  58. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
  59. package/src/llama.cpp/examples/run/run.cpp +224 -69
  60. package/src/llama.cpp/examples/server/server.cpp +252 -81
  61. package/src/llama.cpp/examples/server/utils.hpp +73 -21
  62. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
  63. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
  65. package/src/llama.cpp/ggml/include/ggml.h +1 -1
  66. package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
  67. package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
  68. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
  69. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
  71. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
  73. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
  74. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
  76. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
  77. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
  78. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
  79. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  81. package/src/llama.cpp/ggml/src/ggml.c +23 -13
  82. package/src/llama.cpp/include/llama.h +14 -1
  83. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  84. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  85. package/src/llama.cpp/src/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/src/llama-arch.cpp +7 -2
  87. package/src/llama.cpp/src/llama-arch.h +3 -1
  88. package/src/llama.cpp/src/llama-chat.cpp +11 -2
  89. package/src/llama.cpp/src/llama-chat.h +1 -0
  90. package/src/llama.cpp/src/llama-grammar.cpp +86 -6
  91. package/src/llama.cpp/src/llama-grammar.h +22 -1
  92. package/src/llama.cpp/src/llama-mmap.cpp +1 -0
  93. package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
  94. package/src/llama.cpp/src/llama-model.cpp +76 -6
  95. package/src/llama.cpp/src/llama-sampling.cpp +47 -4
  96. package/src/llama.cpp/src/llama-vocab.cpp +10 -4
  97. package/src/llama.cpp/src/llama.cpp +181 -123
  98. package/src/llama.cpp/tests/CMakeLists.txt +4 -0
  99. package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
  100. package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
  101. package/src/llama.cpp/tests/test-chat.cpp +607 -0
  102. package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
  103. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
  104. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
  105. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/LlamaContext.cpp CHANGED
@@ -1,6 +1,8 @@
  #include "ggml.h"
  #include "gguf.h"
  #include "llama-impl.h"
+ #include "json.hpp"
+ #include "json-schema-to-grammar.h"
  #include "LlamaContext.h"
  #include "DetokenizeWorker.h"
  #include "DisposeWorker.h"
@@ -10,6 +12,8 @@
  #include "SaveSessionWorker.h"
  #include "TokenizeWorker.h"

+ using json = nlohmann::ordered_json;
+
  // loadModelInfo(path: string): object
  Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
@@ -176,6 +180,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  params.warmup = false;
  }

+ params.chat_template = get_option<std::string>(options, "chat_template", "");
+
  params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
  params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
  params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -255,6 +261,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)

  _sess = sess;
  _info = common_params_get_system_info(params);
+
+ _templates = common_chat_templates_from_model(model, params.chat_template);
  }

  // getSystemInfo(): string
@@ -262,17 +270,12 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
  return Napi::String::New(info.Env(), _info);
  }

- bool validateModelChatTemplate(const struct llama_model * model) {
- std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
- std::string template_key = "tokenizer.chat_template";
- int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
- if (res >= 0) {
- llama_chat_message chat[] = {{"user", "test"}};
- const char * tmpl = llama_model_chat_template(model);
- int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
- return chat_res > 0;
- }
- return res > 0;
+ bool validateModelChatTemplate(const struct llama_model * model, const bool use_jinja, const char * name) {
+ const char * tmpl = llama_model_chat_template(model, name);
+ if (tmpl == nullptr) {
+ return false;
+ }
+ return common_chat_verify_template(tmpl, use_jinja);
  }

  // getModelInfo(): object
@@ -286,7 +289,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
  for (int i = 0; i < count; i++) {
  char key[256];
  llama_model_meta_key_by_index(model, i, key, sizeof(key));
- char val[2048];
+ char val[4096];
  llama_model_meta_val_str_by_index(model, i, val, sizeof(val));

  metadata.Set(key, val);
@@ -296,20 +299,194 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
  details.Set("nEmbd", llama_model_n_embd(model));
  details.Set("nParams", llama_model_n_params(model));
  details.Set("size", llama_model_size(model));
- details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
+
+ Napi::Object chatTemplates = Napi::Object::New(info.Env());
+ chatTemplates.Set("llamaChat", validateModelChatTemplate(model, false, ""));
+ Napi::Object minja = Napi::Object::New(info.Env());
+ minja.Set("default", validateModelChatTemplate(model, true, ""));
+ Napi::Object defaultCaps = Napi::Object::New(info.Env());
+ defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
+ defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
+ defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
+ defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
+ defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
+ defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+ minja.Set("defaultCaps", defaultCaps);
+ Napi::Object toolUse = Napi::Object::New(info.Env());
+ toolUse.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
+ if (_templates.template_tool_use) {
+ Napi::Object toolUseCaps = Napi::Object::New(info.Env());
+ toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
+ toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
+ toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
+ toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
+ toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
+ toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+ toolUse.Set("toolUseCaps", toolUseCaps);
+ }
+ minja.Set("toolUse", toolUse);
+ chatTemplates.Set("minja", minja);
+ details.Set("chatTemplates", chatTemplates);
+
  details.Set("metadata", metadata);
  return details;
  }

- // getFormattedChat(messages: [{ role: string, content: string }]): string
+ common_chat_params getFormattedChatWithJinja(
+ const struct llama_model * model,
+ const common_chat_templates &templates,
+ const std::string &messages,
+ const std::string &chat_template,
+ const std::string &json_schema,
+ const std::string &tools,
+ const bool &parallel_tool_calls,
+ const std::string &tool_choice
+ ) {
+ common_chat_inputs inputs;
+ inputs.messages = json::parse(messages);
+ auto useTools = !tools.empty();
+ if (useTools) {
+ inputs.tools = json::parse(tools);
+ }
+ inputs.parallel_tool_calls = parallel_tool_calls;
+ if (!tool_choice.empty()) {
+ inputs.tool_choice = tool_choice;
+ }
+ if (!json_schema.empty()) {
+ inputs.json_schema = json::parse(json_schema);
+ }
+ inputs.stream = true;
+
+ // If chat_template is provided, create new one and use it (probably slow)
+ if (!chat_template.empty()) {
+ auto tmp = common_chat_templates_from_model(model, chat_template);
+ const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
+ if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+ inputs.parallel_tool_calls = false;
+ }
+ return common_chat_params_init(*template_ptr, inputs);
+ } else {
+ const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
+ if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
+ inputs.parallel_tool_calls = false;
+ }
+ return common_chat_params_init(*template_ptr, inputs);
+ }
+ }
+
+ std::string getFormattedChat(
+ const struct llama_model * model,
+ const common_chat_templates &templates,
+ const std::string &messages,
+ const std::string &chat_template
+ ) {
+ auto chat_json = json::parse(messages);
+
+ // Handle regular chat without tools
+ std::vector<common_chat_msg> chat_msgs;
+ for (const auto &msg : chat_json) {
+ chat_msgs.push_back({
+ msg["role"].get<std::string>(),
+ msg["content"].get<std::string>()
+ });
+ }
+
+ // If chat_template is provided, create new one and use it (probably slow)
+ if (!chat_template.empty()) {
+ auto tmp = common_chat_templates_from_model(model, chat_template);
+ return common_chat_apply_template(
+ *tmp.template_default,
+ chat_msgs,
+ true,
+ false
+ );
+ } else {
+ return common_chat_apply_template(
+ *templates.template_default,
+ chat_msgs,
+ true,
+ false
+ );
+ }
+ }
+
+ // getFormattedChat(
+ // messages: [{ role: string, content: string }],
+ // chat_template: string,
+ // params: { jinja: boolean, json_schema: string, tools: string, parallel_tool_calls: boolean, tool_choice: string }
+ // ): object | string
  Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsArray()) {
  Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
  }
- auto messages = info[0].As<Napi::Array>();
- auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
- return Napi::String::New(env, formatted);
+ auto messages = json_stringify(info[0].As<Napi::Array>());
+ printf("messages: %s\n", messages.c_str());
+ auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";
+
+ auto has_params = info.Length() >= 2;
+ auto params = has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);
+
+ if (get_option<bool>(params, "jinja", false)) {
+ std::string json_schema_str = "";
+ if (!is_nil(params.Get("response_format"))) {
+ auto response_format = params.Get("response_format").As<Napi::Object>();
+ auto response_format_type = get_option<std::string>(response_format, "type", "text");
+ if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
+ auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
+ json_schema_str = json_schema.Has("schema") ?
+ json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
+ "{}";
+ } else if (response_format_type == "json_object") {
+ json_schema_str = response_format.Has("schema") ?
+ json_stringify(response_format.Get("schema").As<Napi::Object>()) :
+ "{}";
+ }
+ }
+ auto tools_str = params.Has("tools") ?
+ json_stringify(params.Get("tools").As<Napi::Array>()) :
+ "";
+ auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
+ auto tool_choice = get_option<std::string>(params, "tool_choice", "");
+
+ auto chatParams = getFormattedChatWithJinja(_sess->model(), _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
+
+ Napi::Object result = Napi::Object::New(env);
+ result.Set("prompt", chatParams.prompt.get<std::string>());
+ // chat_format: int
+ result.Set("chat_format", static_cast<int>(chatParams.format));
+ // grammar: string
+ result.Set("grammar", chatParams.grammar);
+ // grammar_lazy: boolean
+ result.Set("grammea_lazy", chatParams.grammar_lazy);
+ // grammar_triggers: [{ word: string, at_start: boolean }]
+ Napi::Array grammar_triggers = Napi::Array::New(env);
+ for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
+ const auto & trigger = chatParams.grammar_triggers[i];
+ Napi::Object triggerObj = Napi::Object::New(env);
+ triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
+ triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
+ grammar_triggers.Set(i, triggerObj);
+ }
+ result.Set("grammar_triggers", grammar_triggers);
+ // preserved_tokens: string[]
+ Napi::Array preserved_tokens = Napi::Array::New(env);
+ for (size_t i = 0; i < chatParams.preserved_tokens.size(); i++) {
+ preserved_tokens.Set(i, Napi::String::New(env, chatParams.preserved_tokens[i].c_str()));
+ }
+ result.Set("preserved_tokens", preserved_tokens);
+ // additional_stops: string[]
+ Napi::Array additional_stops = Napi::Array::New(env);
+ for (size_t i = 0; i < chatParams.additional_stops.size(); i++) {
+ additional_stops.Set(i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
+ }
+ result.Set("additional_stops", additional_stops);
+
+ return result;
+ } else {
+ auto formatted = getFormattedChat(_sess->model(), _templates, messages, chat_template);
+ return Napi::String::New(env, formatted);
+ }
  }

  // completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
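The rewritten GetFormattedChat binding above now accepts an optional chat template plus a params object, and returns either a plain prompt string or, when jinja is enabled, an object carrying the rendered prompt together with grammar metadata (chat_format, grammar, grammar_triggers, preserved_tokens, additional_stops). A minimal JS-side sketch of how this might be consumed; the loadModel entry point, awaitability, and exact option typings are assumptions based on lib/binding.ts rather than shown in this diff:

    // Hypothetical usage sketch; exported names and types are assumed, not taken from this diff.
    import { loadModel } from '@fugood/llama.node'

    const context = await loadModel({ model: './model.gguf', n_ctx: 4096 })

    // Legacy path: returns a formatted prompt string.
    const prompt = context.getFormattedChat([{ role: 'user', content: 'Hello' }])

    // Jinja path: returns { prompt, chat_format, grammar, grammar_triggers, ... }.
    const chatParams = context.getFormattedChat(
      [{ role: 'user', content: 'What is the weather in Tokyo?' }],
      '', // empty chat_template falls back to the template loaded from the model
      {
        jinja: true,
        tools: [{ type: 'function', function: { name: 'get_weather', parameters: { type: 'object' } } }],
        tool_choice: 'auto',
        parallel_tool_calls: false,
      },
    )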
@@ -332,11 +509,101 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  }
  auto options = info[0].As<Napi::Object>();

+ std::vector<std::string> stop_words;
+ if (options.Has("stop") && options.Get("stop").IsArray()) {
+ auto stop_words_array = options.Get("stop").As<Napi::Array>();
+ for (size_t i = 0; i < stop_words_array.Length(); i++) {
+ stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
+ }
+ }
+
+ int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
+
  common_params params = _sess->params();
+ auto grammar_from_params = get_option<std::string>(options, "grammar", "");
+ auto has_grammar_set = !grammar_from_params.empty();
+ if (has_grammar_set) {
+ params.sampling.grammar = grammar_from_params;
+ }
+
+ std::string json_schema_str = "";
+ if (options.Has("response_format")) {
+ auto response_format = options.Get("response_format").As<Napi::Object>();
+ auto response_format_type = get_option<std::string>(response_format, "type", "text");
+ if (response_format_type == "json_schema" && response_format.Has("json_schema")) {
+ auto json_schema = response_format.Get("json_schema").As<Napi::Object>();
+ json_schema_str = json_schema.Has("schema") ?
+ json_stringify(json_schema.Get("schema").As<Napi::Object>()) :
+ "{}";
+ } else if (response_format_type == "json_object") {
+ json_schema_str = response_format.Has("schema") ?
+ json_stringify(response_format.Get("schema").As<Napi::Object>()) :
+ "{}";
+ }
+ }
+
  if (options.Has("messages") && options.Get("messages").IsArray()) {
  auto messages = options.Get("messages").As<Napi::Array>();
- auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
- params.prompt = formatted;
+ auto chat_template = get_option<std::string>(options, "chat_template", "");
+ auto jinja = get_option<bool>(options, "jinja", false);
+ if (jinja) {
+ auto tools_str = options.Has("tools") ?
+ json_stringify(options.Get("tools").As<Napi::Array>()) :
+ "";
+ auto parallel_tool_calls = get_option<bool>(options, "parallel_tool_calls", false);
+ auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
+
+ auto chatParams = getFormattedChatWithJinja(
+ _sess->model(),
+ _templates,
+ json_stringify(messages),
+ chat_template,
+ json_schema_str,
+ tools_str,
+ parallel_tool_calls,
+ tool_choice
+ );
+
+ params.prompt = chatParams.prompt.get<std::string>();
+
+ chat_format = chatParams.format;
+
+ if (!has_grammar_set) {
+ // grammar param always wins jinja template & json_schema
+ params.sampling.grammar = chatParams.grammar;
+ params.sampling.grammar_lazy = chatParams.grammar_lazy;
+
+ for (const auto & trigger : chatParams.grammar_triggers) {
+ auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
+ if (ids.size() == 1) {
+ params.sampling.grammar_trigger_tokens.push_back(ids[0]);
+ params.sampling.preserved_tokens.insert(ids[0]);
+ continue;
+ }
+ params.sampling.grammar_trigger_words.push_back(trigger);
+ }
+ has_grammar_set = true;
+ }
+
+ for (const auto & token : chatParams.preserved_tokens) {
+ auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+ if (ids.size() == 1) {
+ params.sampling.preserved_tokens.insert(ids[0]);
+ }
+ }
+
+ for (const auto & stop : chatParams.additional_stops) {
+ stop_words.push_back(stop);
+ }
+ } else {
+ auto formatted = getFormattedChat(
+ _sess->model(),
+ _templates,
+ json_stringify(messages),
+ chat_template
+ );
+ params.prompt = formatted;
+ }
  } else {
  params.prompt = get_option<std::string>(options, "prompt", "");
  }
@@ -344,6 +611,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  Napi::TypeError::New(env, "Prompt is required")
  .ThrowAsJavaScriptException();
  }
+
+ if (!has_grammar_set && !json_schema_str.empty()) {
+ params.sampling.grammar = json_schema_to_grammar(json::parse(json_schema_str));
+ }
+
  params.n_predict = get_option<int32_t>(options, "n_predict", -1);
  params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
  params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
@@ -370,16 +642,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
  params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
  params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
- params.sampling.grammar = get_option<std::string>(options, "grammar", "");
  params.n_keep = get_option<int32_t>(options, "n_keep", 0);
  params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
- std::vector<std::string> stop_words;
- if (options.Has("stop") && options.Get("stop").IsArray()) {
- auto stop_words_array = options.Get("stop").As<Napi::Array>();
- for (size_t i = 0; i < stop_words_array.Length(); i++) {
- stop_words.push_back(stop_words_array.Get(i).ToString().Utf8Value());
- }
- }

  Napi::Function callback;
  if (info.Length() >= 2) {
@@ -387,7 +651,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  }

  auto *worker =
- new LlamaCompletionWorker(info, _sess, callback, params, stop_words);
+ new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
  worker->Queue();
  _wip = worker;
  worker->onComplete([this]() { _wip = nullptr; });
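On the completion path the same jinja rendering now feeds the sampler as well: unless an explicit grammar option is passed, the template-derived grammar, lazy-grammar trigger tokens, preserved tokens, and additional stop words are applied before the worker is queued, and the resolved chat_format is handed to LlamaCompletionWorker. A hedged sketch of the corresponding JS call, mirroring the option names read above (messages, jinja, response_format, stop); it is illustrative only, not the package's documented API:

    // Illustrative only; assumes `context` was created as in the previous sketch.
    const result = await context.completion(
      {
        messages: [{ role: 'user', content: 'List three prime numbers as a JSON array.' }],
        jinja: true,
        response_format: {
          type: 'json_schema',
          json_schema: { schema: { type: 'array', items: { type: 'integer' } } },
        },
        n_predict: 128,
        temperature: 0.8,
        stop: ['</s>'],
      },
      (token) => { /* streamed partial text */ },
    )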
package/src/LlamaContext.h CHANGED
@@ -27,6 +27,7 @@ private:
  std::string _info;
  Napi::Object _meta;
  LlamaSessionPtr _sess = nullptr;
+ common_chat_templates _templates;
  std::vector<common_adapter_lora_info> _lora;
  LlamaCompletionWorker *_wip = nullptr;
  };
package/src/common.hpp CHANGED
@@ -2,6 +2,8 @@

  #include "common/common.h"
  #include "common/sampling.h"
+ #include "chat.hpp"
+ #include "chat-template.hpp"
  #include "llama.h"
  #include <memory>
  #include <mutex>
@@ -15,11 +17,26 @@ typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
  LlamaCppSampling;
  typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;

+ static bool is_nil(const Napi::Value &value) {
+ return value.IsNull() || value.IsUndefined();
+ }
+
+ static std::string json_stringify(const Napi::Object &obj) {
+ Napi::Env env = obj.Env();
+ Napi::Object json = env.Global().Get("JSON").As<Napi::Object>();
+ Napi::Function stringify = json.Get("stringify").As<Napi::Function>();
+ return stringify.Call(json, { obj }).As<Napi::String>().ToString();
+ }
+
+ static void console_log(Napi::Env env, const std::string& message) {
+ Napi::Function consoleLog = env.Global().Get("console").As<Napi::Object>().Get("log").As<Napi::Function>();
+ consoleLog.Call({ Napi::String::New(env, message) });
+ }
+
  template <typename T>
  constexpr T get_option(const Napi::Object &options, const std::string &name,
  const T default_value) {
- if (options.Has(name) && !options.Get(name).IsUndefined() &&
- !options.Get(name).IsNull()) {
+ if (options.Has(name) && !is_nil(options.Get(name))) {
  if constexpr (std::is_same<T, std::string>::value) {
  return options.Get(name).ToString().operator T();
  } else if constexpr (std::is_same<T, int32_t>::value ||