@fugood/llama.node 0.3.12 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +2 -1
  18. package/package.json +1 -1
  19. package/src/LlamaCompletionWorker.cpp +14 -0
  20. package/src/LlamaContext.cpp +110 -79
  21. package/src/LlamaContext.h +1 -1
  22. package/src/common.hpp +1 -2
  23. package/src/llama.cpp/.github/workflows/build.yml +95 -13
  24. package/src/llama.cpp/.github/workflows/docker.yml +2 -0
  25. package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +2 -0
  27. package/src/llama.cpp/common/CMakeLists.txt +23 -6
  28. package/src/llama.cpp/common/arg.cpp +292 -14
  29. package/src/llama.cpp/common/chat.cpp +1128 -315
  30. package/src/llama.cpp/common/chat.h +135 -0
  31. package/src/llama.cpp/common/common.cpp +27 -171
  32. package/src/llama.cpp/common/common.h +41 -73
  33. package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
  34. package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
  35. package/src/llama.cpp/common/llguidance.cpp +3 -3
  36. package/src/llama.cpp/common/log.cpp +1 -0
  37. package/src/llama.cpp/common/log.h +2 -1
  38. package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +21 -7
  39. package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +61 -14
  40. package/src/llama.cpp/common/ngram-cache.cpp +1 -0
  41. package/src/llama.cpp/common/sampling.cpp +93 -49
  42. package/src/llama.cpp/common/speculative.cpp +6 -5
  43. package/src/llama.cpp/common/speculative.h +1 -1
  44. package/src/llama.cpp/docs/build.md +47 -9
  45. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
  46. package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
  47. package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
  48. package/src/llama.cpp/examples/imatrix/imatrix.cpp +4 -4
  49. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
  50. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
  52. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  53. package/src/llama.cpp/examples/llava/clip.cpp +373 -107
  54. package/src/llama.cpp/examples/llava/clip.h +19 -3
  55. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
  56. package/src/llama.cpp/examples/llava/llava.cpp +4 -2
  57. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
  58. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
  59. package/src/llama.cpp/examples/main/main.cpp +73 -28
  60. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
  61. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
  62. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
  63. package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
  64. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
  65. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
  66. package/src/llama.cpp/examples/run/run.cpp +115 -79
  67. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/server/httplib.h +381 -292
  69. package/src/llama.cpp/examples/server/server.cpp +134 -128
  70. package/src/llama.cpp/examples/server/utils.hpp +95 -106
  71. package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
  72. package/src/llama.cpp/examples/tts/tts.cpp +251 -142
  73. package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
  74. package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
  75. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
  76. package/src/llama.cpp/ggml/include/ggml-cpu.h +4 -1
  77. package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
  78. package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
  79. package/src/llama.cpp/ggml/include/ggml.h +6 -2
  80. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
  81. package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
  82. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
  83. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
  84. package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
  85. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
  86. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
  87. package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
  88. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
  89. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
  90. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +156 -11
  93. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +2235 -641
  94. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1572 -198
  95. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +24 -5
  96. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
  97. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
  101. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +16 -3
  102. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
  103. package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
  104. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
  105. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
  106. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
  107. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
  108. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +246 -120
  109. package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
  110. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
  111. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
  112. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  113. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
  114. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
  115. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
  116. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
  117. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
  120. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
  121. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
  122. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
  123. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +174 -728
  124. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
  125. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
  126. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  127. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  128. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +949 -602
  129. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +37 -3
  130. package/src/llama.cpp/ggml/src/ggml.c +9 -4
  131. package/src/llama.cpp/include/llama.h +32 -14
  132. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  133. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  134. package/src/llama.cpp/requirements/requirements-all.txt +1 -0
  135. package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  136. package/src/llama.cpp/requirements.txt +1 -0
  137. package/src/llama.cpp/src/llama-arch.cpp +21 -0
  138. package/src/llama.cpp/src/llama-arch.h +1 -0
  139. package/src/llama.cpp/src/llama-chat.cpp +1 -0
  140. package/src/llama.cpp/src/llama-grammar.cpp +183 -183
  141. package/src/llama.cpp/src/llama-grammar.h +13 -4
  142. package/src/llama.cpp/src/llama-impl.h +6 -6
  143. package/src/llama.cpp/src/llama-kv-cache.h +2 -1
  144. package/src/llama.cpp/src/llama-mmap.cpp +11 -1
  145. package/src/llama.cpp/src/llama-mmap.h +1 -0
  146. package/src/llama.cpp/src/llama-model.cpp +70 -6
  147. package/src/llama.cpp/src/llama-sampling.cpp +174 -67
  148. package/src/llama.cpp/src/llama-vocab.cpp +12 -0
  149. package/src/llama.cpp/src/llama.cpp +154 -5
  150. package/src/llama.cpp/src/unicode.cpp +9 -2
  151. package/src/llama.cpp/tests/test-backend-ops.cpp +171 -115
  152. package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
  153. package/src/llama.cpp/tests/test-chat.cpp +691 -325
  154. package/src/llama.cpp/tests/test-gguf.cpp +4 -4
  155. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
  156. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
  157. package/src/llama.cpp/tests/test-sampling.cpp +15 -0
  158. package/src/llama.cpp/Sources/llama/llama.h +0 -4
  159. package/src/llama.cpp/common/chat.hpp +0 -52
Binary files changed (contents not shown)
package/lib/binding.ts CHANGED
@@ -8,6 +8,7 @@ export type ChatMessage = {
 export type LlamaModelOptions = {
   model: string
   chat_template?: string
+  reasoning_format?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -86,7 +87,7 @@ export type LlamaCompletionOptions = {
   stop?: string[]
   grammar?: string
   grammar_lazy?: boolean
-  grammar_triggers?: { word: string; at_start: boolean }[]
+  grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
 }
 
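Usage note (not part of the published diff): the new reasoning_format model option can be set at load time. A minimal sketch, assuming the types above are re-exported from the package entry point; the model path is a placeholder.

// Illustrative sketch only: assumes LlamaModelOptions is re-exported from the
// package entry point; the model path is a placeholder value.
import type { LlamaModelOptions } from '@fugood/llama.node'

const modelOptions: Partial<LlamaModelOptions> = {
  model: './models/model.gguf',
  // New in this diff: "deepseek" enables reasoning extraction
  // (COMMON_REASONING_FORMAT_DEEPSEEK in the native layer); any other value
  // falls back to "none".
  reasoning_format: 'deepseek',
}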
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.12",
+  "version": "0.3.14",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -165,9 +165,17 @@ void LlamaCompletionWorker::OnOK() {
              Napi::String::New(env, _result.text.c_str()));
 
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
+  std::string * reasoning_content = nullptr;
+  std::string * content = nullptr;
   if (!_stop) {
     try {
       common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
+      if (!message.reasoning_content.empty()) {
+        reasoning_content = &message.reasoning_content;
+      }
+      if (!message.content.empty()) {
+        content = &message.content;
+      }
       for (size_t i = 0; i < message.tool_calls.size(); i++) {
         const auto &tc = message.tool_calls[i];
         Napi::Object tool_call = Napi::Object::New(env);
@@ -188,6 +196,12 @@ void LlamaCompletionWorker::OnOK() {
   if (tool_calls.Length() > 0) {
     result.Set("tool_calls", tool_calls);
   }
+  if (reasoning_content) {
+    result.Set("reasoning_content", Napi::String::New(env, reasoning_content->c_str()));
+  }
+  if (content) {
+    result.Set("content", Napi::String::New(env, content->c_str()));
+  }
 
   auto ctx = _sess->context();
   const auto timings_token = llama_perf_context(ctx);
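Usage note (not part of the diff): when a chat format is active and the completion was not interrupted, the completion result can now carry the parsed fields added above. A rough sketch of reading them; only the field names come from the hunks, the surrounding result shape and the name CompletionResult are assumptions.

// Illustrative sketch only: CompletionResult is a hypothetical name for the
// object resolved by a completion call; tool_calls entries are left untyped
// because their exact shape is not visible in this diff.
type CompletionResult = {
  text: string
  content?: string              // set when the parsed message content is non-empty
  reasoning_content?: string    // set when reasoning was extracted (e.g. reasoning_format: 'deepseek')
  tool_calls?: Record<string, unknown>[]
}

function printResult(res: CompletionResult) {
  if (res.reasoning_content) console.log('[reasoning]', res.reasoning_content)
  console.log(res.content ?? res.text)
}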
package/src/LlamaContext.cpp CHANGED
@@ -185,6 +185,13 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   params.chat_template = get_option<std::string>(options, "chat_template", "");
 
+  std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");
+  if (reasoning_format == "deepseek") {
+    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+  } else {
+    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+  }
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -265,7 +272,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   _sess = sess;
   _info = common_params_get_system_info(params);
 
-  _templates = common_chat_templates_from_model(model, params.chat_template);
+  _templates = common_chat_templates_init(model, params.chat_template);
 }
 
 // getSystemInfo(): string
@@ -348,22 +355,22 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   Napi::Object minja = Napi::Object::New(info.Env());
   minja.Set("default", validateModelChatTemplate(model, true, ""));
   Napi::Object defaultCaps = Napi::Object::New(info.Env());
-  defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
-  defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
-  defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
-  defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
-  defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
-  defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+  defaultCaps.Set("tools", _templates.get()->template_default->original_caps().supports_tools);
+  defaultCaps.Set("toolCalls", _templates.get()->template_default->original_caps().supports_tool_calls);
+  defaultCaps.Set("toolResponses", _templates.get()->template_default->original_caps().supports_tool_responses);
+  defaultCaps.Set("systemRole", _templates.get()->template_default->original_caps().supports_system_role);
+  defaultCaps.Set("parallelToolCalls", _templates.get()->template_default->original_caps().supports_parallel_tool_calls);
+  defaultCaps.Set("toolCallId", _templates.get()->template_default->original_caps().supports_tool_call_id);
   minja.Set("defaultCaps", defaultCaps);
   minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
-  if (_templates.template_tool_use) {
+  if (_templates.get()->template_tool_use) {
     Napi::Object toolUseCaps = Napi::Object::New(info.Env());
-    toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
-    toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
-    toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
-    toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
-    toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
-    toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+    toolUseCaps.Set("tools", _templates.get()->template_tool_use->original_caps().supports_tools);
+    toolUseCaps.Set("toolCalls", _templates.get()->template_tool_use->original_caps().supports_tool_calls);
+    toolUseCaps.Set("toolResponses", _templates.get()->template_tool_use->original_caps().supports_tool_responses);
+    toolUseCaps.Set("systemRole", _templates.get()->template_tool_use->original_caps().supports_system_role);
+    toolUseCaps.Set("parallelToolCalls", _templates.get()->template_tool_use->original_caps().supports_parallel_tool_calls);
+    toolUseCaps.Set("toolCallId", _templates.get()->template_tool_use->original_caps().supports_tool_call_id);
     minja.Set("toolUseCaps", toolUseCaps);
   }
   chatTemplates.Set("minja", minja);
@@ -377,8 +384,8 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
 }
 
 common_chat_params getFormattedChatWithJinja(
-  const struct llama_model * model,
-  const common_chat_templates &templates,
+  const std::shared_ptr<LlamaSession> &sess,
+  const common_chat_templates_ptr &templates,
   const std::string &messages,
   const std::string &chat_template,
   const std::string &json_schema,
@@ -386,71 +393,46 @@ common_chat_params getFormattedChatWithJinja(
   const bool &parallel_tool_calls,
   const std::string &tool_choice
 ) {
-  common_chat_inputs inputs;
-  inputs.messages = json::parse(messages);
+  common_chat_templates_inputs inputs;
+  inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
   auto useTools = !tools.empty();
   if (useTools) {
-    inputs.tools = json::parse(tools);
+    inputs.tools = common_chat_tools_parse_oaicompat(json::parse(tools));
   }
   inputs.parallel_tool_calls = parallel_tool_calls;
   if (!tool_choice.empty()) {
-    inputs.tool_choice = tool_choice;
+    inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tool_choice);
   }
   if (!json_schema.empty()) {
-    inputs.json_schema = json::parse(json_schema);
+    inputs.json_schema = json::parse(json_schema);
   }
-  inputs.stream = true;
+  inputs.extract_reasoning = sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-    auto tmp = common_chat_templates_from_model(model, chat_template);
-    const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
-    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
-      inputs.parallel_tool_calls = false;
-    }
-    return common_chat_params_init(*template_ptr, inputs);
+    auto tmps = common_chat_templates_init(sess->model(), chat_template);
+    return common_chat_templates_apply(tmps.get(), inputs);
   } else {
-    const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
-    if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
-      inputs.parallel_tool_calls = false;
-    }
-    return common_chat_params_init(*template_ptr, inputs);
+    return common_chat_templates_apply(templates.get(), inputs);
   }
 }
 
 std::string getFormattedChat(
   const struct llama_model * model,
-  const common_chat_templates &templates,
+  const common_chat_templates_ptr &templates,
   const std::string &messages,
   const std::string &chat_template
 ) {
-  auto chat_json = json::parse(messages);
-
-  // Handle regular chat without tools
-  std::vector<common_chat_msg> chat_msgs;
-  for (const auto &msg : chat_json) {
-    chat_msgs.push_back({
-      msg["role"].get<std::string>(),
-      msg["content"].get<std::string>()
-    });
-  }
+  common_chat_templates_inputs inputs;
+  inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
+  inputs.use_jinja = false;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
-    auto tmp = common_chat_templates_from_model(model, chat_template);
-    return common_chat_apply_template(
-      *tmp.template_default,
-      chat_msgs,
-      true,
-      false
-    );
+    auto tmps = common_chat_templates_init(model, chat_template);
+    return common_chat_templates_apply(tmps.get(), inputs).prompt;
   } else {
-    return common_chat_apply_template(
-      *templates.template_default,
-      chat_msgs,
-      true,
-      false
-    );
+    return common_chat_templates_apply(templates.get(), inputs).prompt;
   }
 }
 
@@ -493,23 +475,24 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto parallel_tool_calls = get_option<bool>(params, "parallel_tool_calls", false);
   auto tool_choice = get_option<std::string>(params, "tool_choice", "");
 
-  auto chatParams = getFormattedChatWithJinja(_sess->model(), _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
+  auto chatParams = getFormattedChatWithJinja(_sess, _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
 
   Napi::Object result = Napi::Object::New(env);
-  result.Set("prompt", chatParams.prompt.get<std::string>());
+  result.Set("prompt", chatParams.prompt);
   // chat_format: int
   result.Set("chat_format", static_cast<int>(chatParams.format));
   // grammar: string
   result.Set("grammar", chatParams.grammar);
   // grammar_lazy: boolean
   result.Set("grammea_lazy", chatParams.grammar_lazy);
-  // grammar_triggers: [{ word: string, at_start: boolean }]
+  // grammar_triggers: [{ value: string, token: number }]
   Napi::Array grammar_triggers = Napi::Array::New(env);
   for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
     const auto & trigger = chatParams.grammar_triggers[i];
     Napi::Object triggerObj = Napi::Object::New(env);
-    triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
-    triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
+    triggerObj.Set("type", Napi::Number::New(env, trigger.type));
+    triggerObj.Set("value", Napi::String::New(env, trigger.value));
+    triggerObj.Set("token", Napi::Number::New(env, trigger.token));
     grammar_triggers.Set(i, triggerObj);
   }
   result.Set("grammar_triggers", grammar_triggers);
@@ -586,6 +569,60 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     }
   }
 
+  // Handle preserved_tokens from options
+  if (options.Has("preserved_tokens")) {
+    auto preserved_tokens = options.Get("preserved_tokens").As<Napi::Array>();
+    for (size_t i = 0; i < preserved_tokens.Length(); i++) {
+      auto token = preserved_tokens.Get(i).ToString().Utf8Value();
+      auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+      if (ids.size() == 1) {
+        params.sampling.preserved_tokens.insert(ids[0]);
+      }
+    }
+  }
+
+  // Handle grammar_triggers from options
+  if (options.Has("grammar_triggers")) {
+    auto grammar_triggers = options.Get("grammar_triggers").As<Napi::Array>();
+    for (size_t i = 0; i < grammar_triggers.Length(); i++) {
+      auto trigger_obj = grammar_triggers.Get(i).As<Napi::Object>();
+
+      auto type = static_cast<common_grammar_trigger_type>(trigger_obj.Get("type").ToNumber().Int32Value());
+      auto word = trigger_obj.Get("value").ToString().Utf8Value();
+
+      if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+        auto ids = common_tokenize(_sess->context(), word, /* add_special= */ false, /* parse_special= */ true);
+        if (ids.size() == 1) {
+          auto token = ids[0];
+          if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
+            throw std::runtime_error("Grammar trigger word should be marked as preserved token");
+          }
+          common_grammar_trigger trigger;
+          trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+          trigger.value = word;
+          trigger.token = token;
+          params.sampling.grammar_triggers.push_back(std::move(trigger));
+        } else {
+          params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+        }
+      } else {
+        common_grammar_trigger trigger;
+        trigger.type = type;
+        trigger.value = word;
+        if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+          auto token = (llama_token) trigger_obj.Get("token").ToNumber().Int32Value();
+          trigger.token = token;
+        }
+        params.sampling.grammar_triggers.push_back(std::move(trigger));
+      }
+    }
+  }
+
+  // Handle grammar_lazy from options
+  if (options.Has("grammar_lazy")) {
+    params.sampling.grammar_lazy = options.Get("grammar_lazy").ToBoolean().Value();
+  }
+
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
     auto chat_template = get_option<std::string>(options, "chat_template", "");
@@ -598,7 +635,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   auto tool_choice = get_option<std::string>(options, "tool_choice", "none");
 
   auto chatParams = getFormattedChatWithJinja(
-    _sess->model(),
+    _sess,
     _templates,
     json_stringify(messages),
     chat_template,
@@ -608,33 +645,26 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     tool_choice
   );
 
-  params.prompt = chatParams.prompt.get<std::string>();
+  params.prompt = chatParams.prompt;
 
   chat_format = chatParams.format;
 
+  for (const auto & token : chatParams.preserved_tokens) {
+    auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+    if (ids.size() == 1) {
+      params.sampling.preserved_tokens.insert(ids[0]);
+    }
+  }
+
   if (!has_grammar_set) {
     // grammar param always wins jinja template & json_schema
     params.sampling.grammar = chatParams.grammar;
     params.sampling.grammar_lazy = chatParams.grammar_lazy;
-
     for (const auto & trigger : chatParams.grammar_triggers) {
-      auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
-      if (ids.size() == 1) {
-        params.sampling.grammar_trigger_tokens.push_back(ids[0]);
-        params.sampling.preserved_tokens.insert(ids[0]);
-        continue;
-      }
-      params.sampling.grammar_trigger_words.push_back(trigger);
+      params.sampling.grammar_triggers.push_back(trigger);
     }
     has_grammar_set = true;
   }
-
-  for (const auto & token : chatParams.preserved_tokens) {
-    auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
-    if (ids.size() == 1) {
-      params.sampling.preserved_tokens.insert(ids[0]);
-    }
-  }
 
   for (const auto & stop : chatParams.additional_stops) {
     stop_words.push_back(stop);
@@ -685,6 +715,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
   params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
   params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
+  params.sampling.top_n_sigma = get_option<float>(options, "top_n_sigma", -1.0f);
   params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
   params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
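Putting the Completion() changes together, a hedged sketch of a request body exercising the new per-request options. The completion call itself, the grammar string and all concrete values are assumptions; the numeric trigger types must match common_grammar_trigger_type in llama.cpp's common/common.h.

// Illustrative sketch only: field names follow the native handler above,
// all values are placeholders.
const completionOptions = {
  messages: [{ role: 'user', content: 'List two prime numbers.' }],
  // Lazy grammar activated by triggers; a word trigger that tokenizes to a
  // single token must also appear in preserved_tokens or Completion() throws.
  grammar: 'root ::= "[" [0-9]+ ("," [0-9]+)* "]"',  // placeholder GBNF grammar
  grammar_lazy: true,
  preserved_tokens: ['<tool_call>'],
  grammar_triggers: [
    // type is a placeholder; use the numeric value of the WORD trigger type
    // from common_grammar_trigger_type in the bundled llama.cpp revision.
    { type: 1, value: '<tool_call>' },
  ],
  // New sampling option forwarded to llama.cpp's top-n-sigma sampler;
  // -1 matches the default shown in the diff above.
  top_n_sigma: -1.0,
}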
package/src/LlamaContext.h CHANGED
@@ -28,7 +28,7 @@ private:
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
-  common_chat_templates _templates;
+  common_chat_templates_ptr _templates;
   std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/common.hpp CHANGED
@@ -2,8 +2,7 @@
 
 #include "common/common.h"
 #include "common/sampling.h"
-#include "chat.hpp"
-#include "chat-template.hpp"
+#include "chat.h"
 #include "llama.h"
 #include <memory>
 #include <mutex>