@fugood/llama.node 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +1 -1
  18. package/package.json +1 -1
  19. package/src/LlamaContext.cpp +98 -76
  20. package/src/LlamaContext.h +1 -1
  21. package/src/common.hpp +1 -2
  22. package/src/llama.cpp/.github/workflows/build.yml +89 -10
  23. package/src/llama.cpp/.github/workflows/server.yml +2 -0
  24. package/src/llama.cpp/CMakeLists.txt +9 -1
  25. package/src/llama.cpp/cmake/common.cmake +2 -0
  26. package/src/llama.cpp/common/CMakeLists.txt +3 -3
  27. package/src/llama.cpp/common/arg.cpp +132 -13
  28. package/src/llama.cpp/common/chat.cpp +960 -266
  29. package/src/llama.cpp/common/chat.h +135 -0
  30. package/src/llama.cpp/common/common.cpp +33 -174
  31. package/src/llama.cpp/common/common.h +27 -67
  32. package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
  33. package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
  34. package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
  35. package/src/llama.cpp/common/ngram-cache.cpp +1 -0
  36. package/src/llama.cpp/common/sampling.cpp +45 -7
  37. package/src/llama.cpp/common/speculative.cpp +10 -9
  38. package/src/llama.cpp/common/speculative.h +1 -1
  39. package/src/llama.cpp/docs/build.md +45 -7
  40. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
  41. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +4 -2
  42. package/src/llama.cpp/examples/embedding/embedding.cpp +2 -1
  43. package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
  44. package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
  45. package/src/llama.cpp/examples/imatrix/imatrix.cpp +3 -4
  46. package/src/llama.cpp/examples/infill/infill.cpp +2 -2
  47. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
  48. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +5 -5
  49. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  50. package/src/llama.cpp/examples/llava/clip.cpp +373 -107
  51. package/src/llama.cpp/examples/llava/clip.h +19 -3
  52. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
  53. package/src/llama.cpp/examples/llava/llava.cpp +4 -2
  54. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
  55. package/src/llama.cpp/examples/lookahead/lookahead.cpp +7 -6
  56. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  57. package/src/llama.cpp/examples/main/main.cpp +79 -34
  58. package/src/llama.cpp/examples/parallel/parallel.cpp +6 -5
  59. package/src/llama.cpp/examples/passkey/passkey.cpp +15 -14
  60. package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
  61. package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
  62. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
  63. package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
  64. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
  65. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
  66. package/src/llama.cpp/examples/run/run.cpp +196 -108
  67. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
  68. package/src/llama.cpp/examples/server/server.cpp +113 -101
  69. package/src/llama.cpp/examples/server/utils.hpp +94 -105
  70. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  71. package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
  72. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  73. package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
  74. package/src/llama.cpp/examples/tts/tts.cpp +263 -151
  75. package/src/llama.cpp/ggml/CMakeLists.txt +14 -1
  76. package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
  77. package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
  78. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
  79. package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
  80. package/src/llama.cpp/ggml/include/ggml.h +29 -1
  81. package/src/llama.cpp/ggml/src/CMakeLists.txt +15 -34
  82. package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
  83. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
  84. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
  85. package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
  86. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
  87. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -7
  88. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
  89. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +139 -16
  90. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
  91. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
  93. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
  94. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1546 -387
  95. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1645 -113
  96. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
  97. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  101. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
  102. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
  103. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
  104. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
  105. package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
  106. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
  107. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +242 -0
  108. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -6
  109. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
  110. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -138
  111. package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
  112. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
  113. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +5 -0
  114. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
  116. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +117 -36
  117. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
  118. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
  119. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  120. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  121. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
  122. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +147 -16
  123. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
  124. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +307 -0
  125. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
  126. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +262 -746
  127. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
  128. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -78
  129. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
  130. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +4 -1
  132. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  134. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  135. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  136. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +498 -188
  137. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
  138. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +16 -3
  139. package/src/llama.cpp/ggml/src/ggml.c +93 -5
  140. package/src/llama.cpp/include/llama.h +105 -27
  141. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  142. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  143. package/src/llama.cpp/requirements/requirements-all.txt +1 -0
  144. package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  145. package/src/llama.cpp/requirements.txt +1 -0
  146. package/src/llama.cpp/src/CMakeLists.txt +5 -2
  147. package/src/llama.cpp/src/llama-adapter.cpp +19 -20
  148. package/src/llama.cpp/src/llama-adapter.h +11 -9
  149. package/src/llama.cpp/src/llama-arch.cpp +123 -16
  150. package/src/llama.cpp/src/llama-arch.h +19 -0
  151. package/src/llama.cpp/src/llama-batch.h +2 -2
  152. package/src/llama.cpp/src/llama-chat.cpp +1 -0
  153. package/src/llama.cpp/src/llama-context.cpp +2253 -1222
  154. package/src/llama.cpp/src/llama-context.h +214 -77
  155. package/src/llama.cpp/src/llama-cparams.h +1 -0
  156. package/src/llama.cpp/src/llama-grammar.cpp +182 -182
  157. package/src/llama.cpp/src/llama-grammar.h +12 -3
  158. package/src/llama.cpp/src/llama-graph.cpp +1662 -0
  159. package/src/llama.cpp/src/llama-graph.h +574 -0
  160. package/src/llama.cpp/src/llama-hparams.cpp +8 -0
  161. package/src/llama.cpp/src/llama-hparams.h +9 -0
  162. package/src/llama.cpp/src/llama-io.cpp +15 -0
  163. package/src/llama.cpp/src/llama-io.h +35 -0
  164. package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
  165. package/src/llama.cpp/src/llama-kv-cache.h +178 -109
  166. package/src/llama.cpp/src/llama-memory.cpp +1 -0
  167. package/src/llama.cpp/src/llama-memory.h +21 -0
  168. package/src/llama.cpp/src/llama-mmap.cpp +11 -1
  169. package/src/llama.cpp/src/llama-model.cpp +8230 -122
  170. package/src/llama.cpp/src/llama-model.h +34 -1
  171. package/src/llama.cpp/src/llama-quant.cpp +10 -1
  172. package/src/llama.cpp/src/llama-sampling.cpp +43 -10
  173. package/src/llama.cpp/src/llama-vocab.cpp +12 -0
  174. package/src/llama.cpp/src/llama.cpp +51 -9837
  175. package/src/llama.cpp/tests/test-backend-ops.cpp +247 -112
  176. package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
  177. package/src/llama.cpp/tests/test-chat.cpp +593 -395
  178. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
  179. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
  180. package/src/llama.cpp/Sources/llama/llama.h +0 -4
  181. package/src/llama.cpp/common/chat.hpp +0 -55
  182. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
  183. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
  184. package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
package/lib/binding.ts CHANGED
@@ -87,7 +87,7 @@ export type LlamaCompletionOptions = {
  stop?: string[]
  grammar?: string
  grammar_lazy?: boolean
- grammar_triggers?: { word: string; at_start: boolean }[]
+ grammar_triggers?: { type: number; word: string; at_start: boolean }[]
  preserved_tokens?: string[]
  }

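Each grammar trigger now carries a numeric type discriminator alongside the trigger word. A minimal TypeScript sketch of the updated option shape (field names are taken from the type above; the concrete numeric value used for type is an assumption here and corresponds to llama.cpp's common_grammar_trigger_type enum):

// Sketch only: mirrors the LlamaCompletionOptions fields declared above.
type GrammarTrigger = { type: number; word: string; at_start: boolean }

const lazyGrammarOptions = {
  grammar_lazy: true,
  grammar_triggers: [{ type: 1, word: '<tool_call>', at_start: false }] as GrammarTrigger[],
  // Trigger words are also expected to appear in preserved_tokens
  // (enforced in LlamaContext.cpp further down in this diff).
  preserved_tokens: ['<tool_call>'],
}
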
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "0.3.13",
+ "version": "0.3.15",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
package/src/LlamaContext.cpp CHANGED
@@ -272,7 +272,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  _sess = sess;
  _info = common_params_get_system_info(params);

- _templates = common_chat_templates_from_model(model, params.chat_template);
+ _templates = common_chat_templates_init(model, params.chat_template);
  }

  // getSystemInfo(): string
@@ -355,22 +355,22 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
  Napi::Object minja = Napi::Object::New(info.Env());
  minja.Set("default", validateModelChatTemplate(model, true, ""));
  Napi::Object defaultCaps = Napi::Object::New(info.Env());
- defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
- defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
- defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
- defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
- defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
- defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+ defaultCaps.Set("tools", _templates.get()->template_default->original_caps().supports_tools);
+ defaultCaps.Set("toolCalls", _templates.get()->template_default->original_caps().supports_tool_calls);
+ defaultCaps.Set("toolResponses", _templates.get()->template_default->original_caps().supports_tool_responses);
+ defaultCaps.Set("systemRole", _templates.get()->template_default->original_caps().supports_system_role);
+ defaultCaps.Set("parallelToolCalls", _templates.get()->template_default->original_caps().supports_parallel_tool_calls);
+ defaultCaps.Set("toolCallId", _templates.get()->template_default->original_caps().supports_tool_call_id);
  minja.Set("defaultCaps", defaultCaps);
  minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
- if (_templates.template_tool_use) {
+ if (_templates.get()->template_tool_use) {
  Napi::Object toolUseCaps = Napi::Object::New(info.Env());
- toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
- toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
- toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
- toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
- toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
- toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+ toolUseCaps.Set("tools", _templates.get()->template_tool_use->original_caps().supports_tools);
+ toolUseCaps.Set("toolCalls", _templates.get()->template_tool_use->original_caps().supports_tool_calls);
+ toolUseCaps.Set("toolResponses", _templates.get()->template_tool_use->original_caps().supports_tool_responses);
+ toolUseCaps.Set("systemRole", _templates.get()->template_tool_use->original_caps().supports_system_role);
+ toolUseCaps.Set("parallelToolCalls", _templates.get()->template_tool_use->original_caps().supports_parallel_tool_calls);
+ toolUseCaps.Set("toolCallId", _templates.get()->template_tool_use->original_caps().supports_tool_call_id);
  minja.Set("toolUseCaps", toolUseCaps);
  }
  chatTemplates.Set("minja", minja);
@@ -385,7 +385,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {

  common_chat_params getFormattedChatWithJinja(
  const std::shared_ptr<LlamaSession> &sess,
- const common_chat_templates &templates,
+ const common_chat_templates_ptr &templates,
  const std::string &messages,
  const std::string &chat_template,
  const std::string &json_schema,
@@ -393,72 +393,46 @@ common_chat_params getFormattedChatWithJinja(
  const bool &parallel_tool_calls,
  const std::string &tool_choice
  ) {
- common_chat_inputs inputs;
- inputs.messages = json::parse(messages);
+ common_chat_templates_inputs inputs;
+ inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
  auto useTools = !tools.empty();
  if (useTools) {
- inputs.tools = json::parse(tools);
+ inputs.tools = common_chat_tools_parse_oaicompat(json::parse(tools));
  }
  inputs.parallel_tool_calls = parallel_tool_calls;
  if (!tool_choice.empty()) {
- inputs.tool_choice = tool_choice;
+ inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tool_choice);
  }
  if (!json_schema.empty()) {
- inputs.json_schema = json::parse(json_schema);
+ inputs.json_schema = json::parse(json_schema);
  }
  inputs.extract_reasoning = sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
- inputs.stream = true;

  // If chat_template is provided, create new one and use it (probably slow)
  if (!chat_template.empty()) {
- auto tmp = common_chat_templates_from_model(sess->model(), chat_template);
- const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
- if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
- inputs.parallel_tool_calls = false;
- }
- return common_chat_params_init(*template_ptr, inputs);
+ auto tmps = common_chat_templates_init(sess->model(), chat_template);
+ return common_chat_templates_apply(tmps.get(), inputs);
  } else {
- const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
- if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
- inputs.parallel_tool_calls = false;
- }
- return common_chat_params_init(*template_ptr, inputs);
+ return common_chat_templates_apply(templates.get(), inputs);
  }
  }

  std::string getFormattedChat(
  const struct llama_model * model,
- const common_chat_templates &templates,
+ const common_chat_templates_ptr &templates,
  const std::string &messages,
  const std::string &chat_template
  ) {
- auto chat_json = json::parse(messages);
-
- // Handle regular chat without tools
- std::vector<common_chat_msg> chat_msgs;
- for (const auto &msg : chat_json) {
- chat_msgs.push_back({
- msg["role"].get<std::string>(),
- msg["content"].get<std::string>()
- });
- }
+ common_chat_templates_inputs inputs;
+ inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
+ inputs.use_jinja = false;

  // If chat_template is provided, create new one and use it (probably slow)
  if (!chat_template.empty()) {
- auto tmp = common_chat_templates_from_model(model, chat_template);
- return common_chat_apply_template(
- *tmp.template_default,
- chat_msgs,
- true,
- false
- );
+ auto tmps = common_chat_templates_init(model, chat_template);
+ return common_chat_templates_apply(tmps.get(), inputs).prompt;
  } else {
- return common_chat_apply_template(
- *templates.template_default,
- chat_msgs,
- true,
- false
- );
+ return common_chat_templates_apply(templates.get(), inputs).prompt;
  }
  }

@@ -504,20 +478,21 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
  auto chatParams = getFormattedChatWithJinja(_sess, _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);

  Napi::Object result = Napi::Object::New(env);
- result.Set("prompt", chatParams.prompt.get<std::string>());
+ result.Set("prompt", chatParams.prompt);
  // chat_format: int
  result.Set("chat_format", static_cast<int>(chatParams.format));
  // grammar: string
  result.Set("grammar", chatParams.grammar);
  // grammar_lazy: boolean
  result.Set("grammea_lazy", chatParams.grammar_lazy);
- // grammar_triggers: [{ word: string, at_start: boolean }]
+ // grammar_triggers: [{ value: string, token: number }]
  Napi::Array grammar_triggers = Napi::Array::New(env);
  for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
  const auto & trigger = chatParams.grammar_triggers[i];
  Napi::Object triggerObj = Napi::Object::New(env);
- triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
- triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
+ triggerObj.Set("type", Napi::Number::New(env, trigger.type));
+ triggerObj.Set("value", Napi::String::New(env, trigger.value));
+ triggerObj.Set("token", Napi::Number::New(env, trigger.token));
  grammar_triggers.Set(i, triggerObj);
  }
  result.Set("grammar_triggers", grammar_triggers);
@@ -594,6 +569,60 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  }
  }

+ // Handle preserved_tokens from options
+ if (options.Has("preserved_tokens")) {
+ auto preserved_tokens = options.Get("preserved_tokens").As<Napi::Array>();
+ for (size_t i = 0; i < preserved_tokens.Length(); i++) {
+ auto token = preserved_tokens.Get(i).ToString().Utf8Value();
+ auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+ if (ids.size() == 1) {
+ params.sampling.preserved_tokens.insert(ids[0]);
+ }
+ }
+ }
+
+ // Handle grammar_triggers from options
+ if (options.Has("grammar_triggers")) {
+ auto grammar_triggers = options.Get("grammar_triggers").As<Napi::Array>();
+ for (size_t i = 0; i < grammar_triggers.Length(); i++) {
+ auto trigger_obj = grammar_triggers.Get(i).As<Napi::Object>();
+
+ auto type = static_cast<common_grammar_trigger_type>(trigger_obj.Get("type").ToNumber().Int32Value());
+ auto word = trigger_obj.Get("value").ToString().Utf8Value();
+
+ if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+ auto ids = common_tokenize(_sess->context(), word, /* add_special= */ false, /* parse_special= */ true);
+ if (ids.size() == 1) {
+ auto token = ids[0];
+ if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
+ throw std::runtime_error("Grammar trigger word should be marked as preserved token");
+ }
+ common_grammar_trigger trigger;
+ trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+ trigger.value = word;
+ trigger.token = token;
+ params.sampling.grammar_triggers.push_back(std::move(trigger));
+ } else {
+ params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+ }
+ } else {
+ common_grammar_trigger trigger;
+ trigger.type = type;
+ trigger.value = word;
+ if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+ auto token = (llama_token) trigger_obj.Get("token").ToNumber().Int32Value();
+ trigger.token = token;
+ }
+ params.sampling.grammar_triggers.push_back(std::move(trigger));
+ }
+ }
+ }
+
+ // Handle grammar_lazy from options
+ if (options.Has("grammar_lazy")) {
+ params.sampling.grammar_lazy = options.Get("grammar_lazy").ToBoolean().Value();
+ }
+
  if (options.Has("messages") && options.Get("messages").IsArray()) {
  auto messages = options.Get("messages").As<Napi::Array>();
  auto chat_template = get_option<std::string>(options, "chat_template", "");
@@ -616,33 +645,26 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  tool_choice
  );

- params.prompt = chatParams.prompt.get<std::string>();
+ params.prompt = chatParams.prompt;

  chat_format = chatParams.format;

+ for (const auto & token : chatParams.preserved_tokens) {
+ auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+ if (ids.size() == 1) {
+ params.sampling.preserved_tokens.insert(ids[0]);
+ }
+ }
+
  if (!has_grammar_set) {
  // grammar param always wins jinja template & json_schema
  params.sampling.grammar = chatParams.grammar;
  params.sampling.grammar_lazy = chatParams.grammar_lazy;
-
  for (const auto & trigger : chatParams.grammar_triggers) {
- auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
- if (ids.size() == 1) {
- params.sampling.grammar_trigger_tokens.push_back(ids[0]);
- params.sampling.preserved_tokens.insert(ids[0]);
- continue;
- }
- params.sampling.grammar_trigger_words.push_back(trigger);
+ params.sampling.grammar_triggers.push_back(trigger);
  }
  has_grammar_set = true;
  }
-
- for (const auto & token : chatParams.preserved_tokens) {
- auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
- if (ids.size() == 1) {
- params.sampling.preserved_tokens.insert(ids[0]);
- }
- }

  for (const auto & stop : chatParams.additional_stops) {
  stop_words.push_back(stop);
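The Completion handler above now consumes preserved_tokens, grammar_triggers and grammar_lazy directly from the options object, so a formatted-chat result can be fed straight back into a completion call. A hedged TypeScript sketch of that round trip (FormattedChatResult is the sketch shape from earlier; the option field names follow what the native handler reads, which uses value/token rather than the word/at_start fields declared in lib/binding.ts):

// chatParams stands in for the object returned by the native GetFormattedChat.
declare const chatParams: FormattedChatResult

const completionOptions = {
  prompt: chatParams.prompt,
  grammar: chatParams.grammar,
  grammar_lazy: chatParams.grammea_lazy,
  // Completion reads { type, value, token } from each trigger; single-token word
  // triggers must also be listed in preserved_tokens or the native code throws.
  grammar_triggers: chatParams.grammar_triggers,
  preserved_tokens: chatParams.grammar_triggers.map((t) => t.value),
}
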
package/src/LlamaContext.h CHANGED
@@ -28,7 +28,7 @@ private:
  std::string _info;
  Napi::Object _meta;
  LlamaSessionPtr _sess = nullptr;
- common_chat_templates _templates;
+ common_chat_templates_ptr _templates;
  std::vector<common_adapter_lora_info> _lora;
  LlamaCompletionWorker *_wip = nullptr;
  };
package/src/common.hpp CHANGED
@@ -2,8 +2,7 @@

  #include "common/common.h"
  #include "common/sampling.h"
- #include "chat.hpp"
- #include "chat-template.hpp"
+ #include "chat.h"
  #include "llama.h"
  #include <memory>
  #include <mutex>
package/src/llama.cpp/.github/workflows/build.yml CHANGED
@@ -173,7 +173,15 @@ jobs:
  name: llama-bin-macos-x64.zip

  ubuntu-cpu-cmake:
- runs-on: ubuntu-22.04
+ strategy:
+ matrix:
+ include:
+ - build: 'x64'
+ os: ubuntu-22.04
+ - build: 'arm64'
+ os: ubuntu-22.04-arm
+
+ runs-on: ${{ matrix.os }}

  steps:
  - name: Clone
@@ -239,14 +247,14 @@ jobs:
  run: |
  cp LICENSE ./build/bin/
  cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
- zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*

  - name: Upload artifacts
  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
  uses: actions/upload-artifact@v4
  with:
- path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
- name: llama-bin-ubuntu-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
+ name: llama-bin-ubuntu-${{ matrix.build }}.zip

  ubuntu-latest-cmake-sanitizer:
  runs-on: ubuntu-latest
@@ -459,6 +467,7 @@ jobs:
  run: |
  cmake -B build -S . \
  -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
+ -DGGML_HIP_ROCWMMA_FATTN=ON \
  -DGGML_HIP=ON
  cmake --build build --config Release -j $(nproc)

@@ -468,6 +477,7 @@ jobs:
  cmake -B build2 -S . \
  -DCMAKE_C_COMPILER=hipcc \
  -DCMAKE_CXX_COMPILER=hipcc \
+ -DGGML_HIP_ROCWMMA_FATTN=ON \
  -DGGML_HIP=ON
  cmake --build build2 --config Release -j $(nproc)

@@ -666,6 +676,35 @@ jobs:
  -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
  cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

+ macOS-latest-cmake-visionos:
+ runs-on: macos-latest
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Dependencies
+ id: depends
+ continue-on-error: true
+ run: |
+ brew update
+
+ - name: Build
+ id: cmake_build
+ run: |
+ sysctl -a
+ cmake -B build -G Xcode \
+ -DGGML_METAL_USE_BF16=ON \
+ -DGGML_METAL_EMBED_LIBRARY=ON \
+ -DLLAMA_BUILD_EXAMPLES=OFF \
+ -DLLAMA_BUILD_TESTS=OFF \
+ -DLLAMA_BUILD_SERVER=OFF \
+ -DCMAKE_SYSTEM_NAME=visionOS \
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+
  macOS-latest-swift:
  runs-on: macos-latest

@@ -702,12 +741,11 @@ jobs:
  -DLLAMA_BUILD_SERVER=OFF \
  -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
  cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
- sudo cmake --install build --config Release

  - name: xcodebuild for swift package
  id: xcodebuild
  run: |
- xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
+ ./build-xcframework.sh

  windows-msys2:
  runs-on: windows-latest
@@ -765,7 +803,7 @@ jobs:
  env:
  OPENBLAS_VERSION: 0.3.23
  SDE_VERSION: 9.33.0-2024-01-07
- VULKAN_VERSION: 1.3.261.1
+ VULKAN_VERSION: 1.4.304.1

  strategy:
  matrix:
@@ -1195,6 +1233,11 @@ jobs:
  id: checkout
  uses: actions/checkout@v4

+ - name: Clone rocWMMA repository
+ id: clone_rocwmma
+ run: |
+ git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+
  - name: Install
  id: depends
  run: |
@@ -1224,8 +1267,10 @@ jobs:
  cmake -G "Unix Makefiles" -B build -S . `
  -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
  -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+ -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
  -DCMAKE_BUILD_TYPE=Release `
  -DGGML_HIP=ON `
+ -DGGML_HIP_ROCWMMA_FATTN=ON `
  -DGGML_RPC=ON
  cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

@@ -1244,6 +1289,11 @@ jobs:
  with:
  fetch-depth: 0

+ - name: Clone rocWMMA repository
+ id: clone_rocwmma
+ run: |
+ git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+
  - name: ccache
  uses: hendrikmuhs/ccache-action@v1.2.16
  with:
@@ -1273,8 +1323,10 @@ jobs:
  cmake -G "Unix Makefiles" -B build -S . `
  -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
  -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+ -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
  -DCMAKE_BUILD_TYPE=Release `
  -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
+ -DGGML_HIP_ROCWMMA_FATTN=ON `
  -DGGML_HIP=ON `
  -DGGML_RPC=ON
  cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
@@ -1313,6 +1365,8 @@ jobs:
  steps:
  - name: Checkout code
  uses: actions/checkout@v4
+ with:
+ fetch-depth: 0

  - name: Build
  id: cmake_build
@@ -1328,15 +1382,40 @@ jobs:
  -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
  -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
  cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
- sudo cmake --install build --config Release

  - name: xcodebuild for swift package
  id: xcodebuild
  run: |
- xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
+ ./build-xcframework.sh

  - name: Build Xcode project
- run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
+ run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
+
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+ else
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+ fi
+
+ - name: Pack artifacts
+ id: pack_artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ run: |
+ zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+
+ - name: Upload artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
+ name: llama-${{ steps.tag.outputs.name }}-xcframework

  android-build:
  runs-on: ubuntu-latest
package/src/llama.cpp/.github/workflows/server.yml CHANGED
@@ -161,6 +161,8 @@ jobs:
  - name: Tests
  id: server_integration_tests
  if: ${{ matrix.sanitizer == '' }}
+ env:
+ GITHUB_ACTIONS: "true"
  run: |
  cd examples/server/tests
  ./tests.sh
package/src/llama.cpp/CMakeLists.txt CHANGED
@@ -29,6 +29,8 @@ else()
  set(LLAMA_STANDALONE OFF)
  endif()

+ option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
+
  if (EMSCRIPTEN)
  set(BUILD_SHARED_LIBS_DEFAULT OFF)

@@ -145,7 +147,13 @@ endif()
  # 3rd-party
  #

- if (NOT TARGET ggml)
+ if (LLAMA_USE_SYSTEM_GGML)
+ message(STATUS "Using system-provided libggml, skipping ggml build")
+ find_package(ggml REQUIRED)
+ add_library(ggml ALIAS ggml::ggml)
+ endif()
+
+ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
  add_subdirectory(ggml)
  # ... otherwise assume ggml is added by a parent CMakeLists.txt
  endif()
package/src/llama.cpp/cmake/common.cmake CHANGED
@@ -1,3 +1,5 @@
+ include("ggml/cmake/common.cmake")
+
  function(llama_add_compile_flags)
  if (LLAMA_FATAL_WARNINGS)
  if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -57,8 +57,7 @@ add_library(${TARGET} STATIC
  arg.h
  base64.hpp
  chat.cpp
- chat.hpp
- chat-template.hpp
+ chat.h
  common.cpp
  common.h
  console.cpp
@@ -68,7 +67,8 @@ add_library(${TARGET} STATIC
  llguidance.cpp
  log.cpp
  log.h
- minja.hpp
+ minja/chat-template.hpp
+ minja/minja.hpp
  ngram-cache.cpp
  ngram-cache.h
  sampling.cpp