@fugood/llama.node 0.3.13 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +1 -1
  18. package/package.json +1 -1
  19. package/src/LlamaContext.cpp +98 -76
  20. package/src/LlamaContext.h +1 -1
  21. package/src/common.hpp +1 -2
  22. package/src/llama.cpp/.github/workflows/build.yml +60 -10
  23. package/src/llama.cpp/.github/workflows/server.yml +2 -0
  24. package/src/llama.cpp/common/CMakeLists.txt +3 -3
  25. package/src/llama.cpp/common/arg.cpp +112 -11
  26. package/src/llama.cpp/common/chat.cpp +960 -266
  27. package/src/llama.cpp/common/chat.h +135 -0
  28. package/src/llama.cpp/common/common.cpp +27 -171
  29. package/src/llama.cpp/common/common.h +27 -67
  30. package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
  31. package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
  32. package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
  33. package/src/llama.cpp/common/ngram-cache.cpp +1 -0
  34. package/src/llama.cpp/common/sampling.cpp +45 -7
  35. package/src/llama.cpp/common/speculative.cpp +6 -5
  36. package/src/llama.cpp/common/speculative.h +1 -1
  37. package/src/llama.cpp/docs/build.md +45 -7
  38. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
  39. package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
  40. package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
  41. package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -3
  42. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
  43. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  44. package/src/llama.cpp/examples/llava/clip.cpp +373 -107
  45. package/src/llama.cpp/examples/llava/clip.h +19 -3
  46. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
  47. package/src/llama.cpp/examples/llava/llava.cpp +4 -2
  48. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
  49. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
  50. package/src/llama.cpp/examples/main/main.cpp +73 -28
  51. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
  52. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
  53. package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
  54. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
  55. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
  56. package/src/llama.cpp/examples/run/run.cpp +110 -67
  57. package/src/llama.cpp/examples/server/server.cpp +82 -87
  58. package/src/llama.cpp/examples/server/utils.hpp +94 -107
  59. package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
  60. package/src/llama.cpp/examples/tts/tts.cpp +251 -142
  61. package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
  62. package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
  63. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
  64. package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
  65. package/src/llama.cpp/ggml/include/ggml.h +5 -1
  66. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
  67. package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
  68. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
  69. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
  70. package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
  71. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
  72. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
  73. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
  74. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
  75. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
  76. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
  77. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
  78. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1396 -386
  79. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1432 -151
  80. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
  81. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
  82. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
  83. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
  84. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  85. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
  86. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
  87. package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
  89. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
  90. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
  91. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
  92. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +220 -116
  93. package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
  94. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
  95. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
  96. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  97. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
  98. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
  99. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
  100. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
  101. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  102. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  103. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
  104. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
  105. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
  106. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
  107. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +168 -721
  108. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
  109. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
  110. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  111. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  112. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +146 -42
  113. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
  114. package/src/llama.cpp/ggml/src/ggml.c +8 -3
  115. package/src/llama.cpp/include/llama.h +19 -5
  116. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  117. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  118. package/src/llama.cpp/requirements/requirements-all.txt +1 -0
  119. package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  120. package/src/llama.cpp/requirements.txt +1 -0
  121. package/src/llama.cpp/src/llama-arch.cpp +21 -0
  122. package/src/llama.cpp/src/llama-arch.h +1 -0
  123. package/src/llama.cpp/src/llama-chat.cpp +1 -0
  124. package/src/llama.cpp/src/llama-grammar.cpp +182 -182
  125. package/src/llama.cpp/src/llama-grammar.h +12 -3
  126. package/src/llama.cpp/src/llama-kv-cache.h +1 -0
  127. package/src/llama.cpp/src/llama-mmap.cpp +11 -1
  128. package/src/llama.cpp/src/llama-model.cpp +69 -5
  129. package/src/llama.cpp/src/llama-sampling.cpp +43 -10
  130. package/src/llama.cpp/src/llama-vocab.cpp +12 -0
  131. package/src/llama.cpp/src/llama.cpp +147 -0
  132. package/src/llama.cpp/tests/test-backend-ops.cpp +166 -110
  133. package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
  134. package/src/llama.cpp/tests/test-chat.cpp +593 -395
  135. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
  136. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
  137. package/src/llama.cpp/Sources/llama/llama.h +0 -4
  138. package/src/llama.cpp/common/chat.hpp +0 -55
  139. /package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0
Binary files (prebuilt llama-node.node / node.lib binaries) are not shown in this diff.
package/lib/binding.ts CHANGED
@@ -87,7 +87,7 @@ export type LlamaCompletionOptions = {
   stop?: string[]
   grammar?: string
   grammar_lazy?: boolean
- grammar_triggers?: { word: string; at_start: boolean }[]
+ grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
   }
 
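For reference, a completion-options object matching the updated type could look like the sketch below. The field names come from the LlamaCompletionOptions type above; the grammar string, trigger word and numeric type value are placeholders rather than values taken from this release.

// Sketch only: field names follow the LlamaCompletionOptions fields shown above;
// the grammar text, trigger word and numeric type are placeholders.
const completionOptions = {
  stop: ['</s>'],
  grammar: 'root ::= "<tool_call>" [^<]*',   // placeholder GBNF grammar
  grammar_lazy: true,                        // enforce the grammar only after a trigger fires
  grammar_triggers: [
    // `type` carries a llama.cpp common_grammar_trigger_type value
    { type: 0, word: '<tool_call>', at_start: true },
  ],
  preserved_tokens: ['<tool_call>', '</tool_call>'],
}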
package/package.json CHANGED
@@ -1,7 +1,7 @@
   {
   "name": "@fugood/llama.node",
   "access": "public",
- "version": "0.3.13",
+ "version": "0.3.14",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
package/src/LlamaContext.cpp CHANGED
@@ -272,7 +272,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   _sess = sess;
   _info = common_params_get_system_info(params);
 
- _templates = common_chat_templates_from_model(model, params.chat_template);
+ _templates = common_chat_templates_init(model, params.chat_template);
   }
 
   // getSystemInfo(): string
@@ -355,22 +355,22 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   Napi::Object minja = Napi::Object::New(info.Env());
   minja.Set("default", validateModelChatTemplate(model, true, ""));
   Napi::Object defaultCaps = Napi::Object::New(info.Env());
- defaultCaps.Set("tools", _templates.template_default->original_caps().supports_tools);
- defaultCaps.Set("toolCalls", _templates.template_default->original_caps().supports_tool_calls);
- defaultCaps.Set("toolResponses", _templates.template_default->original_caps().supports_tool_responses);
- defaultCaps.Set("systemRole", _templates.template_default->original_caps().supports_system_role);
- defaultCaps.Set("parallelToolCalls", _templates.template_default->original_caps().supports_parallel_tool_calls);
- defaultCaps.Set("toolCallId", _templates.template_default->original_caps().supports_tool_call_id);
+ defaultCaps.Set("tools", _templates.get()->template_default->original_caps().supports_tools);
+ defaultCaps.Set("toolCalls", _templates.get()->template_default->original_caps().supports_tool_calls);
+ defaultCaps.Set("toolResponses", _templates.get()->template_default->original_caps().supports_tool_responses);
+ defaultCaps.Set("systemRole", _templates.get()->template_default->original_caps().supports_system_role);
+ defaultCaps.Set("parallelToolCalls", _templates.get()->template_default->original_caps().supports_parallel_tool_calls);
+ defaultCaps.Set("toolCallId", _templates.get()->template_default->original_caps().supports_tool_call_id);
   minja.Set("defaultCaps", defaultCaps);
   minja.Set("toolUse", validateModelChatTemplate(model, true, "tool_use"));
- if (_templates.template_tool_use) {
+ if (_templates.get()->template_tool_use) {
   Napi::Object toolUseCaps = Napi::Object::New(info.Env());
- toolUseCaps.Set("tools", _templates.template_tool_use->original_caps().supports_tools);
- toolUseCaps.Set("toolCalls", _templates.template_tool_use->original_caps().supports_tool_calls);
- toolUseCaps.Set("toolResponses", _templates.template_tool_use->original_caps().supports_tool_responses);
- toolUseCaps.Set("systemRole", _templates.template_tool_use->original_caps().supports_system_role);
- toolUseCaps.Set("parallelToolCalls", _templates.template_tool_use->original_caps().supports_parallel_tool_calls);
- toolUseCaps.Set("toolCallId", _templates.template_tool_use->original_caps().supports_tool_call_id);
+ toolUseCaps.Set("tools", _templates.get()->template_tool_use->original_caps().supports_tools);
+ toolUseCaps.Set("toolCalls", _templates.get()->template_tool_use->original_caps().supports_tool_calls);
+ toolUseCaps.Set("toolResponses", _templates.get()->template_tool_use->original_caps().supports_tool_responses);
+ toolUseCaps.Set("systemRole", _templates.get()->template_tool_use->original_caps().supports_system_role);
+ toolUseCaps.Set("parallelToolCalls", _templates.get()->template_tool_use->original_caps().supports_parallel_tool_calls);
+ toolUseCaps.Set("toolCallId", _templates.get()->template_tool_use->original_caps().supports_tool_call_id);
   minja.Set("toolUseCaps", toolUseCaps);
   }
   chatTemplates.Set("minja", minja);
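On the JavaScript side, the capability flags assembled above surface as plain fields on the model info object. A rough sketch of that shape, assuming each capability is exposed as a boolean (the type names here are illustrative; only the property names come from the Set(...) calls above):

// Hedged sketch of the chat-template info built in GetModelInfo above.
type ChatTemplateCaps = {
  tools: boolean
  toolCalls: boolean
  toolResponses: boolean
  systemRole: boolean
  parallelToolCalls: boolean
  toolCallId: boolean
}

type MinjaTemplateInfo = {
  default: unknown               // result of validateModelChatTemplate(model, true, "")
  defaultCaps: ChatTemplateCaps
  toolUse: unknown               // result of validateModelChatTemplate(model, true, "tool_use")
  toolUseCaps?: ChatTemplateCaps // only present when the model ships a tool_use template
}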
@@ -385,7 +385,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
 
   common_chat_params getFormattedChatWithJinja(
   const std::shared_ptr<LlamaSession> &sess,
- const common_chat_templates &templates,
+ const common_chat_templates_ptr &templates,
   const std::string &messages,
   const std::string &chat_template,
   const std::string &json_schema,
@@ -393,72 +393,46 @@ common_chat_params getFormattedChatWithJinja(
   const bool &parallel_tool_calls,
   const std::string &tool_choice
   ) {
- common_chat_inputs inputs;
- inputs.messages = json::parse(messages);
+ common_chat_templates_inputs inputs;
+ inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
   auto useTools = !tools.empty();
   if (useTools) {
- inputs.tools = json::parse(tools);
+ inputs.tools = common_chat_tools_parse_oaicompat(json::parse(tools));
   }
   inputs.parallel_tool_calls = parallel_tool_calls;
   if (!tool_choice.empty()) {
- inputs.tool_choice = tool_choice;
+ inputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tool_choice);
   }
   if (!json_schema.empty()) {
- inputs.json_schema = json::parse(json_schema);
+ inputs.json_schema = json::parse(json_schema);
   }
   inputs.extract_reasoning = sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
- inputs.stream = true;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
- auto tmp = common_chat_templates_from_model(sess->model(), chat_template);
- const common_chat_template* template_ptr = useTools && tmp.template_tool_use ? tmp.template_tool_use.get() : tmp.template_default.get();
- if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
- inputs.parallel_tool_calls = false;
- }
- return common_chat_params_init(*template_ptr, inputs);
+ auto tmps = common_chat_templates_init(sess->model(), chat_template);
+ return common_chat_templates_apply(tmps.get(), inputs);
   } else {
- const common_chat_template* template_ptr = useTools && templates.template_tool_use ? templates.template_tool_use.get() : templates.template_default.get();
- if (inputs.parallel_tool_calls && !template_ptr->original_caps().supports_parallel_tool_calls) {
- inputs.parallel_tool_calls = false;
- }
- return common_chat_params_init(*template_ptr, inputs);
+ return common_chat_templates_apply(templates.get(), inputs);
   }
   }
 
   std::string getFormattedChat(
   const struct llama_model * model,
- const common_chat_templates &templates,
+ const common_chat_templates_ptr &templates,
   const std::string &messages,
   const std::string &chat_template
   ) {
- auto chat_json = json::parse(messages);
-
- // Handle regular chat without tools
- std::vector<common_chat_msg> chat_msgs;
- for (const auto &msg : chat_json) {
- chat_msgs.push_back({
- msg["role"].get<std::string>(),
- msg["content"].get<std::string>()
- });
- }
+ common_chat_templates_inputs inputs;
+ inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
+ inputs.use_jinja = false;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
- auto tmp = common_chat_templates_from_model(model, chat_template);
- return common_chat_apply_template(
- *tmp.template_default,
- chat_msgs,
- true,
- false
- );
+ auto tmps = common_chat_templates_init(model, chat_template);
+ return common_chat_templates_apply(tmps.get(), inputs).prompt;
   } else {
- return common_chat_apply_template(
- *templates.template_default,
- chat_msgs,
- true,
- false
- );
+ return common_chat_templates_apply(templates.get(), inputs).prompt;
   }
   }
 
@@ -504,20 +478,21 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto chatParams = getFormattedChatWithJinja(_sess, _templates, messages, chat_template, json_schema_str, tools_str, parallel_tool_calls, tool_choice);
 
   Napi::Object result = Napi::Object::New(env);
- result.Set("prompt", chatParams.prompt.get<std::string>());
+ result.Set("prompt", chatParams.prompt);
   // chat_format: int
   result.Set("chat_format", static_cast<int>(chatParams.format));
   // grammar: string
   result.Set("grammar", chatParams.grammar);
   // grammar_lazy: boolean
   result.Set("grammea_lazy", chatParams.grammar_lazy);
- // grammar_triggers: [{ word: string, at_start: boolean }]
+ // grammar_triggers: [{ value: string, token: number }]
   Napi::Array grammar_triggers = Napi::Array::New(env);
   for (size_t i = 0; i < chatParams.grammar_triggers.size(); i++) {
   const auto & trigger = chatParams.grammar_triggers[i];
   Napi::Object triggerObj = Napi::Object::New(env);
- triggerObj.Set("word", Napi::String::New(env, trigger.word.c_str()));
- triggerObj.Set("at_start", Napi::Boolean::New(env, trigger.at_start));
+ triggerObj.Set("type", Napi::Number::New(env, trigger.type));
+ triggerObj.Set("value", Napi::String::New(env, trigger.value));
+ triggerObj.Set("token", Napi::Number::New(env, trigger.token));
   grammar_triggers.Set(i, triggerObj);
   }
   result.Set("grammar_triggers", grammar_triggers);
@@ -594,6 +569,60 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
   }
 
+ // Handle preserved_tokens from options
+ if (options.Has("preserved_tokens")) {
+ auto preserved_tokens = options.Get("preserved_tokens").As<Napi::Array>();
+ for (size_t i = 0; i < preserved_tokens.Length(); i++) {
+ auto token = preserved_tokens.Get(i).ToString().Utf8Value();
+ auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+ if (ids.size() == 1) {
+ params.sampling.preserved_tokens.insert(ids[0]);
+ }
+ }
+ }
+
+ // Handle grammar_triggers from options
+ if (options.Has("grammar_triggers")) {
+ auto grammar_triggers = options.Get("grammar_triggers").As<Napi::Array>();
+ for (size_t i = 0; i < grammar_triggers.Length(); i++) {
+ auto trigger_obj = grammar_triggers.Get(i).As<Napi::Object>();
+
+ auto type = static_cast<common_grammar_trigger_type>(trigger_obj.Get("type").ToNumber().Int32Value());
+ auto word = trigger_obj.Get("value").ToString().Utf8Value();
+
+ if (type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+ auto ids = common_tokenize(_sess->context(), word, /* add_special= */ false, /* parse_special= */ true);
+ if (ids.size() == 1) {
+ auto token = ids[0];
+ if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
+ throw std::runtime_error("Grammar trigger word should be marked as preserved token");
+ }
+ common_grammar_trigger trigger;
+ trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+ trigger.value = word;
+ trigger.token = token;
+ params.sampling.grammar_triggers.push_back(std::move(trigger));
+ } else {
+ params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+ }
+ } else {
+ common_grammar_trigger trigger;
+ trigger.type = type;
+ trigger.value = word;
+ if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
+ auto token = (llama_token) trigger_obj.Get("token").ToNumber().Int32Value();
+ trigger.token = token;
+ }
+ params.sampling.grammar_triggers.push_back(std::move(trigger));
+ }
+ }
+ }
+
+ // Handle grammar_lazy from options
+ if (options.Has("grammar_lazy")) {
+ params.sampling.grammar_lazy = options.Get("grammar_lazy").ToBoolean().Value();
+ }
+
   if (options.Has("messages") && options.Get("messages").IsArray()) {
   auto messages = options.Get("messages").As<Napi::Array>();
   auto chat_template = get_option<std::string>(options, "chat_template", "");
@@ -616,33 +645,26 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   tool_choice
   );
 
- params.prompt = chatParams.prompt.get<std::string>();
+ params.prompt = chatParams.prompt;
 
   chat_format = chatParams.format;
 
+ for (const auto & token : chatParams.preserved_tokens) {
+ auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
+ if (ids.size() == 1) {
+ params.sampling.preserved_tokens.insert(ids[0]);
+ }
+ }
+
   if (!has_grammar_set) {
   // grammar param always wins jinja template & json_schema
   params.sampling.grammar = chatParams.grammar;
   params.sampling.grammar_lazy = chatParams.grammar_lazy;
-
   for (const auto & trigger : chatParams.grammar_triggers) {
- auto ids = common_tokenize(_sess->context(), trigger.word, /* add_special= */ false, /* parse_special= */ true);
- if (ids.size() == 1) {
- params.sampling.grammar_trigger_tokens.push_back(ids[0]);
- params.sampling.preserved_tokens.insert(ids[0]);
- continue;
- }
- params.sampling.grammar_trigger_words.push_back(trigger);
+ params.sampling.grammar_triggers.push_back(trigger);
   }
   has_grammar_set = true;
   }
-
- for (const auto & token : chatParams.preserved_tokens) {
- auto ids = common_tokenize(_sess->context(), token, /* add_special= */ false, /* parse_special= */ true);
- if (ids.size() == 1) {
- params.sampling.preserved_tokens.insert(ids[0]);
- }
- }
 
   for (const auto & stop : chatParams.additional_stops) {
   stop_words.push_back(stop);
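Taken together, the Completion changes cover two paths: when messages are supplied, the grammar, triggers and preserved tokens produced by the chat template are applied automatically; when the caller supplies them directly, the handler reads type, value and token from each trigger and requires that a single-token word trigger also appear in preserved_tokens. A minimal sketch of manually supplied options that satisfies that rule (values are placeholders; the field names follow what the handler reads above):

// Sketch only: `type`, `value` and `token` are the keys read by the
// grammar_triggers handler above; the values here are placeholders.
const manualGrammarOptions = {
  grammar: 'root ::= "<tool_call>" [^<]*',   // placeholder GBNF grammar
  grammar_lazy: true,
  grammar_triggers: [
    { type: 0, value: '<tool_call>' },       // 0 stands in for a common_grammar_trigger_type value
  ],
  // If '<tool_call>' tokenizes to a single token it must also be preserved,
  // otherwise the handler throws "Grammar trigger word should be marked as preserved token".
  preserved_tokens: ['<tool_call>'],
}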
package/src/LlamaContext.h CHANGED
@@ -28,7 +28,7 @@ private:
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
- common_chat_templates _templates;
+ common_chat_templates_ptr _templates;
   std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
   };
package/src/common.hpp CHANGED
@@ -2,8 +2,7 @@
 
   #include "common/common.h"
   #include "common/sampling.h"
- #include "chat.hpp"
- #include "chat-template.hpp"
+ #include "chat.h"
   #include "llama.h"
   #include <memory>
   #include <mutex>
package/src/llama.cpp/.github/workflows/build.yml CHANGED
@@ -173,7 +173,15 @@ jobs:
   name: llama-bin-macos-x64.zip
 
   ubuntu-cpu-cmake:
- runs-on: ubuntu-22.04
+ strategy:
+ matrix:
+ include:
+ - build: 'x64'
+ os: ubuntu-22.04
+ - build: 'arm64'
+ os: ubuntu-22.04-arm
+
+ runs-on: ${{ matrix.os }}
 
   steps:
   - name: Clone
@@ -239,14 +247,14 @@ jobs:
   run: |
   cp LICENSE ./build/bin/
   cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
- zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
 
   - name: Upload artifacts
   if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
   uses: actions/upload-artifact@v4
   with:
- path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
- name: llama-bin-ubuntu-x64.zip
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
+ name: llama-bin-ubuntu-${{ matrix.build }}.zip
 
   ubuntu-latest-cmake-sanitizer:
   runs-on: ubuntu-latest
@@ -459,6 +467,7 @@ jobs:
   run: |
   cmake -B build -S . \
   -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
+ -DGGML_HIP_ROCWMMA_FATTN=ON \
   -DGGML_HIP=ON
   cmake --build build --config Release -j $(nproc)
 
@@ -468,6 +477,7 @@ jobs:
   cmake -B build2 -S . \
   -DCMAKE_C_COMPILER=hipcc \
   -DCMAKE_CXX_COMPILER=hipcc \
+ -DGGML_HIP_ROCWMMA_FATTN=ON \
   -DGGML_HIP=ON
   cmake --build build2 --config Release -j $(nproc)
 
@@ -702,12 +712,11 @@ jobs:
   -DLLAMA_BUILD_SERVER=OFF \
   -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
   cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
- sudo cmake --install build --config Release
 
   - name: xcodebuild for swift package
   id: xcodebuild
   run: |
- xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
+ ./build-xcframework.sh
 
   windows-msys2:
   runs-on: windows-latest
@@ -765,7 +774,7 @@ jobs:
   env:
   OPENBLAS_VERSION: 0.3.23
   SDE_VERSION: 9.33.0-2024-01-07
- VULKAN_VERSION: 1.3.261.1
+ VULKAN_VERSION: 1.4.304.1
 
   strategy:
   matrix:
@@ -1195,6 +1204,11 @@ jobs:
   id: checkout
   uses: actions/checkout@v4
 
+ - name: Clone rocWMMA repository
+ id: clone_rocwmma
+ run: |
+ git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+
   - name: Install
   id: depends
   run: |
@@ -1224,8 +1238,10 @@ jobs:
   cmake -G "Unix Makefiles" -B build -S . `
   -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
   -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+ -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
   -DCMAKE_BUILD_TYPE=Release `
   -DGGML_HIP=ON `
+ -DGGML_HIP_ROCWMMA_FATTN=ON `
   -DGGML_RPC=ON
   cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
 
@@ -1244,6 +1260,11 @@ jobs:
   with:
   fetch-depth: 0
 
+ - name: Clone rocWMMA repository
+ id: clone_rocwmma
+ run: |
+ git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+
   - name: ccache
   uses: hendrikmuhs/ccache-action@v1.2.16
   with:
@@ -1273,8 +1294,10 @@ jobs:
   cmake -G "Unix Makefiles" -B build -S . `
   -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
   -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+ -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
   -DCMAKE_BUILD_TYPE=Release `
   -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
+ -DGGML_HIP_ROCWMMA_FATTN=ON `
   -DGGML_HIP=ON `
   -DGGML_RPC=ON
   cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
@@ -1313,6 +1336,8 @@ jobs:
   steps:
   - name: Checkout code
   uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
 
   - name: Build
   id: cmake_build
@@ -1328,15 +1353,40 @@ jobs:
   -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
   -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
   cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
- sudo cmake --install build --config Release
 
   - name: xcodebuild for swift package
   id: xcodebuild
   run: |
- xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
+ ./build-xcframework.sh
 
   - name: Build Xcode project
- run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
+ run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
+
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+ else
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+ fi
+
+ - name: Pack artifacts
+ id: pack_artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ run: |
+ zip -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+
+ - name: Upload artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
+ name: llama-${{ steps.tag.outputs.name }}-xcframework
 
   android-build:
   runs-on: ubuntu-latest
package/src/llama.cpp/.github/workflows/server.yml CHANGED
@@ -161,6 +161,8 @@ jobs:
   - name: Tests
   id: server_integration_tests
   if: ${{ matrix.sanitizer == '' }}
+ env:
+ GITHUB_ACTIONS: "true"
   run: |
   cd examples/server/tests
   ./tests.sh
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -57,8 +57,7 @@ add_library(${TARGET} STATIC
   arg.h
   base64.hpp
   chat.cpp
- chat.hpp
- chat-template.hpp
+ chat.h
   common.cpp
   common.h
   console.cpp
@@ -68,7 +67,8 @@ add_library(${TARGET} STATIC
   llguidance.cpp
   log.cpp
   log.h
- minja.hpp
+ minja/chat-template.hpp
+ minja/minja.hpp
   ngram-cache.cpp
   ngram-cache.h
   sampling.cpp