@fugood/llama.node 1.3.0-rc.6 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/CMakeLists.txt +12 -2
  2. package/package.json +14 -14
  3. package/scripts/llama.cpp.patch +8 -9
  4. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  5. package/src/llama.cpp/common/arg.cpp +39 -1001
  6. package/src/llama.cpp/common/arg.h +2 -2
  7. package/src/llama.cpp/common/chat.cpp +216 -2
  8. package/src/llama.cpp/common/chat.h +1 -0
  9. package/src/llama.cpp/common/common.cpp +33 -0
  10. package/src/llama.cpp/common/common.h +13 -0
  11. package/src/llama.cpp/common/download.cpp +1054 -0
  12. package/src/llama.cpp/common/download.h +55 -0
  13. package/src/llama.cpp/common/json-schema-to-grammar.cpp +19 -3
  14. package/src/llama.cpp/ggml/CMakeLists.txt +3 -1
  15. package/src/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
  16. package/src/llama.cpp/ggml/include/ggml.h +2 -0
  17. package/src/llama.cpp/ggml/src/CMakeLists.txt +7 -3
  18. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +10 -3
  19. package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
  20. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
  21. package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  22. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -1
  23. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +0 -5
  24. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +172 -35
  25. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +82 -21
  26. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +25 -25
  27. package/src/llama.cpp/include/llama.h +7 -3
  28. package/src/llama.cpp/src/CMakeLists.txt +95 -0
  29. package/src/llama.cpp/src/llama-arch.cpp +108 -0
  30. package/src/llama.cpp/src/llama-arch.h +11 -0
  31. package/src/llama.cpp/src/llama-batch.cpp +63 -31
  32. package/src/llama.cpp/src/llama-batch.h +12 -1
  33. package/src/llama.cpp/src/llama-chat.cpp +32 -0
  34. package/src/llama.cpp/src/llama-chat.h +1 -0
  35. package/src/llama.cpp/src/llama-context.cpp +44 -16
  36. package/src/llama.cpp/src/llama-context.h +5 -5
  37. package/src/llama.cpp/src/llama-cparams.h +1 -0
  38. package/src/llama.cpp/src/llama-graph.cpp +12 -7
  39. package/src/llama.cpp/src/llama-hparams.cpp +11 -1
  40. package/src/llama.cpp/src/llama-hparams.h +6 -0
  41. package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +3 -1
  42. package/src/llama.cpp/src/llama-kv-cache.cpp +56 -21
  43. package/src/llama.cpp/src/llama-kv-cache.h +2 -4
  44. package/src/llama.cpp/src/llama-kv-cells.h +44 -2
  45. package/src/llama.cpp/src/llama-memory-recurrent.cpp +18 -14
  46. package/src/llama.cpp/src/llama-memory-recurrent.h +2 -2
  47. package/src/llama.cpp/src/llama-model.cpp +350 -13194
  48. package/src/llama.cpp/src/llama-model.h +9 -2
  49. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  50. package/src/llama.cpp/src/llama-vocab.cpp +5 -0
  51. package/src/llama.cpp/src/llama-vocab.h +1 -0
  52. package/src/llama.cpp/src/models/apertus.cpp +125 -0
  53. package/src/llama.cpp/src/models/arcee.cpp +135 -0
  54. package/src/llama.cpp/src/models/arctic.cpp +138 -0
  55. package/src/llama.cpp/src/models/arwkv7.cpp +86 -0
  56. package/src/llama.cpp/src/models/baichuan.cpp +122 -0
  57. package/src/llama.cpp/src/models/bailingmoe.cpp +144 -0
  58. package/src/llama.cpp/src/models/bailingmoe2.cpp +135 -0
  59. package/src/llama.cpp/src/models/bert.cpp +176 -0
  60. package/src/llama.cpp/src/models/bitnet.cpp +160 -0
  61. package/src/llama.cpp/src/models/bloom.cpp +101 -0
  62. package/src/llama.cpp/src/models/chameleon.cpp +178 -0
  63. package/src/llama.cpp/src/models/chatglm.cpp +132 -0
  64. package/src/llama.cpp/src/models/codeshell.cpp +111 -0
  65. package/src/llama.cpp/src/models/cogvlm.cpp +100 -0
  66. package/src/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
  67. package/src/llama.cpp/src/models/command-r.cpp +122 -0
  68. package/src/llama.cpp/src/models/dbrx.cpp +123 -0
  69. package/src/llama.cpp/src/models/deci.cpp +135 -0
  70. package/src/llama.cpp/src/models/deepseek.cpp +144 -0
  71. package/src/llama.cpp/src/models/deepseek2.cpp +236 -0
  72. package/src/llama.cpp/src/models/dots1.cpp +134 -0
  73. package/src/llama.cpp/src/models/dream.cpp +105 -0
  74. package/src/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
  75. package/src/llama.cpp/src/models/ernie4-5.cpp +111 -0
  76. package/src/llama.cpp/src/models/exaone.cpp +114 -0
  77. package/src/llama.cpp/src/models/exaone4.cpp +123 -0
  78. package/src/llama.cpp/src/models/falcon-h1.cpp +113 -0
  79. package/src/llama.cpp/src/models/falcon.cpp +120 -0
  80. package/src/llama.cpp/src/models/gemma-embedding.cpp +120 -0
  81. package/src/llama.cpp/src/models/gemma.cpp +112 -0
  82. package/src/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
  83. package/src/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
  84. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
  85. package/src/llama.cpp/src/models/glm4-moe.cpp +153 -0
  86. package/src/llama.cpp/src/models/glm4.cpp +127 -0
  87. package/src/llama.cpp/src/models/gpt2.cpp +105 -0
  88. package/src/llama.cpp/src/models/gptneox.cpp +144 -0
  89. package/src/llama.cpp/src/models/granite-hybrid.cpp +196 -0
  90. package/src/llama.cpp/src/models/granite.cpp +211 -0
  91. package/src/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
  92. package/src/llama.cpp/src/models/grok.cpp +159 -0
  93. package/src/llama.cpp/src/models/grovemoe.cpp +141 -0
  94. package/src/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
  95. package/src/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
  96. package/src/llama.cpp/src/models/internlm2.cpp +120 -0
  97. package/src/llama.cpp/src/models/jais.cpp +86 -0
  98. package/src/llama.cpp/src/models/jamba.cpp +106 -0
  99. package/src/llama.cpp/src/models/lfm2.cpp +173 -0
  100. package/src/llama.cpp/src/models/llada-moe.cpp +122 -0
  101. package/src/llama.cpp/src/models/llada.cpp +99 -0
  102. package/src/llama.cpp/src/models/llama-iswa.cpp +174 -0
  103. package/src/llama.cpp/src/models/llama.cpp +155 -0
  104. package/src/llama.cpp/src/models/mamba.cpp +55 -0
  105. package/src/llama.cpp/src/models/minicpm3.cpp +199 -0
  106. package/src/llama.cpp/src/models/minimax-m2.cpp +124 -0
  107. package/src/llama.cpp/src/models/models.h +481 -0
  108. package/src/llama.cpp/src/models/mpt.cpp +126 -0
  109. package/src/llama.cpp/src/models/nemotron-h.cpp +121 -0
  110. package/src/llama.cpp/src/models/nemotron.cpp +122 -0
  111. package/src/llama.cpp/src/models/neo-bert.cpp +104 -0
  112. package/src/llama.cpp/src/models/olmo.cpp +121 -0
  113. package/src/llama.cpp/src/models/olmo2.cpp +150 -0
  114. package/src/llama.cpp/src/models/olmoe.cpp +124 -0
  115. package/src/llama.cpp/src/models/openai-moe-iswa.cpp +123 -0
  116. package/src/llama.cpp/src/models/openelm.cpp +124 -0
  117. package/src/llama.cpp/src/models/orion.cpp +123 -0
  118. package/src/llama.cpp/src/models/pangu-embedded.cpp +121 -0
  119. package/src/llama.cpp/src/models/phi2.cpp +121 -0
  120. package/src/llama.cpp/src/models/phi3.cpp +152 -0
  121. package/src/llama.cpp/src/models/plamo.cpp +110 -0
  122. package/src/llama.cpp/src/models/plamo2.cpp +316 -0
  123. package/src/llama.cpp/src/models/plm.cpp +168 -0
  124. package/src/llama.cpp/src/models/qwen.cpp +108 -0
  125. package/src/llama.cpp/src/models/qwen2.cpp +117 -0
  126. package/src/llama.cpp/src/models/qwen2moe.cpp +151 -0
  127. package/src/llama.cpp/src/models/qwen2vl.cpp +117 -0
  128. package/src/llama.cpp/src/models/qwen3.cpp +117 -0
  129. package/src/llama.cpp/src/models/qwen3moe.cpp +124 -0
  130. package/src/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
  131. package/src/llama.cpp/src/models/qwen3vl.cpp +141 -0
  132. package/src/llama.cpp/src/models/refact.cpp +94 -0
  133. package/src/llama.cpp/src/models/rwkv6-base.cpp +162 -0
  134. package/src/llama.cpp/src/models/rwkv6.cpp +94 -0
  135. package/src/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
  136. package/src/llama.cpp/src/models/rwkv7-base.cpp +135 -0
  137. package/src/llama.cpp/src/models/rwkv7.cpp +90 -0
  138. package/src/llama.cpp/src/models/seed-oss.cpp +124 -0
  139. package/src/llama.cpp/src/models/smallthinker.cpp +120 -0
  140. package/src/llama.cpp/src/models/smollm3.cpp +128 -0
  141. package/src/llama.cpp/src/models/stablelm.cpp +146 -0
  142. package/src/llama.cpp/src/models/starcoder.cpp +100 -0
  143. package/src/llama.cpp/src/models/starcoder2.cpp +121 -0
  144. package/src/llama.cpp/src/models/t5-dec.cpp +166 -0
  145. package/src/llama.cpp/src/models/t5-enc.cpp +96 -0
  146. package/src/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
  147. package/src/llama.cpp/src/models/xverse.cpp +108 -0
package/src/llama.cpp/common/arg.h

@@ -59,8 +59,8 @@ struct common_arg {
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
     bool is_exclude(enum llama_example ex);
-    bool get_value_from_env(std::string & output);
-    bool has_value_from_env();
+    bool get_value_from_env(std::string & output) const;
+    bool has_value_from_env() const;
     std::string to_string();
 };
 
package/src/llama.cpp/common/chat.cpp

@@ -6,8 +6,11 @@
 #include "log.h"
 #include "regex-partial.h"
 
+#include <algorithm>
 #include <cstdio>
+#include <cctype>
 #include <exception>
+#include <functional>
 #include <iostream>
 #include <optional>
 #include <stdexcept>
@@ -297,7 +300,6 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
         }
         if (!msg.reasoning_content.empty()) {
             jmsg["reasoning_content"] = msg.reasoning_content;
-            jmsg["thinking"] = msg.reasoning_content; // gpt-oss
         }
         if (!msg.tool_name.empty()) {
             jmsg["name"] = msg.tool_name;
@@ -627,6 +629,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
         case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
         case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
+        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -794,6 +797,7 @@ static std::string apply(
     if (additional_context) {
         tmpl_inputs.extra_context.merge_patch(*additional_context);
     }
+    // TODO: add flag to control date/time, if only for testing purposes.
     tmpl_inputs.now = inputs.now;
 
     minja::chat_template_options tmpl_opts;
@@ -972,6 +976,126 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
     return data;
 }
 
+
+// Case-insensitive find
+static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
+    auto it = std::search(
+        haystack.begin() + pos, haystack.end(),
+        needle.begin(), needle.end(),
+        [](char a, char b) { return std::tolower(a) == std::tolower(b); }
+    );
+    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
+}
+
+static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+    const auto is_json_schema_provided = !inputs.json_schema.is_null();
+    const auto is_grammar_provided = !inputs.grammar.empty();
+    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
+
+    // the logic requires potentially modifying the messages
+    auto tweaked_messages = inputs.messages;
+
+    auto replace_json_schema_marker = [](json & messages) -> bool {
+        static std::string marker1 = "force json schema.\n";
+        static std::string marker2 = "force json schema.";
+
+        if (messages.empty() || messages.at(0).at("role") != "system") {
+            return false;
+        }
+
+        std::string content = messages.at(0).at("content");
+
+        for (const auto & marker : {marker1, marker2}) {
+            const auto pos = ifind_string(content, marker);
+            if (pos != std::string::npos) {
+                content.replace(pos, marker.length(), "");
+                // inject modified content back into the messages
+                messages.at(0).at("content") = content;
+                return true;
+            }
+        }
+
+        return false;
+    };
+
+    // Lfm2 model does not natively work with json, but can generally understand the tools structure
+    //
+    // Example of the pytorch dialog structure:
+    // <|startoftext|><|im_start|>system
+    // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
+    // <|im_start|>user
+    // What is the current status of candidate ID 12345?<|im_end|>
+    // <|im_start|>assistant
+    // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
+    // <|im_start|>tool
+    // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
+    // <|im_start|>assistant
+    // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
+    //
+    // For the llama server compatibility with json tools semantic,
+    // the client can add "Follow json schema." line into the system message prompt to force the json output.
+    //
+    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
+        // server/utils.hpp prohibits that branch for the custom grammar anyways
+        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
+    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
+        LOG_INF("%s: Using tools to build a grammar\n", __func__);
+
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            auto schemas = json::array();
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                schemas.push_back({
+                    {"type", "object"},
+                    {"properties", {
+                        {"name", {
+                            {"type", "string"},
+                            {"const", function.at("name")},
+                        }},
+                        {"arguments", function.at("parameters")},
+                    }},
+                    {"required", json::array({"name", "arguments", "id"})},
+                });
+            });
+            auto schema = json {
+                {"type", "array"},
+                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
+                {"minItems", 1},
+            };
+            if (!inputs.parallel_tool_calls) {
+                schema["maxItems"] = 1;
+            }
+
+            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
+        });
+        // model has no concept of tool selection mode choice,
+        // if the system prompt rendered correctly it will produce a tool call
+        // the grammar goes inside the tool call body
+        data.grammar_lazy = true;
+        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
+        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
+        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
+    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
+        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
+        // output those tokens
+        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
+    } else if (is_json_schema_provided) {
+        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
+        data.grammar = json_schema_to_grammar(inputs.json_schema);
+    } else if (is_grammar_provided) {
+        LOG_INF("%s: Using provided grammar\n", __func__);
+        data.grammar = inputs.grammar;
+    } else {
+        LOG_INF("%s: Using content relying on the template\n", __func__);
+    }
+
+    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
+    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
+
+    return data;
+}
+
 static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     data.prompt = apply(tmpl, inputs);
@@ -1672,7 +1796,23 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
 
 static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
-    auto prompt = apply(tmpl, inputs);
+
+    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
+    auto adjusted_messages = json::array();
+    for (const auto & msg : inputs.messages) {
+        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
+        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+
+        if (has_reasoning_content && has_tool_calls) {
+            auto adjusted_message = msg;
+            adjusted_message["thinking"] = msg.at("reasoning_content");
+            adjusted_messages.push_back(adjusted_message);
+        } else {
+            adjusted_messages.push_back(msg);
+        }
+    }
+
+    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
 
     // Check if we need to replace the return token with end token during
     // inference and without generation prompt. For more details see:
@@ -2485,6 +2625,71 @@ static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
     builder.add_content(builder.consume_rest());
 }
 
+
+static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
+    if (!builder.syntax().parse_tool_calls) {
+        builder.add_content(builder.consume_rest());
+        return;
+    }
+
+    // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
+    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
+    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
+
+    // Loop through all tool calls
+    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
+        builder.move_to(res->groups[0].end);
+
+        // Parse JSON array format: [{"name": "...", "arguments": {...}}]
+        auto tool_calls_data = builder.consume_json();
+
+        // Consume end marker
+        builder.consume_spaces();
+        if (!builder.try_consume_regex(tool_call_end_regex)) {
+            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
+        }
+
+        // Process each tool call in the array
+        if (tool_calls_data.json.is_array()) {
+            for (const auto & tool_call : tool_calls_data.json) {
+                if (!tool_call.is_object()) {
+                    throw common_chat_msg_partial_exception("Tool call must be an object");
+                }
+
+                if (!tool_call.contains("name")) {
+                    throw common_chat_msg_partial_exception("Tool call missing 'name' field");
+                }
+
+                std::string function_name = tool_call.at("name");
+                std::string arguments = "{}";
+
+                if (tool_call.contains("arguments")) {
+                    if (tool_call.at("arguments").is_object()) {
+                        arguments = tool_call.at("arguments").dump();
+                    } else if (tool_call.at("arguments").is_string()) {
+                        arguments = tool_call.at("arguments");
+                    }
+                }
+
+                if (!builder.add_tool_call(function_name, "", arguments)) {
+                    throw common_chat_msg_partial_exception("Incomplete tool call");
+                }
+            }
+        } else {
+            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
+        }
+
+        // Consume any trailing whitespace after this tool call
+        builder.consume_spaces();
+    }
+
+    // Consume any remaining content after all tool calls
+    auto remaining = builder.consume_rest();
+    if (!string_strip(remaining).empty()) {
+        builder.add_content(remaining);
+    }
+}
+
 static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
     // Parse thinking tags first - this handles the main reasoning content
     builder.try_parse_reasoning("<seed:think>", "</seed:think>");
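For orientation only (not part of the diff): a minimal sketch of the assistant output that the new common_chat_parse_lfm2() handler above consumes. The tool name and arguments are made up.

    // Hypothetical LFM2 assistant output: plain text followed by one tool call.
    // The parser keeps the leading text as message content and turns the JSON
    // array between the markers into tool calls.
    const char * example_output =
        "Checking the weather now."
        "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Paris\"}}]<|tool_call_end|>";
    // expected result: one tool call named "get_weather" with arguments {"city": "Paris"},
    // plus "Checking the weather now." as regular content.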
@@ -2734,6 +2939,12 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_apertus(tmpl, params);
     }
 
+    // LFM2 (w/ tools)
+    if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
+        src.find("]<|tool_list_end|>") != std::string::npos) {
+        return common_chat_params_init_lfm2(tmpl, params);
+    }
+
 
     // Use generic handler when mixing tools + JSON schema.
     if ((params.tools.is_array() && params.json_schema.is_object())) {
@@ -2912,6 +3123,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_APERTUS:
            common_chat_parse_apertus(builder);
            break;
+        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
+            common_chat_parse_lfm2(builder);
+            break;
         default:
            throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }
package/src/llama.cpp/common/chat.h

@@ -127,6 +127,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_SEED_OSS,
     COMMON_CHAT_FORMAT_NEMOTRON_V2,
     COMMON_CHAT_FORMAT_APERTUS,
+    COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
package/src/llama.cpp/common/common.cpp

@@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) {
     return cache_directory + filename;
 }
 
+std::vector<common_file_info> fs_list_files(const std::string & path) {
+    std::vector<common_file_info> files;
+    if (path.empty()) return files;
+
+    std::filesystem::path dir(path);
+    if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
+        return files;
+    }
+
+    for (const auto & entry : std::filesystem::directory_iterator(dir)) {
+        try {
+            // Only include regular files (skip directories)
+            const auto & p = entry.path();
+            if (std::filesystem::is_regular_file(p)) {
+                common_file_info info;
+                info.path = p.string();
+                info.name = p.filename().string();
+                try {
+                    info.size = static_cast<size_t>(std::filesystem::file_size(p));
+                } catch (const std::filesystem::filesystem_error &) {
+                    info.size = 0;
+                }
+                files.push_back(std::move(info));
+            }
+        } catch (const std::filesystem::filesystem_error &) {
+            // skip entries we cannot inspect
+            continue;
+        }
+    }
+
+    return files;
+}
+
 
 //
 // Model utils
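An illustrative usage sketch (not part of the package): calling the new fs_list_files() helper added above and declared in common/common.h. The "./models" path and the small program around it are hypothetical, and it assumes the translation unit is built against the package's common library.

    #include "common.h"  // assumption: llama.cpp's common headers/library are on the include/link path

    #include <cstdio>

    int main() {
        // fs_list_files() returns an empty vector when the path is empty, missing,
        // or not a directory, so no error handling is needed for the simple case.
        for (const auto & f : fs_list_files("./models")) {  // "./models" is a hypothetical directory
            std::printf("%-40s %zu bytes\n", f.name.c_str(), f.size);
        }
        return 0;
    }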
package/src/llama.cpp/common/common.h

@@ -407,6 +407,8 @@ struct common_params {
     bool mmproj_use_gpu = true; // use GPU for multimodal model
     bool no_mmproj = false; // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
+    int image_min_tokens = -1;
+    int image_max_tokens = -1;
 
     // finetune
     struct lr_opt lr;
@@ -506,6 +508,10 @@ struct common_params {
     // return false from callback to abort model loading or true to continue
     llama_progress_callback load_progress_callback = NULL;
     void * load_progress_callback_user_data = NULL;
+
+    bool has_speculative() const {
+        return !speculative.model.path.empty() || !speculative.model.hf_repo.empty();
+    }
 };
 
 // call once at the start of a program if it uses libcommon
@@ -606,6 +612,13 @@ bool fs_create_directory_with_parents(const std::string & path);
 std::string fs_get_cache_directory();
 std::string fs_get_cache_file(const std::string & filename);
 
+struct common_file_info {
+    std::string path;
+    std::string name;
+    size_t size = 0; // in bytes
+};
+std::vector<common_file_info> fs_list_files(const std::string & path);
+
 //
 // Model utils
 //
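To close, a hedged sketch (not from the diff) of how downstream code might touch the common_params additions above; the function name and the concrete values are hypothetical.

    #include "common.h"  // assumption: built against the package's common library

    void configure_example(common_params & params) {
        // the new image token fields default to -1 (unset); values here are illustrative only
        params.image_min_tokens = 64;
        params.image_max_tokens = 1024;

        if (params.has_speculative()) {
            // true when a draft model was configured (local path or HF repo) for speculative decoding
        }
    }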