@fugood/llama.node 1.3.8 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/lib/binding.js +25 -18
  2. package/lib/binding.ts +19 -1
  3. package/lib/index.js +3 -3
  4. package/lib/index.ts +1 -1
  5. package/package.json +17 -17
  6. package/scripts/llama.cpp.patch +53 -4
  7. package/src/LlamaCompletionWorker.cpp +2 -2
  8. package/src/LlamaContext.cpp +6 -1
  9. package/src/llama.cpp/common/arg.cpp +1 -1
  10. package/src/llama.cpp/common/chat-parser.cpp +968 -0
  11. package/src/llama.cpp/common/chat.cpp +0 -952
  12. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
  13. package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
  14. package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
  15. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -4
  16. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +336 -3
  17. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +11 -8
  18. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -0
  19. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -1
  20. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +234 -1
  21. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
  22. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  23. package/src/llama.cpp/src/llama-arch.cpp +48 -3
  24. package/src/llama.cpp/src/llama-arch.h +2 -0
  25. package/src/llama.cpp/src/llama-context.cpp +6 -2
  26. package/src/llama.cpp/src/llama-hparams.h +1 -1
  27. package/src/llama.cpp/src/llama-model.cpp +102 -5
  28. package/src/llama.cpp/src/llama-model.h +4 -0
  29. package/src/llama.cpp/src/llama-quant.cpp +13 -5
  30. package/src/llama.cpp/src/models/lfm2.cpp +5 -3
  31. package/src/llama.cpp/src/models/models.h +51 -1
  32. package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
@@ -665,114 +665,6 @@ common_reasoning_format common_reasoning_format_from_name(const std::string & fo
665
665
  throw std::runtime_error("Unknown reasoning format: " + format);
666
666
  }
667
667
 
668
- static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
669
- std::string arguments;
670
- if (builder.is_partial()) {
671
- arguments = (json {{"code", code + builder.healing_marker()}}).dump();
672
- auto idx = arguments.find(builder.healing_marker());
673
- if (idx != std::string::npos) {
674
- arguments.resize(idx);
675
- }
676
- } else {
677
- arguments = (json {{"code", code}}).dump();
678
- }
679
- return arguments;
680
- }
681
-
682
- /**
683
- * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
684
- * Aggregates the prefix, suffix and in-between text into the content.
685
- */
686
- static void parse_json_tool_calls(
687
- common_chat_msg_parser & builder,
688
- const std::optional<common_regex> & block_open,
689
- const std::optional<common_regex> & function_regex_start_only,
690
- const std::optional<common_regex> & function_regex,
691
- const common_regex & close_regex,
692
- const std::optional<common_regex> & block_close,
693
- bool allow_raw_python = false,
694
- const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
695
-
696
- auto parse_tool_calls = [&]() {
697
- size_t from = std::string::npos;
698
- auto first = true;
699
- while (true) {
700
- auto start_pos = builder.pos();
701
- auto res = function_regex_start_only && first
702
- ? builder.try_consume_regex(*function_regex_start_only)
703
- : function_regex
704
- ? builder.try_find_regex(*function_regex, from)
705
- : std::nullopt;
706
-
707
- if (res) {
708
- std::string name;
709
- if (get_function_name) {
710
- name = get_function_name(*res);
711
- } else {
712
- GGML_ASSERT(res->groups.size() == 2);
713
- name = builder.str(res->groups[1]);
714
- }
715
- first = false;
716
- if (name.empty()) {
717
- // get_function_name signalled us that we should skip this match and treat it as content.
718
- from = res->groups[0].begin + 1;
719
- continue;
720
- }
721
- from = std::string::npos;
722
-
723
- auto maybe_raw_python = name == "python" && allow_raw_python;
724
- if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
725
- if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
726
- if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
727
- throw common_chat_msg_partial_exception("incomplete tool call");
728
- }
729
- builder.consume_regex(close_regex);
730
- }
731
- continue;
732
- }
733
- if (maybe_raw_python) {
734
- auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
735
- if (!builder.add_tool_call(name, "", arguments)) {
736
- throw common_chat_msg_partial_exception("incomplete tool call");
737
- }
738
- return;
739
- }
740
- throw common_chat_msg_partial_exception("incomplete tool call");
741
- } else {
742
- builder.move_to(start_pos);
743
- }
744
- break;
745
- }
746
- if (block_close) {
747
- builder.consume_regex(*block_close);
748
- }
749
- builder.consume_spaces();
750
- builder.add_content(builder.consume_rest());
751
- };
752
- if (block_open) {
753
- if (auto res = builder.try_find_regex(*block_open)) {
754
- parse_tool_calls();
755
- } else {
756
- builder.add_content(builder.consume_rest());
757
- }
758
- } else {
759
- parse_tool_calls();
760
- }
761
- }
762
-
763
- static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, const common_regex & prefix, size_t rstrip_prefix = 0) {
764
- static const std::vector<std::vector<std::string>> args_paths = {{"arguments"}};
765
- if (auto res = builder.try_find_regex(prefix)) {
766
- builder.move_back(rstrip_prefix);
767
- auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
768
- if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
769
- throw common_chat_msg_partial_exception("incomplete tool call array");
770
- }
771
- } else {
772
- builder.add_content(builder.consume_rest());
773
- }
774
- }
775
-
776
668
  static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
777
669
  for (const auto & tool : tools) {
778
670
  if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
@@ -905,37 +797,6 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
905
797
  data.format = COMMON_CHAT_FORMAT_GENERIC;
906
798
  return data;
907
799
  }
908
- static void common_chat_parse_generic(common_chat_msg_parser & builder) {
909
- if (!builder.syntax().parse_tool_calls) {
910
- builder.add_content(builder.consume_rest());
911
- return;
912
- }
913
- static const std::vector<std::vector<std::string>> content_paths = {
914
- {"response"},
915
- };
916
- static const std::vector<std::vector<std::string>> args_paths = {
917
- {"tool_call", "arguments"},
918
- {"tool_calls", "arguments"},
919
- };
920
- auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
921
- if (data.value.contains("tool_calls")) {
922
- if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
923
- throw common_chat_msg_partial_exception("incomplete tool calls");
924
- }
925
- } else if (data.value.contains("tool_call")) {
926
- if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
927
- throw common_chat_msg_partial_exception("incomplete tool call");
928
- }
929
- } else if (data.value.contains("response")) {
930
- const auto & response = data.value.at("response");
931
- builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
932
- if (data.is_partial) {
933
- throw common_chat_msg_partial_exception("incomplete response");
934
- }
935
- } else {
936
- throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
937
- }
938
- }
939
800
 
940
801
  static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
941
802
  common_chat_params data;
@@ -1160,28 +1021,6 @@ static common_chat_params common_chat_params_init_magistral(const common_chat_te
1160
1021
  return data;
1161
1022
  }
1162
1023
 
1163
- static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
1164
- if (!builder.syntax().parse_tool_calls) {
1165
- builder.add_content(builder.consume_rest());
1166
- return;
1167
- }
1168
-
1169
- static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
1170
- parse_prefixed_json_tool_call_array(builder, prefix);
1171
- }
1172
-
1173
- static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
1174
- builder.try_parse_reasoning("[THINK]", "[/THINK]");
1175
-
1176
- if (!builder.syntax().parse_tool_calls) {
1177
- builder.add_content(builder.consume_rest());
1178
- return;
1179
- }
1180
-
1181
- static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
1182
- parse_prefixed_json_tool_call_array(builder, prefix);
1183
- }
1184
-
1185
1024
  static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
1186
1025
  common_chat_params data;
1187
1026
 
@@ -1262,39 +1101,6 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
1262
1101
  return data;
1263
1102
  }
1264
1103
 
1265
- static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
1266
- builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
1267
-
1268
- static const common_regex start_action_regex("<\\|START_ACTION\\|>");
1269
- static const common_regex end_action_regex("<\\|END_ACTION\\|>");
1270
- static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
1271
- static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
1272
-
1273
- if (auto res = builder.try_find_regex(start_action_regex)) {
1274
- // If we didn't extract thoughts, prelude includes them.
1275
- auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
1276
- for (const auto & tool_call : tool_calls.value) {
1277
- std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
1278
- std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
1279
- std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
1280
- if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
1281
- throw common_chat_msg_partial_exception("incomplete tool call");
1282
- }
1283
- }
1284
- if (tool_calls.is_partial) {
1285
- throw common_chat_msg_partial_exception("incomplete tool call");
1286
- }
1287
- builder.consume_regex(end_action_regex);
1288
- } else if (auto res = builder.try_find_regex(start_response_regex)) {
1289
- if (!builder.try_find_regex(end_response_regex)) {
1290
- builder.add_content(builder.consume_rest());
1291
- throw common_chat_msg_partial_exception(end_response_regex.str());
1292
- }
1293
- } else {
1294
- builder.add_content(builder.consume_rest());
1295
- }
1296
- }
1297
-
1298
1104
  static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
1299
1105
  if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
1300
1106
  throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
@@ -1523,63 +1329,6 @@ static common_chat_params common_chat_params_init_apertus(const common_chat_temp
1523
1329
  }
1524
1330
  return data;
1525
1331
  }
1526
- static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
1527
- builder.try_parse_reasoning("<think>", "</think>");
1528
-
1529
- if (!builder.syntax().parse_tool_calls) {
1530
- builder.add_content(builder.consume_rest());
1531
- return;
1532
- }
1533
-
1534
- static const common_regex function_regex(
1535
- "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
1536
- static const common_regex close_regex("\\}\\s*");
1537
-
1538
- static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
1539
- static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
1540
-
1541
- if (with_builtin_tools) {
1542
- static const common_regex builtin_call_regex("<\\|python_tag\\|>");
1543
- if (auto res = builder.try_find_regex(builtin_call_regex)) {
1544
- auto fun_res = builder.consume_regex(function_name_regex);
1545
- auto function_name = builder.str(fun_res.groups[1]);
1546
-
1547
- common_healing_marker healing_marker;
1548
- json args = json::object();
1549
- while (true) {
1550
- if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
1551
- auto arg_name = builder.str(arg_res->groups[1]);
1552
- auto partial = builder.consume_json();
1553
- args[arg_name] = partial.json;
1554
- healing_marker.marker = partial.healing_marker.marker;
1555
- healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
1556
- builder.consume_spaces();
1557
- if (!builder.try_consume_literal(",")) {
1558
- break;
1559
- }
1560
- } else {
1561
- break;
1562
- }
1563
- }
1564
- builder.consume_literal(")");
1565
- builder.consume_spaces();
1566
-
1567
- auto arguments = args.dump();
1568
- if (!builder.add_tool_call(function_name, "", arguments)) {
1569
- throw common_chat_msg_partial_exception("Incomplete tool call");
1570
- }
1571
- return;
1572
- }
1573
- }
1574
- parse_json_tool_calls(
1575
- builder,
1576
- /* block_open= */ std::nullopt,
1577
- /* function_regex_start_only= */ function_regex,
1578
- /* function_regex= */ std::nullopt,
1579
- close_regex,
1580
- std::nullopt);
1581
-
1582
- }
1583
1332
 
1584
1333
  static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
1585
1334
  common_chat_params data;
@@ -1719,88 +1468,6 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
1719
1468
  return data;
1720
1469
  }
1721
1470
 
1722
- static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
1723
- builder.try_parse_reasoning("<think>", "</think>");
1724
- if (!builder.syntax().parse_tool_calls) {
1725
- builder.add_content(builder.consume_rest());
1726
- return;
1727
- }
1728
-
1729
- static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
1730
- static const common_regex tool_calls_end("<|tool▁calls▁end|>");
1731
- static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
1732
- static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
1733
-
1734
- parse_json_tool_calls(
1735
- builder,
1736
- /* block_open= */ tool_calls_begin,
1737
- /* function_regex_start_only= */ std::nullopt,
1738
- function_regex,
1739
- close_regex,
1740
- tool_calls_end);
1741
- }
1742
-
1743
- static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
1744
- static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");
1745
-
1746
- static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
1747
- static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
1748
- static const common_regex tool_calls_end("<|tool▁calls▁end|>");
1749
-
1750
- if (!builder.syntax().parse_tool_calls) {
1751
- LOG_DBG("%s: not parse_tool_calls\n", __func__);
1752
- builder.add_content(builder.consume_rest());
1753
- return;
1754
- }
1755
-
1756
- LOG_DBG("%s: parse_tool_calls\n", __func__);
1757
-
1758
- parse_json_tool_calls(
1759
- builder,
1760
- /* block_open= */ tool_calls_begin,
1761
- /* function_regex_start_only= */ std::nullopt,
1762
- function_regex,
1763
- close_regex,
1764
- tool_calls_end);
1765
- }
1766
-
1767
- static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
1768
- // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
1769
- // First try to parse using the standard reasoning parsing method
1770
- LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
1771
-
1772
- auto start_pos = builder.pos();
1773
- auto found_end_think = builder.try_find_literal("</think>");
1774
- builder.move_to(start_pos);
1775
-
1776
- if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
1777
- LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
1778
- common_chat_parse_deepseek_v3_1_content(builder);
1779
- } else if (builder.try_parse_reasoning("<think>", "</think>")) {
1780
- // If reasoning was parsed successfully, the remaining content is regular content
1781
- LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
1782
- // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
1783
- common_chat_parse_deepseek_v3_1_content(builder);
1784
- } else {
1785
- if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
1786
- LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
1787
- common_chat_parse_deepseek_v3_1_content(builder);
1788
- return;
1789
- }
1790
- // If no reasoning tags found, check if we should treat everything as reasoning
1791
- if (builder.syntax().thinking_forced_open) {
1792
- // If thinking is forced open but no tags found, treat everything as reasoning
1793
- LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
1794
- builder.add_reasoning_content(builder.consume_rest());
1795
- } else {
1796
- LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
1797
- // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
1798
- common_chat_parse_deepseek_v3_1_content(builder);
1799
- }
1800
- }
1801
- }
1802
-
1803
-
1804
1471
  static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
1805
1472
  common_chat_params data;
1806
1473
  data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1843,20 +1510,6 @@ static common_chat_params common_chat_params_init_minimax_m2(const common_chat_t
1843
1510
  return data;
1844
1511
  }
1845
1512
 
1846
- static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
1847
- static const xml_tool_call_format form {
1848
- /* form.scope_start = */ "<minimax:tool_call>",
1849
- /* form.tool_start = */ "<invoke name=\"",
1850
- /* form.tool_sep = */ "\">",
1851
- /* form.key_start = */ "<parameter name=\"",
1852
- /* form.key_val_sep = */ "\">",
1853
- /* form.val_end = */ "</parameter>",
1854
- /* form.tool_end = */ "</invoke>",
1855
- /* form.scope_end = */ "</minimax:tool_call>",
1856
- };
1857
- builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
1858
- }
1859
-
1860
1513
  static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
1861
1514
  common_chat_params data;
1862
1515
  data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1889,23 +1542,6 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
1889
1542
  return data;
1890
1543
  }
1891
1544
 
1892
- static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
1893
- static const xml_tool_call_format form = ([]() {
1894
- xml_tool_call_format form {};
1895
- form.scope_start = "<tool_call>";
1896
- form.tool_start = "<function=";
1897
- form.tool_sep = ">";
1898
- form.key_start = "<parameter=";
1899
- form.key_val_sep = ">";
1900
- form.val_end = "</parameter>";
1901
- form.tool_end = "</function>";
1902
- form.scope_end = "</tool_call>";
1903
- form.trim_raw_argval = true;
1904
- return form;
1905
- })();
1906
- builder.consume_reasoning_with_xml_tool_calls(form);
1907
- }
1908
-
1909
1545
  static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
1910
1546
  common_chat_params data;
1911
1547
  data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1950,24 +1586,6 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
1950
1586
  return data;
1951
1587
  }
1952
1588
 
1953
- static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
1954
- static const xml_tool_call_format form = ([]() {
1955
- xml_tool_call_format form {};
1956
- form.scope_start = "<|tool_calls_section_begin|>";
1957
- form.tool_start = "<|tool_call_begin|>";
1958
- form.tool_sep = "<|tool_call_argument_begin|>{";
1959
- form.key_start = "\"";
1960
- form.key_val_sep = "\": ";
1961
- form.val_end = ", ";
1962
- form.tool_end = "}<|tool_call_end|>";
1963
- form.scope_end = "<|tool_calls_section_end|>";
1964
- form.raw_argval = false;
1965
- form.last_val_end = "";
1966
- return form;
1967
- })();
1968
- builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
1969
- }
1970
-
1971
1589
  static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
1972
1590
  common_chat_params data;
1973
1591
  data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -2003,25 +1621,6 @@ static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_t
2003
1621
  return data;
2004
1622
  }
2005
1623
 
2006
- static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
2007
- static const xml_tool_call_format form = ([]() {
2008
- xml_tool_call_format form {};
2009
- form.scope_start = "<tool_calls>[";
2010
- form.tool_start = "{\"name\": \"";
2011
- form.tool_sep = "\", \"arguments\": {";
2012
- form.key_start = "\"";
2013
- form.key_val_sep = "\": ";
2014
- form.val_end = ", ";
2015
- form.tool_end = "}, ";
2016
- form.scope_end = "]</tool_calls>";
2017
- form.raw_argval = false;
2018
- form.last_val_end = "";
2019
- form.last_tool_end = "}";
2020
- return form;
2021
- })();
2022
- builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
2023
- }
2024
-
2025
1624
  static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
2026
1625
  common_chat_params data;
2027
1626
  data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -2054,24 +1653,6 @@ static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_
2054
1653
  return data;
2055
1654
  }
2056
1655
 
2057
- static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
2058
- static const xml_tool_call_format form = ([]() {
2059
- xml_tool_call_format form {};
2060
- form.scope_start = "";
2061
- form.tool_start = "<tool_call>\n{\"name\": \"";
2062
- form.tool_sep = "\", \"arguments\": {";
2063
- form.key_start = "\"";
2064
- form.key_val_sep = "\": ";
2065
- form.val_end = ", ";
2066
- form.tool_end = "}\n</tool_call>";
2067
- form.scope_end = "";
2068
- form.raw_argval = false;
2069
- form.last_val_end = "";
2070
- return form;
2071
- })();
2072
- builder.consume_reasoning_with_xml_tool_calls(form);
2073
- }
2074
-
2075
1656
  static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
2076
1657
  common_chat_params data;
2077
1658
 
@@ -2218,93 +1799,6 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
2218
1799
 
2219
1800
  return data;
2220
1801
  }
2221
- static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
2222
- static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
2223
- static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
2224
-
2225
- static const common_regex start_regex("<\\|start\\|>assistant");
2226
- static const common_regex analysis_regex("<\\|channel\\|>analysis");
2227
- static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
2228
- static const common_regex preamble_regex("<\\|channel\\|>commentary");
2229
- static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
2230
- static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
2231
-
2232
- auto consume_end = [&](bool include_end = false) {
2233
- if (auto res = builder.try_find_literal("<|end|>")) {
2234
- return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
2235
- }
2236
- return builder.consume_rest();
2237
- };
2238
-
2239
- auto handle_tool_call = [&](const std::string & name) {
2240
- if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
2241
- if (builder.syntax().parse_tool_calls) {
2242
- if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
2243
- throw common_chat_msg_partial_exception("incomplete tool call");
2244
- }
2245
- } else if (args->is_partial) {
2246
- throw common_chat_msg_partial_exception("incomplete tool call");
2247
- }
2248
- }
2249
- };
2250
-
2251
- auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
2252
- auto match = regex.search(input, 0, true);
2253
- if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
2254
- return match;
2255
- }
2256
- return std::nullopt;
2257
- };
2258
-
2259
- do {
2260
- auto header_start_pos = builder.pos();
2261
- auto content_start = builder.try_find_literal("<|message|>");
2262
- if (!content_start) {
2263
- throw common_chat_msg_partial_exception("incomplete header");
2264
- }
2265
-
2266
- auto header = content_start->prelude;
2267
-
2268
- if (auto match = regex_match(tool_call1_regex, header)) {
2269
- auto group = match->groups[1];
2270
- auto name = header.substr(group.begin, group.end - group.begin);
2271
- handle_tool_call(name);
2272
- continue;
2273
- }
2274
-
2275
- if (auto match = regex_match(tool_call2_regex, header)) {
2276
- auto group = match->groups[2];
2277
- auto name = header.substr(group.begin, group.end - group.begin);
2278
- handle_tool_call(name);
2279
- continue;
2280
- }
2281
-
2282
- if (regex_match(analysis_regex, header)) {
2283
- builder.move_to(header_start_pos);
2284
- if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
2285
- builder.add_content(consume_end(true));
2286
- } else {
2287
- builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
2288
- }
2289
- continue;
2290
- }
2291
-
2292
- if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
2293
- builder.add_content(consume_end());
2294
- continue;
2295
- }
2296
-
2297
- // Possibly a malformed message, attempt to recover by rolling
2298
- // back to pick up the next <|start|>
2299
- LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
2300
- builder.move_to(header_start_pos);
2301
- } while (builder.try_find_regex(start_regex, std::string::npos, false));
2302
-
2303
- auto remaining = builder.consume_rest();
2304
- if (!remaining.empty()) {
2305
- LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
2306
- }
2307
- }
2308
1802
 
2309
1803
  static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
2310
1804
  common_chat_params data;
@@ -2385,21 +1879,6 @@ static common_chat_params common_chat_params_init_glm_4_5(const common_chat_temp
2385
1879
  return data;
2386
1880
  }
2387
1881
 
2388
- static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
2389
- static const xml_tool_call_format form {
2390
- /* form.scope_start = */ "",
2391
- /* form.tool_start = */ "<tool_call>",
2392
- /* form.tool_sep = */ "",
2393
- /* form.key_start = */ "<arg_key>",
2394
- /* form.key_val_sep = */ "</arg_key>",
2395
- /* form.val_end = */ "</arg_value>",
2396
- /* form.tool_end = */ "</tool_call>",
2397
- /* form.scope_end = */ "",
2398
- /* form.key_val_sep2 = */ "<arg_value>",
2399
- };
2400
- builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
2401
- }
2402
-
2403
1882
  static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
2404
1883
  LOG_DBG("%s\n", __func__);
2405
1884
  common_chat_params data;
@@ -2447,14 +1926,6 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
2447
1926
  }
2448
1927
  return data;
2449
1928
  }
2450
- static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
2451
- if (!builder.syntax().parse_tool_calls) {
2452
- builder.add_content(builder.consume_rest());
2453
- return;
2454
- }
2455
- static const common_regex prefix(regex_escape(" functools["));
2456
- parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
2457
- }
2458
1929
 
2459
1930
  static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
2460
1931
  // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
@@ -2505,34 +1976,6 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
2505
1976
  }
2506
1977
  return data;
2507
1978
  }
2508
- static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
2509
- static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
2510
- static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
2511
- static const common_regex close_regex(R"(\s*)");
2512
-
2513
- parse_json_tool_calls(
2514
- builder,
2515
- std::nullopt,
2516
- function_regex_start_only,
2517
- function_regex,
2518
- close_regex,
2519
- std::nullopt,
2520
- /* allow_raw_python= */ true,
2521
- /* get_function_name= */ [&](const auto & res) -> std::string {
2522
- auto at_start = res.groups[0].begin == 0;
2523
- auto name = builder.str(res.groups[1]);
2524
- if (!name.empty() && name.back() == '{') {
2525
- // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
2526
- builder.move_back(1);
2527
- }
2528
- auto idx = name.find_last_not_of("\n{");
2529
- name = name.substr(0, idx + 1);
2530
- if (at_start && name == "all") {
2531
- return "";
2532
- }
2533
- return name;
2534
- });
2535
- }
2536
1979
 
2537
1980
  static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
2538
1981
  // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
@@ -2592,31 +2035,6 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
2592
2035
  // TODO: if (has_raw_python)
2593
2036
  return data;
2594
2037
  }
2595
- static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
2596
- if (!builder.syntax().parse_tool_calls) {
2597
- builder.add_content(builder.consume_rest());
2598
- return;
2599
- }
2600
- // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
2601
- static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
2602
-
2603
- static const common_regex function_regex(R"(<function=(\w+)>)");
2604
- static const common_regex close_regex(R"(</function>)");
2605
-
2606
- parse_json_tool_calls(
2607
- builder,
2608
- /* block_open= */ std::nullopt,
2609
- /* function_regex_start_only= */ std::nullopt,
2610
- function_regex,
2611
- close_regex,
2612
- std::nullopt);
2613
-
2614
- if (auto res = builder.try_find_regex(python_tag_regex)) {
2615
- auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
2616
- builder.add_tool_call("python", "", arguments);
2617
- return;
2618
- }
2619
- }
2620
2038
 
2621
2039
  static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
2622
2040
  common_chat_params data;
@@ -2733,83 +2151,6 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
2733
2151
 
2734
2152
  return data;
2735
2153
  }
2736
- static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
2737
- builder.try_parse_reasoning("<think>", "</think>");
2738
- if (!builder.syntax().parse_tool_calls) {
2739
- builder.add_content(builder.consume_rest());
2740
- return;
2741
- }
2742
-
2743
- static const common_regex open_regex(
2744
- "(?:"
2745
- "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
2746
- "(" // match 2 (open_tag)
2747
- "<tool_call>"
2748
- "|<function_call>"
2749
- "|<tool>"
2750
- "|<tools>"
2751
- "|<response>"
2752
- "|<json>"
2753
- "|<xml>"
2754
- "|<JSON>"
2755
- ")?"
2756
- "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
2757
- ")"
2758
- "|<function=([^>]+)>" // match 4 (function name)
2759
- "|<function name=\"([^\"]+)\">" // match 5 (function name again)
2760
- );
2761
-
2762
- while (auto res = builder.try_find_regex(open_regex)) {
2763
- const auto & block_start = res->groups[1];
2764
- std::string block_end = block_start.empty() ? "" : "```";
2765
-
2766
- const auto & open_tag = res->groups[2];
2767
- std::string close_tag;
2768
-
2769
- if (!res->groups[3].empty()) {
2770
- builder.move_to(res->groups[3].begin);
2771
- close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
2772
-
2773
- if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
2774
- if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
2775
- throw common_chat_msg_partial_exception("incomplete tool call");
2776
- }
2777
- builder.consume_spaces();
2778
- builder.consume_literal(close_tag);
2779
- builder.consume_spaces();
2780
- if (!block_end.empty()) {
2781
- builder.consume_literal(block_end);
2782
- builder.consume_spaces();
2783
- }
2784
- } else {
2785
- throw common_chat_msg_partial_exception("failed to parse tool call");
2786
- }
2787
- } else {
2788
- auto function_name = builder.str(res->groups[4]);
2789
- if (function_name.empty()) {
2790
- function_name = builder.str(res->groups[5]);
2791
- }
2792
- GGML_ASSERT(!function_name.empty());
2793
-
2794
- close_tag = "</function>";
2795
-
2796
- if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
2797
- if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
2798
- throw common_chat_msg_partial_exception("incomplete tool call");
2799
- }
2800
- builder.consume_spaces();
2801
- builder.consume_literal(close_tag);
2802
- builder.consume_spaces();
2803
- if (!block_end.empty()) {
2804
- builder.consume_literal(block_end);
2805
- builder.consume_spaces();
2806
- }
2807
- }
2808
- }
2809
- }
2810
-
2811
- builder.add_content(builder.consume_rest());
2812
- }
2813
2154
 
2814
2155
  static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
2815
2156
  common_chat_params data;
@@ -2892,190 +2233,6 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
2892
2233
  return data;
2893
2234
  }
2894
2235
 
2895
- static void common_chat_parse_granite(common_chat_msg_parser & builder) {
2896
- // Parse thinking tags
2897
- static const common_regex start_think_regex(regex_escape("<think>"));
2898
- static const common_regex end_think_regex(regex_escape("</think>"));
2899
- // Granite models output partial tokens such as "<" and "<think".
2900
- // By leveraging try_consume_regex()/try_find_regex() throwing
2901
- // common_chat_msg_partial_exception for these partial tokens,
2902
- // processing is interrupted and the tokens are not passed to add_content().
2903
- if (auto res = builder.try_consume_regex(start_think_regex)) {
2904
- // Restore position for try_parse_reasoning()
2905
- builder.move_to(res->groups[0].begin);
2906
- builder.try_find_regex(end_think_regex, std::string::npos, false);
2907
- // Restore position for try_parse_reasoning()
2908
- builder.move_to(res->groups[0].begin);
2909
- }
2910
- builder.try_parse_reasoning("<think>", "</think>");
2911
-
2912
- // Parse response tags
2913
- static const common_regex start_response_regex(regex_escape("<response>"));
2914
- static const common_regex end_response_regex(regex_escape("</response>"));
2915
- // Granite models output partial tokens such as "<" and "<response".
2916
- // Same hack as reasoning parsing.
2917
- if (builder.try_consume_regex(start_response_regex)) {
2918
- builder.try_find_regex(end_response_regex);
2919
- }
2920
-
2921
- if (!builder.syntax().parse_tool_calls) {
2922
- builder.add_content(builder.consume_rest());
2923
- return;
2924
- }
2925
-
2926
- // Look for tool calls
2927
- static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
2928
- if (auto res = builder.try_find_regex(tool_call_regex)) {
2929
- builder.move_to(res->groups[0].end);
2930
-
2931
- // Expect JSON array of tool calls
2932
- if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
2933
- if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
2934
- throw common_chat_msg_partial_exception("incomplete tool call");
2935
- }
2936
- }
2937
- } else {
2938
- builder.add_content(builder.consume_rest());
2939
- }
2940
- }
2941
-
2942
- static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
2943
- // Parse thinking tags
2944
- builder.try_parse_reasoning("<think>", "</think>");
2945
- if (!builder.syntax().parse_tool_calls) {
2946
- builder.add_content(builder.consume_rest());
2947
- return;
2948
- }
2949
-
2950
- // Look for tool calls
2951
- static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
2952
- if (auto res = builder.try_find_regex(tool_call_regex)) {
2953
- builder.move_to(res->groups[0].end);
2954
-
2955
- // Expect JSON array of tool calls
2956
- auto tool_calls_data = builder.consume_json();
2957
- if (tool_calls_data.json.is_array()) {
2958
- if (!builder.try_consume_literal("</TOOLCALL>")) {
2959
- throw common_chat_msg_partial_exception("Incomplete tool call");
2960
- }
2961
- builder.add_tool_calls(tool_calls_data.json);
2962
- } else {
2963
- throw common_chat_msg_partial_exception("Incomplete tool call");
2964
- }
2965
- }
2966
- builder.add_content(builder.consume_rest());
2967
- }
2968
-
2969
- static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
2970
- // Parse thinking tags
2971
- builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
2972
- if (!builder.syntax().parse_tool_calls) {
2973
- builder.add_content(builder.consume_rest());
2974
- return;
2975
- }
2976
-
2977
- // Look for tool calls
2978
- static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
2979
- if (auto res = builder.try_find_regex(tool_call_regex)) {
2980
- builder.move_to(res->groups[0].end);
2981
-
2982
- auto tool_calls_data = builder.consume_json();
2983
- if (tool_calls_data.json.is_array()) {
2984
- builder.consume_spaces();
2985
- if (!builder.try_consume_literal("<|tools_suffix|>")) {
2986
- throw common_chat_msg_partial_exception("Incomplete tool call");
2987
- }
2988
- for (const auto & value : tool_calls_data.json) {
2989
- if (value.is_object()) {
2990
- builder.add_tool_call_short_form(value);
2991
- }
2992
- }
2993
- } else {
2994
- throw common_chat_msg_partial_exception("Incomplete tool call");
2995
- }
2996
- }
2997
- builder.add_content(builder.consume_rest());
2998
- }
2999
-
3000
-
3001
- static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
3002
- if (!builder.syntax().parse_tool_calls) {
3003
- builder.add_content(builder.consume_rest());
3004
- return;
3005
- }
3006
-
3007
- // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
3008
- static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
3009
- static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
3010
-
3011
- // Loop through all tool calls
3012
- while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
3013
- builder.move_to(res->groups[0].end);
3014
-
3015
- // Parse JSON array format: [{"name": "...", "arguments": {...}}]
3016
- auto tool_calls_data = builder.consume_json();
3017
-
3018
- // Consume end marker
3019
- builder.consume_spaces();
3020
- if (!builder.try_consume_regex(tool_call_end_regex)) {
3021
- throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
3022
- }
3023
-
3024
- // Process each tool call in the array
3025
- if (tool_calls_data.json.is_array()) {
3026
- for (const auto & tool_call : tool_calls_data.json) {
3027
- if (!tool_call.is_object()) {
3028
- throw common_chat_msg_partial_exception("Tool call must be an object");
3029
- }
3030
-
3031
- if (!tool_call.contains("name")) {
3032
- throw common_chat_msg_partial_exception("Tool call missing 'name' field");
3033
- }
3034
-
3035
- std::string function_name = tool_call.at("name");
3036
- std::string arguments = "{}";
3037
-
3038
- if (tool_call.contains("arguments")) {
3039
- if (tool_call.at("arguments").is_object()) {
3040
- arguments = tool_call.at("arguments").dump();
3041
- } else if (tool_call.at("arguments").is_string()) {
3042
- arguments = tool_call.at("arguments");
3043
- }
3044
- }
3045
-
3046
- if (!builder.add_tool_call(function_name, "", arguments)) {
3047
- throw common_chat_msg_partial_exception("Incomplete tool call");
3048
- }
3049
- }
3050
- } else {
3051
- throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
3052
- }
3053
-
3054
- // Consume any trailing whitespace after this tool call
3055
- builder.consume_spaces();
3056
- }
3057
-
3058
- // Consume any remaining content after all tool calls
3059
- auto remaining = builder.consume_rest();
3060
- if (!string_strip(remaining).empty()) {
3061
- builder.add_content(remaining);
3062
- }
3063
- }
3064
-
3065
- static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
3066
- static const xml_tool_call_format form {
3067
- /* form.scope_start = */ "<seed:tool_call>",
3068
- /* form.tool_start = */ "<function=",
3069
- /* form.tool_sep = */ ">",
3070
- /* form.key_start = */ "<parameter=",
3071
- /* form.key_val_sep = */ ">",
3072
- /* form.val_end = */ "</parameter>",
3073
- /* form.tool_end = */ "</function>",
3074
- /* form.scope_end = */ "</seed:tool_call>",
3075
- };
3076
- builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
3077
- }
3078
-
3079
2236
  static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
3080
2237
  common_chat_params data;
3081
2238
  data.prompt = apply(tmpl, inputs);
@@ -3415,112 +2572,3 @@ common_chat_params common_chat_templates_apply(
3415
2572
  ? common_chat_templates_apply_jinja(tmpls, inputs)
3416
2573
  : common_chat_templates_apply_legacy(tmpls, inputs);
3417
2574
  }
3418
-
3419
- static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
3420
- builder.try_parse_reasoning("<think>", "</think>");
3421
- builder.add_content(builder.consume_rest());
3422
- }
3423
-
3424
- static void common_chat_parse(common_chat_msg_parser & builder) {
3425
- LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
3426
-
3427
- switch (builder.syntax().format) {
3428
- case COMMON_CHAT_FORMAT_CONTENT_ONLY:
3429
- common_chat_parse_content_only(builder);
3430
- break;
3431
- case COMMON_CHAT_FORMAT_GENERIC:
3432
- common_chat_parse_generic(builder);
3433
- break;
3434
- case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
3435
- common_chat_parse_mistral_nemo(builder);
3436
- break;
3437
- case COMMON_CHAT_FORMAT_MAGISTRAL:
3438
- common_chat_parse_magistral(builder);
3439
- break;
3440
- case COMMON_CHAT_FORMAT_LLAMA_3_X:
3441
- common_chat_parse_llama_3_1(builder);
3442
- break;
3443
- case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
3444
- common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
3445
- break;
3446
- case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
3447
- common_chat_parse_deepseek_r1(builder);
3448
- break;
3449
- case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
3450
- common_chat_parse_deepseek_v3_1(builder);
3451
- break;
3452
- case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
3453
- common_chat_parse_functionary_v3_2(builder);
3454
- break;
3455
- case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
3456
- common_chat_parse_functionary_v3_1_llama_3_1(builder);
3457
- break;
3458
- case COMMON_CHAT_FORMAT_HERMES_2_PRO:
3459
- common_chat_parse_hermes_2_pro(builder);
3460
- break;
3461
- case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
3462
- common_chat_parse_firefunction_v2(builder);
3463
- break;
3464
- case COMMON_CHAT_FORMAT_COMMAND_R7B:
3465
- common_chat_parse_command_r7b(builder);
3466
- break;
3467
- case COMMON_CHAT_FORMAT_GRANITE:
3468
- common_chat_parse_granite(builder);
3469
- break;
3470
- case COMMON_CHAT_FORMAT_GPT_OSS:
3471
- common_chat_parse_gpt_oss(builder);
3472
- break;
3473
- case COMMON_CHAT_FORMAT_SEED_OSS:
3474
- common_chat_parse_seed_oss(builder);
3475
- break;
3476
- case COMMON_CHAT_FORMAT_NEMOTRON_V2:
3477
- common_chat_parse_nemotron_v2(builder);
3478
- break;
3479
- case COMMON_CHAT_FORMAT_APERTUS:
3480
- common_chat_parse_apertus(builder);
3481
- break;
3482
- case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
3483
- common_chat_parse_lfm2(builder);
3484
- break;
3485
- case COMMON_CHAT_FORMAT_MINIMAX_M2:
3486
- common_chat_parse_minimax_m2(builder);
3487
- break;
3488
- case COMMON_CHAT_FORMAT_GLM_4_5:
3489
- common_chat_parse_glm_4_5(builder);
3490
- break;
3491
- case COMMON_CHAT_FORMAT_KIMI_K2:
3492
- common_chat_parse_kimi_k2(builder);
3493
- break;
3494
- case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
3495
- common_chat_parse_qwen3_coder_xml(builder);
3496
- break;
3497
- case COMMON_CHAT_FORMAT_APRIEL_1_5:
3498
- common_chat_parse_apriel_1_5(builder);
3499
- break;
3500
- case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
3501
- common_chat_parse_xiaomi_mimo(builder);
3502
- break;
3503
- default:
3504
- throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
3505
- }
3506
- builder.finish();
3507
- }
3508
-
3509
- common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
3510
- common_chat_msg_parser builder(input, is_partial, syntax);
3511
- try {
3512
- common_chat_parse(builder);
3513
- } catch (const common_chat_msg_partial_exception & ex) {
3514
- LOG_DBG("Partial parse: %s\n", ex.what());
3515
- if (!is_partial) {
3516
- builder.clear_tools();
3517
- builder.move_to(0);
3518
- common_chat_parse_content_only(builder);
3519
- }
3520
- }
3521
- auto msg = builder.result();
3522
- if (!is_partial) {
3523
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
3524
- }
3525
- return msg;
3526
- }