@fugood/llama.node 1.3.4 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/CMakeLists.txt +21 -1
  2. package/lib/binding.js +1 -1
  3. package/lib/binding.ts +47 -15
  4. package/lib/index.js +26 -2
  5. package/lib/index.ts +42 -10
  6. package/package.json +15 -14
  7. package/scripts/llama.cpp.patch +31 -10
  8. package/src/LlamaContext.cpp +46 -0
  9. package/src/LlamaContext.h +2 -0
  10. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  11. package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
  12. package/src/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
  13. package/src/llama.cpp/common/chat-parser.h +10 -0
  14. package/src/llama.cpp/common/chat.cpp +461 -87
  15. package/src/llama.cpp/common/chat.h +6 -0
  16. package/src/llama.cpp/common/common.cpp +8 -1
  17. package/src/llama.cpp/common/common.h +12 -5
  18. package/src/llama.cpp/common/json-partial.cpp +19 -2
  19. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -0
  20. package/src/llama.cpp/common/json-schema-to-grammar.h +2 -0
  21. package/src/llama.cpp/common/sampling.cpp +60 -6
  22. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -38
  23. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
  24. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +15 -5
  25. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -3
  26. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +16 -14
  27. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +49 -48
  28. package/src/llama.cpp/src/llama-grammar.cpp +17 -9
  29. package/src/llama.cpp/src/llama-impl.cpp +3 -3
  30. package/src/llama.cpp/src/llama-sampling.cpp +3 -6
  31. package/src/llama.cpp/src/llama-vocab.cpp +1 -0
@@ -630,6 +630,12 @@ const char * common_chat_format_name(common_chat_format format) {
630
630
  case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
631
631
  case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
632
632
  case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
633
+ case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
634
+ case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
635
+ case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
636
+ case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
637
+ case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
638
+ case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
633
639
  default:
634
640
  throw std::runtime_error("Unknown chat format");
635
641
  }
@@ -1794,6 +1800,278 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
1794
1800
  }
1795
1801
  }
1796
1802
 
1803
+
1804
+ static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
1805
+ common_chat_params data;
1806
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1807
+
1808
+ data.prompt = apply(tmpl, params);
1809
+ data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
1810
+
1811
+ // Handle thinking tags based on prompt ending
1812
+ if (string_ends_with(data.prompt, "<think>\n")) {
1813
+ if (!params.enable_thinking) {
1814
+ // Close the thinking tag immediately if thinking is disabled
1815
+ data.prompt += "</think>\n\n";
1816
+ } else {
1817
+ // Mark thinking as forced open (template started with <think>)
1818
+ data.thinking_forced_open = true;
1819
+ }
1820
+ }
1821
+
1822
+ // Preserve MiniMax-M2 special tokens
1823
+ data.preserved_tokens = {
1824
+ "<think>",
1825
+ "</think>",
1826
+ "<minimax:tool_call>",
1827
+ "</minimax:tool_call>",
1828
+ };
1829
+
1830
+ // build grammar for tool call
1831
+ static const xml_tool_call_format form {
1832
+ /* form.scope_start = */ "<minimax:tool_call>\n",
1833
+ /* form.tool_start = */ "<invoke name=\"",
1834
+ /* form.tool_sep = */ "\">\n",
1835
+ /* form.key_start = */ "<parameter name=\"",
1836
+ /* form.key_val_sep = */ "\">",
1837
+ /* form.val_end = */ "</parameter>\n",
1838
+ /* form.tool_end = */ "</invoke>\n",
1839
+ /* form.scope_end = */ "</minimax:tool_call>",
1840
+ };
1841
+ build_grammar_xml_tool_call(data, params.tools, form);
1842
+
1843
+ return data;
1844
+ }
1845
+
1846
+ static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
1847
+ static const xml_tool_call_format form {
1848
+ /* form.scope_start = */ "<minimax:tool_call>",
1849
+ /* form.tool_start = */ "<invoke name=\"",
1850
+ /* form.tool_sep = */ "\">",
1851
+ /* form.key_start = */ "<parameter name=\"",
1852
+ /* form.key_val_sep = */ "\">",
1853
+ /* form.val_end = */ "</parameter>",
1854
+ /* form.tool_end = */ "</invoke>",
1855
+ /* form.scope_end = */ "</minimax:tool_call>",
1856
+ };
1857
+ builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
1858
+ }
1859
+
1860
+ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
1861
+ common_chat_params data;
1862
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1863
+
1864
+ data.prompt = apply(tmpl, params);
1865
+ data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
1866
+
1867
+ data.preserved_tokens = {
1868
+ "<tool_call>",
1869
+ "</tool_call>",
1870
+ "<function=",
1871
+ "</function>",
1872
+ "<parameter=",
1873
+ "</parameter>",
1874
+ };
1875
+
1876
+ // build grammar for tool call
1877
+ static const xml_tool_call_format form {
1878
+ /* form.scope_start = */ "<tool_call>\n",
1879
+ /* form.tool_start = */ "<function=",
1880
+ /* form.tool_sep = */ ">\n",
1881
+ /* form.key_start = */ "<parameter=",
1882
+ /* form.key_val_sep = */ ">\n",
1883
+ /* form.val_end = */ "\n</parameter>\n",
1884
+ /* form.tool_end = */ "</function>\n",
1885
+ /* form.scope_end = */ "</tool_call>",
1886
+ };
1887
+ build_grammar_xml_tool_call(data, params.tools, form);
1888
+
1889
+ return data;
1890
+ }
1891
+
1892
+ static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
1893
+ static const xml_tool_call_format form = ([]() {
1894
+ xml_tool_call_format form {};
1895
+ form.scope_start = "<tool_call>";
1896
+ form.tool_start = "<function=";
1897
+ form.tool_sep = ">";
1898
+ form.key_start = "<parameter=";
1899
+ form.key_val_sep = ">";
1900
+ form.val_end = "</parameter>";
1901
+ form.tool_end = "</function>";
1902
+ form.scope_end = "</tool_call>";
1903
+ form.trim_raw_argval = true;
1904
+ return form;
1905
+ })();
1906
+ builder.consume_reasoning_with_xml_tool_calls(form);
1907
+ }
1908
+
1909
+ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
1910
+ common_chat_params data;
1911
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1912
+
1913
+ data.prompt = apply(tmpl, params);
1914
+ data.format = COMMON_CHAT_FORMAT_KIMI_K2;
1915
+
1916
+ data.preserved_tokens = {
1917
+ "<think>",
1918
+ "</think>",
1919
+ "<|tool_calls_section_begin|>",
1920
+ "<|tool_call_begin|>",
1921
+ "<|tool_call_argument_begin|>",
1922
+ "<|tool_call_end|>",
1923
+ "<|tool_calls_section_end|>",
1924
+ "<|im_end|>",
1925
+ "<|im_system|>",
1926
+ "<|im_middle|>",
1927
+ };
1928
+
1929
+ data.additional_stops.insert(data.additional_stops.end(), {
1930
+ "<|im_end|>",
1931
+ "<|im_middle|>"
1932
+ });
1933
+ // build grammar for tool call
1934
+ static const xml_tool_call_format form = ([]() {
1935
+ xml_tool_call_format form {};
1936
+ form.scope_start = "<|tool_calls_section_begin|>";
1937
+ form.tool_start = "<|tool_call_begin|>";
1938
+ form.tool_sep = "<|tool_call_argument_begin|>{";
1939
+ form.key_start = "\"";
1940
+ form.key_val_sep = "\": ";
1941
+ form.val_end = ", ";
1942
+ form.tool_end = "}<|tool_call_end|>";
1943
+ form.scope_end = "<|tool_calls_section_end|>";
1944
+ form.raw_argval = false;
1945
+ form.last_val_end = "";
1946
+ return form;
1947
+ })();
1948
+ build_grammar_xml_tool_call(data, params.tools, form);
1949
+
1950
+ return data;
1951
+ }
1952
+
1953
+ static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
1954
+ static const xml_tool_call_format form = ([]() {
1955
+ xml_tool_call_format form {};
1956
+ form.scope_start = "<|tool_calls_section_begin|>";
1957
+ form.tool_start = "<|tool_call_begin|>";
1958
+ form.tool_sep = "<|tool_call_argument_begin|>{";
1959
+ form.key_start = "\"";
1960
+ form.key_val_sep = "\": ";
1961
+ form.val_end = ", ";
1962
+ form.tool_end = "}<|tool_call_end|>";
1963
+ form.scope_end = "<|tool_calls_section_end|>";
1964
+ form.raw_argval = false;
1965
+ form.last_val_end = "";
1966
+ return form;
1967
+ })();
1968
+ builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
1969
+ }
1970
+
1971
+ static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
1972
+ common_chat_params data;
1973
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1974
+
1975
+ data.prompt = apply(tmpl, params);
1976
+ data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
1977
+
1978
+ data.preserved_tokens = {
1979
+ "<thinking>",
1980
+ "</thinking>",
1981
+ "<tool_calls>",
1982
+ "</tool_calls>",
1983
+ };
1984
+
1985
+ // build grammar for tool call
1986
+ static const xml_tool_call_format form = ([]() {
1987
+ xml_tool_call_format form {};
1988
+ form.scope_start = "<tool_calls>[";
1989
+ form.tool_start = "{\"name\": \"";
1990
+ form.tool_sep = "\", \"arguments\": {";
1991
+ form.key_start = "\"";
1992
+ form.key_val_sep = "\": ";
1993
+ form.val_end = ", ";
1994
+ form.tool_end = "}, ";
1995
+ form.scope_end = "]</tool_calls>";
1996
+ form.raw_argval = false;
1997
+ form.last_val_end = "";
1998
+ form.last_tool_end = "}";
1999
+ return form;
2000
+ })();
2001
+ build_grammar_xml_tool_call(data, params.tools, form);
2002
+
2003
+ return data;
2004
+ }
2005
+
2006
+ static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
2007
+ static const xml_tool_call_format form = ([]() {
2008
+ xml_tool_call_format form {};
2009
+ form.scope_start = "<tool_calls>[";
2010
+ form.tool_start = "{\"name\": \"";
2011
+ form.tool_sep = "\", \"arguments\": {";
2012
+ form.key_start = "\"";
2013
+ form.key_val_sep = "\": ";
2014
+ form.val_end = ", ";
2015
+ form.tool_end = "}, ";
2016
+ form.scope_end = "]</tool_calls>";
2017
+ form.raw_argval = false;
2018
+ form.last_val_end = "";
2019
+ form.last_tool_end = "}";
2020
+ return form;
2021
+ })();
2022
+ builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
2023
+ }
2024
+
2025
+ static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
2026
+ common_chat_params data;
2027
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2028
+
2029
+ data.prompt = apply(tmpl, params);
2030
+ data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
2031
+
2032
+ data.preserved_tokens = {
2033
+ "<tool_call>",
2034
+ "</tool_call>",
2035
+ };
2036
+
2037
+ // build grammar for tool call
2038
+ static const xml_tool_call_format form = ([]() {
2039
+ xml_tool_call_format form {};
2040
+ form.scope_start = "\n";
2041
+ form.tool_start = "<tool_call>\n{\"name\": \"";
2042
+ form.tool_sep = "\", \"arguments\": {";
2043
+ form.key_start = "\"";
2044
+ form.key_val_sep = "\": ";
2045
+ form.val_end = ", ";
2046
+ form.tool_end = "}\n</tool_call>";
2047
+ form.scope_end = "";
2048
+ form.raw_argval = false;
2049
+ form.last_val_end = "";
2050
+ return form;
2051
+ })();
2052
+ build_grammar_xml_tool_call(data, params.tools, form);
2053
+
2054
+ return data;
2055
+ }
2056
+
2057
+ static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
2058
+ static const xml_tool_call_format form = ([]() {
2059
+ xml_tool_call_format form {};
2060
+ form.scope_start = "";
2061
+ form.tool_start = "<tool_call>\n{\"name\": \"";
2062
+ form.tool_sep = "\", \"arguments\": {";
2063
+ form.key_start = "\"";
2064
+ form.key_val_sep = "\": ";
2065
+ form.val_end = ", ";
2066
+ form.tool_end = "}\n</tool_call>";
2067
+ form.scope_end = "";
2068
+ form.raw_argval = false;
2069
+ form.last_val_end = "";
2070
+ return form;
2071
+ })();
2072
+ builder.consume_reasoning_with_xml_tool_calls(form);
2073
+ }
2074
+
1797
2075
  static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
1798
2076
  common_chat_params data;
1799
2077
 
@@ -2028,6 +2306,100 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
2028
2306
  }
2029
2307
  }
2030
2308
 
2309
+ static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
2310
+ common_chat_params data;
2311
+ data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2312
+
2313
+ std::string prompt = apply(tmpl, inputs);
2314
+
2315
+ // match the existing trimming behavior
2316
+ if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
2317
+ prompt.erase(0, tmpl.bos_token().size());
2318
+ }
2319
+ if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
2320
+ prompt.erase(prompt.size() - tmpl.eos_token().size());
2321
+ }
2322
+ if (string_ends_with(prompt, "<think>")) {
2323
+ if (!inputs.enable_thinking) {
2324
+ prompt += "</think>";
2325
+ } else {
2326
+ data.thinking_forced_open = true;
2327
+ }
2328
+ }
2329
+
2330
+ // add GLM preserved tokens
2331
+ data.preserved_tokens = {
2332
+ "<|endoftext|>",
2333
+ "[MASK]",
2334
+ "[gMASK]",
2335
+ "[sMASK]",
2336
+ "<sop>",
2337
+ "<eop>",
2338
+ "<|system|>",
2339
+ "<|user|>",
2340
+ "<|assistant|>",
2341
+ "<|observation|>",
2342
+ "<|begin_of_image|>",
2343
+ "<|end_of_image|>",
2344
+ "<|begin_of_video|>",
2345
+ "<|end_of_video|>",
2346
+ "<|begin_of_audio|>",
2347
+ "<|end_of_audio|>",
2348
+ "<|begin_of_transcription|>",
2349
+ "<|end_of_transcription|>",
2350
+ "<|code_prefix|>",
2351
+ "<|code_middle|>",
2352
+ "<|code_suffix|>",
2353
+ "/nothink",
2354
+ "<think>",
2355
+ "</think>",
2356
+ "<tool_call>",
2357
+ "</tool_call>",
2358
+ "<arg_key>",
2359
+ "</arg_key>",
2360
+ "<arg_value>",
2361
+ "</arg_value>"
2362
+ };
2363
+
2364
+ // extra GLM 4.5 stop word
2365
+ data.additional_stops.insert(data.additional_stops.end(), {
2366
+ "<|user|>",
2367
+ "<|observation|>"
2368
+ });
2369
+
2370
+ // build grammar for tool call
2371
+ static const xml_tool_call_format form {
2372
+ /* form.scope_start = */ "",
2373
+ /* form.tool_start = */ "\n<tool_call>",
2374
+ /* form.tool_sep = */ "\n",
2375
+ /* form.key_start = */ "<arg_key>",
2376
+ /* form.key_val_sep = */ "</arg_key>\n<arg_value>",
2377
+ /* form.val_end = */ "</arg_value>\n",
2378
+ /* form.tool_end = */ "</tool_call>\n",
2379
+ /* form.scope_end = */ "",
2380
+ };
2381
+ build_grammar_xml_tool_call(data, inputs.tools, form);
2382
+
2383
+ data.prompt = prompt;
2384
+ data.format = COMMON_CHAT_FORMAT_GLM_4_5;
2385
+ return data;
2386
+ }
2387
+
2388
+ static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
2389
+ static const xml_tool_call_format form {
2390
+ /* form.scope_start = */ "",
2391
+ /* form.tool_start = */ "<tool_call>",
2392
+ /* form.tool_sep = */ "",
2393
+ /* form.key_start = */ "<arg_key>",
2394
+ /* form.key_val_sep = */ "</arg_key>",
2395
+ /* form.val_end = */ "</arg_value>",
2396
+ /* form.tool_end = */ "</tool_call>",
2397
+ /* form.scope_end = */ "",
2398
+ /* form.key_val_sep2 = */ "<arg_value>",
2399
+ };
2400
+ builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
2401
+ }
2402
+
2031
2403
  static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
2032
2404
  LOG_DBG("%s\n", __func__);
2033
2405
  common_chat_params data;
@@ -2691,91 +3063,17 @@ static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
2691
3063
  }
2692
3064
 
2693
3065
  static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
2694
- // Parse thinking tags first - this handles the main reasoning content
2695
- builder.try_parse_reasoning("<seed:think>", "</seed:think>");
2696
-
2697
- if (!builder.syntax().parse_tool_calls) {
2698
- builder.add_content(builder.consume_rest());
2699
- return;
2700
- }
2701
-
2702
- // Parse tool calls - Seed-OSS uses <seed:tool_call> format
2703
- static const common_regex tool_call_begin_regex("<seed:tool_call>");
2704
- static const common_regex tool_call_end_regex("</seed:tool_call>");
2705
- static const common_regex function_regex("<function=([^>]+)>");
2706
- static const common_regex param_regex("<parameter=([^>]+)>");
2707
-
2708
- while (auto tool_res = builder.try_find_regex(tool_call_begin_regex)) {
2709
- builder.consume_spaces(); // Consume whitespace after <seed:tool_call>
2710
-
2711
- // Look for function call inside tool call, ignore any content before it
2712
- if (auto func_res = builder.try_find_regex(function_regex, std::string::npos, false)) {
2713
- auto function_name = builder.str(func_res->groups[1]);
2714
-
2715
- // Parse Seed-OSS parameters <parameter=name>value</parameter>
2716
- json args = json::object();
2717
- // Parse all parameters
2718
- while (auto param_res = builder.try_find_regex(param_regex, std::string::npos, false)) {
2719
- // again, ignore noise around parameters
2720
- auto param_name = builder.str(param_res->groups[1]);
2721
- builder.move_to(param_res->groups[0].end);
2722
- builder.consume_spaces(); // Consume whitespace after parameter
2723
- auto savedPos = builder.pos();
2724
- if (auto param_parse = builder.try_find_literal("</parameter>")) {
2725
- auto param = param_parse->prelude;
2726
- builder.move_to(savedPos);
2727
- try {
2728
- if (auto param_res = builder.try_consume_json()) {
2729
- args[param_name] = param_res->json;
2730
- } else {
2731
- args[param_name] = param;
2732
- }
2733
- } catch (json::exception &) {
2734
- args[param_name] = param;
2735
- }
2736
- } else {
2737
- throw common_chat_msg_partial_exception("Incomplete tool parameter");
2738
- }
2739
- }
2740
- // Look for closing function tag
2741
- auto end_func = builder.try_find_literal("</function>");
2742
- if (end_func) {
2743
- builder.move_to(end_func->groups[0].end);
2744
- builder.consume_spaces(); // Consume whitespace after </function>
2745
-
2746
- // Add the tool call with parsed arguments, but only if we REALLY got the literal
2747
- auto eaten_fragment = builder.input().substr(end_func->groups[0].begin, end_func->groups[0].end);
2748
- auto funlen = std::string("</function>").length();
2749
- if (eaten_fragment.length() >= funlen && eaten_fragment.substr(0, funlen) == std::string("</function>")) {
2750
- if (!builder.add_tool_call(function_name, "", args.dump())) {
2751
- throw common_chat_msg_partial_exception("Incomplete tool call");
2752
- }
2753
- } else {
2754
- throw common_chat_msg_partial_exception("Incomplete tool call");
2755
- }
2756
- } else {
2757
- throw common_chat_msg_partial_exception("Incomplete tool call");
2758
- }
2759
- // Look for closing tool call tag
2760
- if (auto end_tool = builder.try_find_regex(tool_call_end_regex, std::string::npos, false)) {
2761
- builder.move_to(end_tool->groups[0].end);
2762
- builder.consume_spaces(); // Consume trailing whitespace after tool call
2763
- } else {
2764
- throw common_chat_msg_partial_exception("Incomplete tool call");
2765
- }
2766
- } else {
2767
- // No function found - don't consume content here, let it be handled at the end
2768
- break;
2769
- }
2770
- }
2771
-
2772
- // Consume any remaining whitespace after all tool call processing
2773
- builder.consume_spaces();
2774
- auto remaining = builder.consume_rest();
2775
- // If there's any non-whitespace content remaining, add it as content
2776
- if (!string_strip(remaining).empty()) {
2777
- builder.add_content(remaining);
2778
- }
3066
+ static const xml_tool_call_format form {
3067
+ /* form.scope_start = */ "<seed:tool_call>",
3068
+ /* form.tool_start = */ "<function=",
3069
+ /* form.tool_sep = */ ">",
3070
+ /* form.key_start = */ "<parameter=",
3071
+ /* form.key_val_sep = */ ">",
3072
+ /* form.val_end = */ "</parameter>",
3073
+ /* form.tool_end = */ "</function>",
3074
+ /* form.scope_end = */ "</seed:tool_call>",
3075
+ };
3076
+ builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
2779
3077
  }
2780
3078
 
2781
3079
  static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
@@ -2914,6 +3212,35 @@ static common_chat_params common_chat_templates_apply_jinja(
2914
3212
  return common_chat_params_init_granite(tmpl, params);
2915
3213
  }
2916
3214
 
3215
+ // GLM 4.5: detect by the "[gMASK]<sop>" marker plus <arg_key> and <arg_value> tags, only when no JSON schema is set (check before Hermes since both use <tool_call>)
3216
+ if (src.find("[gMASK]<sop>") != std::string::npos &&
3217
+ src.find("<arg_key>") != std::string::npos &&
3218
+ src.find("<arg_value>") != std::string::npos &&
3219
+ params.json_schema.is_null()) {
3220
+ return common_chat_params_init_glm_4_5(tmpl, params);
3221
+ }
3222
+
3223
+ // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
3224
+ // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
3225
+ // Require presence of <tool_call>, <function>, <function=...>, <parameters>, and <parameter=...> markers.
3226
+ if (src.find("<tool_call>") != std::string::npos &&
3227
+ src.find("<function>") != std::string::npos &&
3228
+ src.find("<function=") != std::string::npos &&
3229
+ src.find("<parameters>") != std::string::npos &&
3230
+ src.find("<parameter=") != std::string::npos) {
3231
+ return common_chat_params_init_qwen3_coder_xml(tmpl, params);
3232
+ }
3233
+
3234
+ // Xiaomi MiMo format detection (must come before Hermes 2 Pro)
3235
+ if (src.find("<tools>") != std::string::npos &&
3236
+ src.find("# Tools") != std::string::npos &&
3237
+ src.find("</tools>") != std::string::npos &&
3238
+ src.find("<tool_calls>") != std::string::npos &&
3239
+ src.find("</tool_calls>") != std::string::npos &&
3240
+ src.find("<tool_response>") != std::string::npos) {
3241
+ return common_chat_params_init_xiaomi_mimo(tmpl, params);
3242
+ }
3243
+
2917
3244
  // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
2918
3245
  if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
2919
3246
  return common_chat_params_init_hermes_2_pro(tmpl, params);
@@ -2945,6 +3272,29 @@ static common_chat_params common_chat_templates_apply_jinja(
2945
3272
  return common_chat_params_init_lfm2(tmpl, params);
2946
3273
  }
2947
3274
 
3275
+ // MiniMax-M2 format detection
3276
+ if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
3277
+ return common_chat_params_init_minimax_m2(tmpl, params);
3278
+ }
3279
+
3280
+ // Kimi K2 format detection
3281
+ if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
3282
+ src.find("<|tool_calls_section_begin|>") != std::string::npos &&
3283
+ src.find("## Return of") != std::string::npos) {
3284
+ return common_chat_params_init_kimi_k2(tmpl, params);
3285
+ }
3286
+
3287
+ // Apriel 1.5 format detection
3288
+ if (src.find("<thinking>") != std::string::npos &&
3289
+ src.find("</thinking>") != std::string::npos &&
3290
+ src.find("<available_tools>") != std::string::npos &&
3291
+ src.find("<|assistant|>") != std::string::npos &&
3292
+ src.find("<|tool_result|>") != std::string::npos &&
3293
+ src.find("<tool_calls>[") != std::string::npos &&
3294
+ src.find("]</tool_calls>") != std::string::npos) {
3295
+ return common_chat_params_init_apriel_1_5(tmpl, params);
3296
+ }
3297
+
2948
3298
  // Use generic handler when mixing tools + JSON schema.
2949
3299
  // TODO: support that mix in handlers below.
2950
3300
  if ((params.tools.is_array() && params.json_schema.is_object())) {
@@ -2996,7 +3346,7 @@ static common_chat_params common_chat_templates_apply_legacy(
2996
3346
  const struct common_chat_templates * tmpls,
2997
3347
  const struct common_chat_templates_inputs & inputs)
2998
3348
  {
2999
- int alloc_size = 0;
3349
+ size_t alloc_size = 0;
3000
3350
  std::vector<llama_chat_message> chat;
3001
3351
  std::vector<std::string> contents;
3002
3352
 
@@ -3018,7 +3368,8 @@ static common_chat_params common_chat_templates_apply_legacy(
3018
3368
  const auto & msg = inputs.messages[i];
3019
3369
  const auto & content = contents[i];
3020
3370
  chat.push_back({msg.role.c_str(), content.c_str()});
3021
- alloc_size += (msg.role.size() + content.size()) * 1.25;
3371
+ size_t msg_size = msg.role.size() + content.size();
3372
+ alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
3022
3373
  }
3023
3374
 
3024
3375
  std::vector<char> buf(alloc_size);
@@ -3040,6 +3391,11 @@ static common_chat_params common_chat_templates_apply_legacy(
3040
3391
  res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
3041
3392
  }
3042
3393
 
3394
+ // for safety, we check the result again
3395
+ if (res < 0 || (size_t) res > buf.size()) {
3396
+ throw std::runtime_error("failed to apply chat template, try using --jinja");
3397
+ }
3398
+
3043
3399
  common_chat_params params;
3044
3400
  params.prompt = std::string(buf.data(), res);
3045
3401
  if (!inputs.json_schema.empty()) {
@@ -3126,6 +3482,24 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
3126
3482
  case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
3127
3483
  common_chat_parse_lfm2(builder);
3128
3484
  break;
3485
+ case COMMON_CHAT_FORMAT_MINIMAX_M2:
3486
+ common_chat_parse_minimax_m2(builder);
3487
+ break;
3488
+ case COMMON_CHAT_FORMAT_GLM_4_5:
3489
+ common_chat_parse_glm_4_5(builder);
3490
+ break;
3491
+ case COMMON_CHAT_FORMAT_KIMI_K2:
3492
+ common_chat_parse_kimi_k2(builder);
3493
+ break;
3494
+ case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
3495
+ common_chat_parse_qwen3_coder_xml(builder);
3496
+ break;
3497
+ case COMMON_CHAT_FORMAT_APRIEL_1_5:
3498
+ common_chat_parse_apriel_1_5(builder);
3499
+ break;
3500
+ case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
3501
+ common_chat_parse_xiaomi_mimo(builder);
3502
+ break;
3129
3503
  default:
3130
3504
  throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
3131
3505
  }
@@ -128,6 +128,12 @@ enum common_chat_format {
128
128
  COMMON_CHAT_FORMAT_NEMOTRON_V2,
129
129
  COMMON_CHAT_FORMAT_APERTUS,
130
130
  COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
131
+ COMMON_CHAT_FORMAT_GLM_4_5,
132
+ COMMON_CHAT_FORMAT_MINIMAX_M2,
133
+ COMMON_CHAT_FORMAT_KIMI_K2,
134
+ COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
135
+ COMMON_CHAT_FORMAT_APRIEL_1_5,
136
+ COMMON_CHAT_FORMAT_XIAOMI_MIMO,
131
137
 
132
138
  COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
133
139
  };
@@ -26,7 +26,6 @@
26
26
  #include <sstream>
27
27
  #include <string>
28
28
  #include <thread>
29
- #include <unordered_map>
30
29
  #include <unordered_set>
31
30
  #include <vector>
32
31
 
@@ -60,6 +59,14 @@
60
59
  #pragma warning(disable: 4244 4267) // possible loss of data
61
60
  #endif
62
61
 
62
+ common_time_meas::common_time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
63
+
64
+ common_time_meas::~common_time_meas() {
65
+ if (t_start_us >= 0) {
66
+ t_acc += ggml_time_us() - t_start_us;
67
+ }
68
+ }
69
+
63
70
  //
64
71
  // CPU utils
65
72
  //