@fugood/llama.node 1.3.4 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +21 -1
- package/lib/binding.js +1 -1
- package/lib/binding.ts +47 -15
- package/lib/index.js +26 -2
- package/lib/index.ts +42 -10
- package/package.json +15 -14
- package/scripts/llama.cpp.patch +31 -10
- package/src/LlamaContext.cpp +46 -0
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
- package/src/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/src/llama.cpp/common/chat-parser.h +10 -0
- package/src/llama.cpp/common/chat.cpp +461 -87
- package/src/llama.cpp/common/chat.h +6 -0
- package/src/llama.cpp/common/common.cpp +8 -1
- package/src/llama.cpp/common/common.h +12 -5
- package/src/llama.cpp/common/json-partial.cpp +19 -2
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -0
- package/src/llama.cpp/common/json-schema-to-grammar.h +2 -0
- package/src/llama.cpp/common/sampling.cpp +60 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -38
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +15 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +16 -14
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +49 -48
- package/src/llama.cpp/src/llama-grammar.cpp +17 -9
- package/src/llama.cpp/src/llama-impl.cpp +3 -3
- package/src/llama.cpp/src/llama-sampling.cpp +3 -6
- package/src/llama.cpp/src/llama-vocab.cpp +1 -0
|
@@ -630,6 +630,12 @@ const char * common_chat_format_name(common_chat_format format) {
|
|
|
630
630
|
case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
|
|
631
631
|
case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
|
|
632
632
|
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
|
|
633
|
+
case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
|
|
634
|
+
case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
|
|
635
|
+
case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
|
|
636
|
+
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
|
|
637
|
+
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
|
|
638
|
+
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
|
|
633
639
|
default:
|
|
634
640
|
throw std::runtime_error("Unknown chat format");
|
|
635
641
|
}
|
|
@@ -1794,6 +1800,278 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
|
|
|
1794
1800
|
}
|
|
1795
1801
|
}
|
|
1796
1802
|
|
|
1803
|
+
|
|
1804
|
+
static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
|
|
1805
|
+
common_chat_params data;
|
|
1806
|
+
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1807
|
+
|
|
1808
|
+
data.prompt = apply(tmpl, params);
|
|
1809
|
+
data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
|
|
1810
|
+
|
|
1811
|
+
// Handle thinking tags based on prompt ending
|
|
1812
|
+
if (string_ends_with(data.prompt, "<think>\n")) {
|
|
1813
|
+
if (!params.enable_thinking) {
|
|
1814
|
+
// Close the thinking tag immediately if thinking is disabled
|
|
1815
|
+
data.prompt += "</think>\n\n";
|
|
1816
|
+
} else {
|
|
1817
|
+
// Mark thinking as forced open (template started with <think>)
|
|
1818
|
+
data.thinking_forced_open = true;
|
|
1819
|
+
}
|
|
1820
|
+
}
|
|
1821
|
+
|
|
1822
|
+
// Preserve MiniMax-M2 special tokens
|
|
1823
|
+
data.preserved_tokens = {
|
|
1824
|
+
"<think>",
|
|
1825
|
+
"</think>",
|
|
1826
|
+
"<minimax:tool_call>",
|
|
1827
|
+
"</minimax:tool_call>",
|
|
1828
|
+
};
|
|
1829
|
+
|
|
1830
|
+
// build grammar for tool call
|
|
1831
|
+
static const xml_tool_call_format form {
|
|
1832
|
+
/* form.scope_start = */ "<minimax:tool_call>\n",
|
|
1833
|
+
/* form.tool_start = */ "<invoke name=\"",
|
|
1834
|
+
/* form.tool_sep = */ "\">\n",
|
|
1835
|
+
/* form.key_start = */ "<parameter name=\"",
|
|
1836
|
+
/* form.key_val_sep = */ "\">",
|
|
1837
|
+
/* form.val_end = */ "</parameter>\n",
|
|
1838
|
+
/* form.tool_end = */ "</invoke>\n",
|
|
1839
|
+
/* form.scope_end = */ "</minimax:tool_call>",
|
|
1840
|
+
};
|
|
1841
|
+
build_grammar_xml_tool_call(data, params.tools, form);
|
|
1842
|
+
|
|
1843
|
+
return data;
|
|
1844
|
+
}
|
|
1845
|
+
|
|
1846
|
+
static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
|
|
1847
|
+
static const xml_tool_call_format form {
|
|
1848
|
+
/* form.scope_start = */ "<minimax:tool_call>",
|
|
1849
|
+
/* form.tool_start = */ "<invoke name=\"",
|
|
1850
|
+
/* form.tool_sep = */ "\">",
|
|
1851
|
+
/* form.key_start = */ "<parameter name=\"",
|
|
1852
|
+
/* form.key_val_sep = */ "\">",
|
|
1853
|
+
/* form.val_end = */ "</parameter>",
|
|
1854
|
+
/* form.tool_end = */ "</invoke>",
|
|
1855
|
+
/* form.scope_end = */ "</minimax:tool_call>",
|
|
1856
|
+
};
|
|
1857
|
+
builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
|
|
1858
|
+
}
|
|
1859
|
+
|
|
1860
|
+
static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
|
|
1861
|
+
common_chat_params data;
|
|
1862
|
+
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1863
|
+
|
|
1864
|
+
data.prompt = apply(tmpl, params);
|
|
1865
|
+
data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
|
|
1866
|
+
|
|
1867
|
+
data.preserved_tokens = {
|
|
1868
|
+
"<tool_call>",
|
|
1869
|
+
"</tool_call>",
|
|
1870
|
+
"<function=",
|
|
1871
|
+
"</function>",
|
|
1872
|
+
"<parameter=",
|
|
1873
|
+
"</parameter>",
|
|
1874
|
+
};
|
|
1875
|
+
|
|
1876
|
+
// build grammar for tool call
|
|
1877
|
+
static const xml_tool_call_format form {
|
|
1878
|
+
/* form.scope_start = */ "<tool_call>\n",
|
|
1879
|
+
/* form.tool_start = */ "<function=",
|
|
1880
|
+
/* form.tool_sep = */ ">\n",
|
|
1881
|
+
/* form.key_start = */ "<parameter=",
|
|
1882
|
+
/* form.key_val_sep = */ ">\n",
|
|
1883
|
+
/* form.val_end = */ "\n</parameter>\n",
|
|
1884
|
+
/* form.tool_end = */ "</function>\n",
|
|
1885
|
+
/* form.scope_end = */ "</tool_call>",
|
|
1886
|
+
};
|
|
1887
|
+
build_grammar_xml_tool_call(data, params.tools, form);
|
|
1888
|
+
|
|
1889
|
+
return data;
|
|
1890
|
+
}
|
|
1891
|
+
|
|
1892
|
+
static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
|
|
1893
|
+
static const xml_tool_call_format form = ([]() {
|
|
1894
|
+
xml_tool_call_format form {};
|
|
1895
|
+
form.scope_start = "<tool_call>";
|
|
1896
|
+
form.tool_start = "<function=";
|
|
1897
|
+
form.tool_sep = ">";
|
|
1898
|
+
form.key_start = "<parameter=";
|
|
1899
|
+
form.key_val_sep = ">";
|
|
1900
|
+
form.val_end = "</parameter>";
|
|
1901
|
+
form.tool_end = "</function>";
|
|
1902
|
+
form.scope_end = "</tool_call>";
|
|
1903
|
+
form.trim_raw_argval = true;
|
|
1904
|
+
return form;
|
|
1905
|
+
})();
|
|
1906
|
+
builder.consume_reasoning_with_xml_tool_calls(form);
|
|
1907
|
+
}
|
|
1908
|
+
|
|
1909
|
+
static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
|
|
1910
|
+
common_chat_params data;
|
|
1911
|
+
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1912
|
+
|
|
1913
|
+
data.prompt = apply(tmpl, params);
|
|
1914
|
+
data.format = COMMON_CHAT_FORMAT_KIMI_K2;
|
|
1915
|
+
|
|
1916
|
+
data.preserved_tokens = {
|
|
1917
|
+
"<think>",
|
|
1918
|
+
"</think>",
|
|
1919
|
+
"<|tool_calls_section_begin|>",
|
|
1920
|
+
"<|tool_call_begin|>",
|
|
1921
|
+
"<|tool_call_argument_begin|>",
|
|
1922
|
+
"<|tool_call_end|>",
|
|
1923
|
+
"<|tool_calls_section_end|>",
|
|
1924
|
+
"<|im_end|>",
|
|
1925
|
+
"<|im_system|>",
|
|
1926
|
+
"<|im_middle|>",
|
|
1927
|
+
};
|
|
1928
|
+
|
|
1929
|
+
data.additional_stops.insert(data.additional_stops.end(), {
|
|
1930
|
+
"<|im_end|>",
|
|
1931
|
+
"<|im_middle|>"
|
|
1932
|
+
});
|
|
1933
|
+
// build grammar for tool call
|
|
1934
|
+
static const xml_tool_call_format form = ([]() {
|
|
1935
|
+
xml_tool_call_format form {};
|
|
1936
|
+
form.scope_start = "<|tool_calls_section_begin|>";
|
|
1937
|
+
form.tool_start = "<|tool_call_begin|>";
|
|
1938
|
+
form.tool_sep = "<|tool_call_argument_begin|>{";
|
|
1939
|
+
form.key_start = "\"";
|
|
1940
|
+
form.key_val_sep = "\": ";
|
|
1941
|
+
form.val_end = ", ";
|
|
1942
|
+
form.tool_end = "}<|tool_call_end|>";
|
|
1943
|
+
form.scope_end = "<|tool_calls_section_end|>";
|
|
1944
|
+
form.raw_argval = false;
|
|
1945
|
+
form.last_val_end = "";
|
|
1946
|
+
return form;
|
|
1947
|
+
})();
|
|
1948
|
+
build_grammar_xml_tool_call(data, params.tools, form);
|
|
1949
|
+
|
|
1950
|
+
return data;
|
|
1951
|
+
}
|
|
1952
|
+
|
|
1953
|
+
static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
|
|
1954
|
+
static const xml_tool_call_format form = ([]() {
|
|
1955
|
+
xml_tool_call_format form {};
|
|
1956
|
+
form.scope_start = "<|tool_calls_section_begin|>";
|
|
1957
|
+
form.tool_start = "<|tool_call_begin|>";
|
|
1958
|
+
form.tool_sep = "<|tool_call_argument_begin|>{";
|
|
1959
|
+
form.key_start = "\"";
|
|
1960
|
+
form.key_val_sep = "\": ";
|
|
1961
|
+
form.val_end = ", ";
|
|
1962
|
+
form.tool_end = "}<|tool_call_end|>";
|
|
1963
|
+
form.scope_end = "<|tool_calls_section_end|>";
|
|
1964
|
+
form.raw_argval = false;
|
|
1965
|
+
form.last_val_end = "";
|
|
1966
|
+
return form;
|
|
1967
|
+
})();
|
|
1968
|
+
builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
|
|
1969
|
+
}
|
|
1970
|
+
|
|
1971
|
+
static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
|
|
1972
|
+
common_chat_params data;
|
|
1973
|
+
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1974
|
+
|
|
1975
|
+
data.prompt = apply(tmpl, params);
|
|
1976
|
+
data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
|
|
1977
|
+
|
|
1978
|
+
data.preserved_tokens = {
|
|
1979
|
+
"<thinking>",
|
|
1980
|
+
"</thinking>",
|
|
1981
|
+
"<tool_calls>",
|
|
1982
|
+
"</tool_calls>",
|
|
1983
|
+
};
|
|
1984
|
+
|
|
1985
|
+
// build grammar for tool call
|
|
1986
|
+
static const xml_tool_call_format form = ([]() {
|
|
1987
|
+
xml_tool_call_format form {};
|
|
1988
|
+
form.scope_start = "<tool_calls>[";
|
|
1989
|
+
form.tool_start = "{\"name\": \"";
|
|
1990
|
+
form.tool_sep = "\", \"arguments\": {";
|
|
1991
|
+
form.key_start = "\"";
|
|
1992
|
+
form.key_val_sep = "\": ";
|
|
1993
|
+
form.val_end = ", ";
|
|
1994
|
+
form.tool_end = "}, ";
|
|
1995
|
+
form.scope_end = "]</tool_calls>";
|
|
1996
|
+
form.raw_argval = false;
|
|
1997
|
+
form.last_val_end = "";
|
|
1998
|
+
form.last_tool_end = "}";
|
|
1999
|
+
return form;
|
|
2000
|
+
})();
|
|
2001
|
+
build_grammar_xml_tool_call(data, params.tools, form);
|
|
2002
|
+
|
|
2003
|
+
return data;
|
|
2004
|
+
}
|
|
2005
|
+
|
|
2006
|
+
static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
|
|
2007
|
+
static const xml_tool_call_format form = ([]() {
|
|
2008
|
+
xml_tool_call_format form {};
|
|
2009
|
+
form.scope_start = "<tool_calls>[";
|
|
2010
|
+
form.tool_start = "{\"name\": \"";
|
|
2011
|
+
form.tool_sep = "\", \"arguments\": {";
|
|
2012
|
+
form.key_start = "\"";
|
|
2013
|
+
form.key_val_sep = "\": ";
|
|
2014
|
+
form.val_end = ", ";
|
|
2015
|
+
form.tool_end = "}, ";
|
|
2016
|
+
form.scope_end = "]</tool_calls>";
|
|
2017
|
+
form.raw_argval = false;
|
|
2018
|
+
form.last_val_end = "";
|
|
2019
|
+
form.last_tool_end = "}";
|
|
2020
|
+
return form;
|
|
2021
|
+
})();
|
|
2022
|
+
builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
|
|
2023
|
+
}
|
|
2024
|
+
|
|
2025
|
+
static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
|
|
2026
|
+
common_chat_params data;
|
|
2027
|
+
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
2028
|
+
|
|
2029
|
+
data.prompt = apply(tmpl, params);
|
|
2030
|
+
data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
|
|
2031
|
+
|
|
2032
|
+
data.preserved_tokens = {
|
|
2033
|
+
"<tool_call>",
|
|
2034
|
+
"</tool_call>",
|
|
2035
|
+
};
|
|
2036
|
+
|
|
2037
|
+
// build grammar for tool call
|
|
2038
|
+
static const xml_tool_call_format form = ([]() {
|
|
2039
|
+
xml_tool_call_format form {};
|
|
2040
|
+
form.scope_start = "\n";
|
|
2041
|
+
form.tool_start = "<tool_call>\n{\"name\": \"";
|
|
2042
|
+
form.tool_sep = "\", \"arguments\": {";
|
|
2043
|
+
form.key_start = "\"";
|
|
2044
|
+
form.key_val_sep = "\": ";
|
|
2045
|
+
form.val_end = ", ";
|
|
2046
|
+
form.tool_end = "}\n</tool_call>";
|
|
2047
|
+
form.scope_end = "";
|
|
2048
|
+
form.raw_argval = false;
|
|
2049
|
+
form.last_val_end = "";
|
|
2050
|
+
return form;
|
|
2051
|
+
})();
|
|
2052
|
+
build_grammar_xml_tool_call(data, params.tools, form);
|
|
2053
|
+
|
|
2054
|
+
return data;
|
|
2055
|
+
}
|
|
2056
|
+
|
|
2057
|
+
static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
|
|
2058
|
+
static const xml_tool_call_format form = ([]() {
|
|
2059
|
+
xml_tool_call_format form {};
|
|
2060
|
+
form.scope_start = "";
|
|
2061
|
+
form.tool_start = "<tool_call>\n{\"name\": \"";
|
|
2062
|
+
form.tool_sep = "\", \"arguments\": {";
|
|
2063
|
+
form.key_start = "\"";
|
|
2064
|
+
form.key_val_sep = "\": ";
|
|
2065
|
+
form.val_end = ", ";
|
|
2066
|
+
form.tool_end = "}\n</tool_call>";
|
|
2067
|
+
form.scope_end = "";
|
|
2068
|
+
form.raw_argval = false;
|
|
2069
|
+
form.last_val_end = "";
|
|
2070
|
+
return form;
|
|
2071
|
+
})();
|
|
2072
|
+
builder.consume_reasoning_with_xml_tool_calls(form);
|
|
2073
|
+
}
|
|
2074
|
+
|
|
1797
2075
|
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1798
2076
|
common_chat_params data;
|
|
1799
2077
|
|
|
@@ -2028,6 +2306,100 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
|
|
|
2028
2306
|
}
|
|
2029
2307
|
}
|
|
2030
2308
|
|
|
2309
|
+
static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
2310
|
+
common_chat_params data;
|
|
2311
|
+
data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
2312
|
+
|
|
2313
|
+
std::string prompt = apply(tmpl, inputs);
|
|
2314
|
+
|
|
2315
|
+
// match the existing trimming behavior
|
|
2316
|
+
if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
|
|
2317
|
+
prompt.erase(0, tmpl.bos_token().size());
|
|
2318
|
+
}
|
|
2319
|
+
if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
|
|
2320
|
+
prompt.erase(prompt.size() - tmpl.eos_token().size());
|
|
2321
|
+
}
|
|
2322
|
+
if (string_ends_with(prompt, "<think>")) {
|
|
2323
|
+
if (!inputs.enable_thinking) {
|
|
2324
|
+
prompt += "</think>";
|
|
2325
|
+
} else {
|
|
2326
|
+
data.thinking_forced_open = true;
|
|
2327
|
+
}
|
|
2328
|
+
}
|
|
2329
|
+
|
|
2330
|
+
// add GLM preserved tokens
|
|
2331
|
+
data.preserved_tokens = {
|
|
2332
|
+
"<|endoftext|>",
|
|
2333
|
+
"[MASK]",
|
|
2334
|
+
"[gMASK]",
|
|
2335
|
+
"[sMASK]",
|
|
2336
|
+
"<sop>",
|
|
2337
|
+
"<eop>",
|
|
2338
|
+
"<|system|>",
|
|
2339
|
+
"<|user|>",
|
|
2340
|
+
"<|assistant|>",
|
|
2341
|
+
"<|observation|>",
|
|
2342
|
+
"<|begin_of_image|>",
|
|
2343
|
+
"<|end_of_image|>",
|
|
2344
|
+
"<|begin_of_video|>",
|
|
2345
|
+
"<|end_of_video|>",
|
|
2346
|
+
"<|begin_of_audio|>",
|
|
2347
|
+
"<|end_of_audio|>",
|
|
2348
|
+
"<|begin_of_transcription|>",
|
|
2349
|
+
"<|end_of_transcription|>",
|
|
2350
|
+
"<|code_prefix|>",
|
|
2351
|
+
"<|code_middle|>",
|
|
2352
|
+
"<|code_suffix|>",
|
|
2353
|
+
"/nothink",
|
|
2354
|
+
"<think>",
|
|
2355
|
+
"</think>",
|
|
2356
|
+
"<tool_call>",
|
|
2357
|
+
"</tool_call>",
|
|
2358
|
+
"<arg_key>",
|
|
2359
|
+
"</arg_key>",
|
|
2360
|
+
"<arg_value>",
|
|
2361
|
+
"</arg_value>"
|
|
2362
|
+
};
|
|
2363
|
+
|
|
2364
|
+
// extra GLM 4.5 stop word
|
|
2365
|
+
data.additional_stops.insert(data.additional_stops.end(), {
|
|
2366
|
+
"<|user|>",
|
|
2367
|
+
"<|observation|>"
|
|
2368
|
+
});
|
|
2369
|
+
|
|
2370
|
+
// build grammar for tool call
|
|
2371
|
+
static const xml_tool_call_format form {
|
|
2372
|
+
/* form.scope_start = */ "",
|
|
2373
|
+
/* form.tool_start = */ "\n<tool_call>",
|
|
2374
|
+
/* form.tool_sep = */ "\n",
|
|
2375
|
+
/* form.key_start = */ "<arg_key>",
|
|
2376
|
+
/* form.key_val_sep = */ "</arg_key>\n<arg_value>",
|
|
2377
|
+
/* form.val_end = */ "</arg_value>\n",
|
|
2378
|
+
/* form.tool_end = */ "</tool_call>\n",
|
|
2379
|
+
/* form.scope_end = */ "",
|
|
2380
|
+
};
|
|
2381
|
+
build_grammar_xml_tool_call(data, inputs.tools, form);
|
|
2382
|
+
|
|
2383
|
+
data.prompt = prompt;
|
|
2384
|
+
data.format = COMMON_CHAT_FORMAT_GLM_4_5;
|
|
2385
|
+
return data;
|
|
2386
|
+
}
|
|
2387
|
+
|
|
2388
|
+
static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
|
|
2389
|
+
static const xml_tool_call_format form {
|
|
2390
|
+
/* form.scope_start = */ "",
|
|
2391
|
+
/* form.tool_start = */ "<tool_call>",
|
|
2392
|
+
/* form.tool_sep = */ "",
|
|
2393
|
+
/* form.key_start = */ "<arg_key>",
|
|
2394
|
+
/* form.key_val_sep = */ "</arg_key>",
|
|
2395
|
+
/* form.val_end = */ "</arg_value>",
|
|
2396
|
+
/* form.tool_end = */ "</tool_call>",
|
|
2397
|
+
/* form.scope_end = */ "",
|
|
2398
|
+
/* form.key_val_sep2 = */ "<arg_value>",
|
|
2399
|
+
};
|
|
2400
|
+
builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
|
|
2401
|
+
}
|
|
2402
|
+
|
|
2031
2403
|
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
2032
2404
|
LOG_DBG("%s\n", __func__);
|
|
2033
2405
|
common_chat_params data;
|
|
@@ -2691,91 +3063,17 @@ static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
|
|
|
2691
3063
|
}
|
|
2692
3064
|
|
|
2693
3065
|
static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
static const common_regex function_regex("<function=([^>]+)>");
|
|
2706
|
-
static const common_regex param_regex("<parameter=([^>]+)>");
|
|
2707
|
-
|
|
2708
|
-
while (auto tool_res = builder.try_find_regex(tool_call_begin_regex)) {
|
|
2709
|
-
builder.consume_spaces(); // Consume whitespace after <seed:tool_call>
|
|
2710
|
-
|
|
2711
|
-
// Look for function call inside tool call, ignore any content before it
|
|
2712
|
-
if (auto func_res = builder.try_find_regex(function_regex, std::string::npos, false)) {
|
|
2713
|
-
auto function_name = builder.str(func_res->groups[1]);
|
|
2714
|
-
|
|
2715
|
-
// Parse Seed-OSS parameters <parameter=name>value</parameter>
|
|
2716
|
-
json args = json::object();
|
|
2717
|
-
// Parse all parameters
|
|
2718
|
-
while (auto param_res = builder.try_find_regex(param_regex, std::string::npos, false)) {
|
|
2719
|
-
// again, ignore noise around parameters
|
|
2720
|
-
auto param_name = builder.str(param_res->groups[1]);
|
|
2721
|
-
builder.move_to(param_res->groups[0].end);
|
|
2722
|
-
builder.consume_spaces(); // Consume whitespace after parameter
|
|
2723
|
-
auto savedPos = builder.pos();
|
|
2724
|
-
if (auto param_parse = builder.try_find_literal("</parameter>")) {
|
|
2725
|
-
auto param = param_parse->prelude;
|
|
2726
|
-
builder.move_to(savedPos);
|
|
2727
|
-
try {
|
|
2728
|
-
if (auto param_res = builder.try_consume_json()) {
|
|
2729
|
-
args[param_name] = param_res->json;
|
|
2730
|
-
} else {
|
|
2731
|
-
args[param_name] = param;
|
|
2732
|
-
}
|
|
2733
|
-
} catch (json::exception &) {
|
|
2734
|
-
args[param_name] = param;
|
|
2735
|
-
}
|
|
2736
|
-
} else {
|
|
2737
|
-
throw common_chat_msg_partial_exception("Incomplete tool parameter");
|
|
2738
|
-
}
|
|
2739
|
-
}
|
|
2740
|
-
// Look for closing function tag
|
|
2741
|
-
auto end_func = builder.try_find_literal("</function>");
|
|
2742
|
-
if (end_func) {
|
|
2743
|
-
builder.move_to(end_func->groups[0].end);
|
|
2744
|
-
builder.consume_spaces(); // Consume whitespace after </function>
|
|
2745
|
-
|
|
2746
|
-
// Add the tool call with parsed arguments, but only if we REALLY got the literal
|
|
2747
|
-
auto eaten_fragment = builder.input().substr(end_func->groups[0].begin, end_func->groups[0].end);
|
|
2748
|
-
auto funlen = std::string("</function>").length();
|
|
2749
|
-
if (eaten_fragment.length() >= funlen && eaten_fragment.substr(0, funlen) == std::string("</function>")) {
|
|
2750
|
-
if (!builder.add_tool_call(function_name, "", args.dump())) {
|
|
2751
|
-
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2752
|
-
}
|
|
2753
|
-
} else {
|
|
2754
|
-
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2755
|
-
}
|
|
2756
|
-
} else {
|
|
2757
|
-
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2758
|
-
}
|
|
2759
|
-
// Look for closing tool call tag
|
|
2760
|
-
if (auto end_tool = builder.try_find_regex(tool_call_end_regex, std::string::npos, false)) {
|
|
2761
|
-
builder.move_to(end_tool->groups[0].end);
|
|
2762
|
-
builder.consume_spaces(); // Consume trailing whitespace after tool call
|
|
2763
|
-
} else {
|
|
2764
|
-
throw common_chat_msg_partial_exception("Incomplete tool call");
|
|
2765
|
-
}
|
|
2766
|
-
} else {
|
|
2767
|
-
// No function found - don't consume content here, let it be handled at the end
|
|
2768
|
-
break;
|
|
2769
|
-
}
|
|
2770
|
-
}
|
|
2771
|
-
|
|
2772
|
-
// Consume any remaining whitespace after all tool call processing
|
|
2773
|
-
builder.consume_spaces();
|
|
2774
|
-
auto remaining = builder.consume_rest();
|
|
2775
|
-
// If there's any non-whitespace content remaining, add it as content
|
|
2776
|
-
if (!string_strip(remaining).empty()) {
|
|
2777
|
-
builder.add_content(remaining);
|
|
2778
|
-
}
|
|
3066
|
+
static const xml_tool_call_format form {
|
|
3067
|
+
/* form.scope_start = */ "<seed:tool_call>",
|
|
3068
|
+
/* form.tool_start = */ "<function=",
|
|
3069
|
+
/* form.tool_sep = */ ">",
|
|
3070
|
+
/* form.key_start = */ "<parameter=",
|
|
3071
|
+
/* form.key_val_sep = */ ">",
|
|
3072
|
+
/* form.val_end = */ "</parameter>",
|
|
3073
|
+
/* form.tool_end = */ "</function>",
|
|
3074
|
+
/* form.scope_end = */ "</seed:tool_call>",
|
|
3075
|
+
};
|
|
3076
|
+
builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
|
|
2779
3077
|
}
|
|
2780
3078
|
|
|
2781
3079
|
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
@@ -2914,6 +3212,35 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2914
3212
|
return common_chat_params_init_granite(tmpl, params);
|
|
2915
3213
|
}
|
|
2916
3214
|
|
|
3215
|
+
// GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
|
|
3216
|
+
if (src.find("[gMASK]<sop>") != std::string::npos &&
|
|
3217
|
+
src.find("<arg_key>") != std::string::npos &&
|
|
3218
|
+
src.find("<arg_value>") != std::string::npos &&
|
|
3219
|
+
params.json_schema.is_null()) {
|
|
3220
|
+
return common_chat_params_init_glm_4_5(tmpl, params);
|
|
3221
|
+
}
|
|
3222
|
+
|
|
3223
|
+
// Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
|
|
3224
|
+
// Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
|
|
3225
|
+
// Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
|
|
3226
|
+
if (src.find("<tool_call>") != std::string::npos &&
|
|
3227
|
+
src.find("<function>") != std::string::npos &&
|
|
3228
|
+
src.find("<function=") != std::string::npos &&
|
|
3229
|
+
src.find("<parameters>") != std::string::npos &&
|
|
3230
|
+
src.find("<parameter=") != std::string::npos) {
|
|
3231
|
+
return common_chat_params_init_qwen3_coder_xml(tmpl, params);
|
|
3232
|
+
}
|
|
3233
|
+
|
|
3234
|
+
// Xiaomi MiMo format detection (must come before Hermes 2 Pro)
|
|
3235
|
+
if (src.find("<tools>") != std::string::npos &&
|
|
3236
|
+
src.find("# Tools") != std::string::npos &&
|
|
3237
|
+
src.find("</tools>") != std::string::npos &&
|
|
3238
|
+
src.find("<tool_calls>") != std::string::npos &&
|
|
3239
|
+
src.find("</tool_calls>") != std::string::npos &&
|
|
3240
|
+
src.find("<tool_response>") != std::string::npos) {
|
|
3241
|
+
return common_chat_params_init_xiaomi_mimo(tmpl, params);
|
|
3242
|
+
}
|
|
3243
|
+
|
|
2917
3244
|
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
|
2918
3245
|
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
|
|
2919
3246
|
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
|
@@ -2945,6 +3272,29 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2945
3272
|
return common_chat_params_init_lfm2(tmpl, params);
|
|
2946
3273
|
}
|
|
2947
3274
|
|
|
3275
|
+
// MiniMax-M2 format detection
|
|
3276
|
+
if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
|
|
3277
|
+
return common_chat_params_init_minimax_m2(tmpl, params);
|
|
3278
|
+
}
|
|
3279
|
+
|
|
3280
|
+
// Kimi K2 format detection
|
|
3281
|
+
if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
|
|
3282
|
+
src.find("<|tool_calls_section_begin|>") != std::string::npos &&
|
|
3283
|
+
src.find("## Return of") != std::string::npos) {
|
|
3284
|
+
return common_chat_params_init_kimi_k2(tmpl, params);
|
|
3285
|
+
}
|
|
3286
|
+
|
|
3287
|
+
// Apriel 1.5 format detection
|
|
3288
|
+
if (src.find("<thinking>") != std::string::npos &&
|
|
3289
|
+
src.find("</thinking>") != std::string::npos &&
|
|
3290
|
+
src.find("<available_tools>") != std::string::npos &&
|
|
3291
|
+
src.find("<|assistant|>") != std::string::npos &&
|
|
3292
|
+
src.find("<|tool_result|>") != std::string::npos &&
|
|
3293
|
+
src.find("<tool_calls>[") != std::string::npos &&
|
|
3294
|
+
src.find("]</tool_calls>") != std::string::npos) {
|
|
3295
|
+
return common_chat_params_init_apriel_1_5(tmpl, params);
|
|
3296
|
+
}
|
|
3297
|
+
|
|
2948
3298
|
// Use generic handler when mixing tools + JSON schema.
|
|
2949
3299
|
// TODO: support that mix in handlers below.
|
|
2950
3300
|
if ((params.tools.is_array() && params.json_schema.is_object())) {
|
|
@@ -2996,7 +3346,7 @@ static common_chat_params common_chat_templates_apply_legacy(
|
|
|
2996
3346
|
const struct common_chat_templates * tmpls,
|
|
2997
3347
|
const struct common_chat_templates_inputs & inputs)
|
|
2998
3348
|
{
|
|
2999
|
-
|
|
3349
|
+
size_t alloc_size = 0;
|
|
3000
3350
|
std::vector<llama_chat_message> chat;
|
|
3001
3351
|
std::vector<std::string> contents;
|
|
3002
3352
|
|
|
@@ -3018,7 +3368,8 @@ static common_chat_params common_chat_templates_apply_legacy(
|
|
|
3018
3368
|
const auto & msg = inputs.messages[i];
|
|
3019
3369
|
const auto & content = contents[i];
|
|
3020
3370
|
chat.push_back({msg.role.c_str(), content.c_str()});
|
|
3021
|
-
|
|
3371
|
+
size_t msg_size = msg.role.size() + content.size();
|
|
3372
|
+
alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
|
|
3022
3373
|
}
|
|
3023
3374
|
|
|
3024
3375
|
std::vector<char> buf(alloc_size);
|
|
@@ -3040,6 +3391,11 @@ static common_chat_params common_chat_templates_apply_legacy(
|
|
|
3040
3391
|
res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
|
|
3041
3392
|
}
|
|
3042
3393
|
|
|
3394
|
+
// for safety, we check the result again
|
|
3395
|
+
if (res < 0 || (size_t) res > buf.size()) {
|
|
3396
|
+
throw std::runtime_error("failed to apply chat template, try using --jinja");
|
|
3397
|
+
}
|
|
3398
|
+
|
|
3043
3399
|
common_chat_params params;
|
|
3044
3400
|
params.prompt = std::string(buf.data(), res);
|
|
3045
3401
|
if (!inputs.json_schema.empty()) {
|
|
@@ -3126,6 +3482,24 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
|
|
3126
3482
|
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
|
|
3127
3483
|
common_chat_parse_lfm2(builder);
|
|
3128
3484
|
break;
|
|
3485
|
+
case COMMON_CHAT_FORMAT_MINIMAX_M2:
|
|
3486
|
+
common_chat_parse_minimax_m2(builder);
|
|
3487
|
+
break;
|
|
3488
|
+
case COMMON_CHAT_FORMAT_GLM_4_5:
|
|
3489
|
+
common_chat_parse_glm_4_5(builder);
|
|
3490
|
+
break;
|
|
3491
|
+
case COMMON_CHAT_FORMAT_KIMI_K2:
|
|
3492
|
+
common_chat_parse_kimi_k2(builder);
|
|
3493
|
+
break;
|
|
3494
|
+
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
|
|
3495
|
+
common_chat_parse_qwen3_coder_xml(builder);
|
|
3496
|
+
break;
|
|
3497
|
+
case COMMON_CHAT_FORMAT_APRIEL_1_5:
|
|
3498
|
+
common_chat_parse_apriel_1_5(builder);
|
|
3499
|
+
break;
|
|
3500
|
+
case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
|
|
3501
|
+
common_chat_parse_xiaomi_mimo(builder);
|
|
3502
|
+
break;
|
|
3129
3503
|
default:
|
|
3130
3504
|
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
|
|
3131
3505
|
}
|
|
@@ -128,6 +128,12 @@ enum common_chat_format {
|
|
|
128
128
|
COMMON_CHAT_FORMAT_NEMOTRON_V2,
|
|
129
129
|
COMMON_CHAT_FORMAT_APERTUS,
|
|
130
130
|
COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
|
|
131
|
+
COMMON_CHAT_FORMAT_GLM_4_5,
|
|
132
|
+
COMMON_CHAT_FORMAT_MINIMAX_M2,
|
|
133
|
+
COMMON_CHAT_FORMAT_KIMI_K2,
|
|
134
|
+
COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
|
|
135
|
+
COMMON_CHAT_FORMAT_APRIEL_1_5,
|
|
136
|
+
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
|
|
131
137
|
|
|
132
138
|
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
|
|
133
139
|
};
|
|
@@ -26,7 +26,6 @@
|
|
|
26
26
|
#include <sstream>
|
|
27
27
|
#include <string>
|
|
28
28
|
#include <thread>
|
|
29
|
-
#include <unordered_map>
|
|
30
29
|
#include <unordered_set>
|
|
31
30
|
#include <vector>
|
|
32
31
|
|
|
@@ -60,6 +59,14 @@
|
|
|
60
59
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
|
61
60
|
#endif
|
|
62
61
|
|
|
62
|
+
common_time_meas::common_time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
|
|
63
|
+
|
|
64
|
+
common_time_meas::~common_time_meas() {
|
|
65
|
+
if (t_start_us >= 0) {
|
|
66
|
+
t_acc += ggml_time_us() - t_start_us;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
63
70
|
//
|
|
64
71
|
// CPU utils
|
|
65
72
|
//
|