@fugood/llama.node 1.3.0-rc.6 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +12 -2
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +8 -9
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +39 -1001
- package/src/llama.cpp/common/arg.h +2 -2
- package/src/llama.cpp/common/chat.cpp +216 -2
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +33 -0
- package/src/llama.cpp/common/common.h +13 -0
- package/src/llama.cpp/common/download.cpp +1054 -0
- package/src/llama.cpp/common/download.h +55 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +19 -3
- package/src/llama.cpp/ggml/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
- package/src/llama.cpp/ggml/include/ggml.h +2 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +7 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +10 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +0 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +172 -35
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +82 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +25 -25
- package/src/llama.cpp/include/llama.h +7 -3
- package/src/llama.cpp/src/CMakeLists.txt +95 -0
- package/src/llama.cpp/src/llama-arch.cpp +108 -0
- package/src/llama.cpp/src/llama-arch.h +11 -0
- package/src/llama.cpp/src/llama-batch.cpp +63 -31
- package/src/llama.cpp/src/llama-batch.h +12 -1
- package/src/llama.cpp/src/llama-chat.cpp +32 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +44 -16
- package/src/llama.cpp/src/llama-context.h +5 -5
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +12 -7
- package/src/llama.cpp/src/llama-hparams.cpp +11 -1
- package/src/llama.cpp/src/llama-hparams.h +6 -0
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +3 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +56 -21
- package/src/llama.cpp/src/llama-kv-cache.h +2 -4
- package/src/llama.cpp/src/llama-kv-cells.h +44 -2
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +18 -14
- package/src/llama.cpp/src/llama-memory-recurrent.h +2 -2
- package/src/llama.cpp/src/llama-model.cpp +350 -13194
- package/src/llama.cpp/src/llama-model.h +9 -2
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +5 -0
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/apertus.cpp +125 -0
- package/src/llama.cpp/src/models/arcee.cpp +135 -0
- package/src/llama.cpp/src/models/arctic.cpp +138 -0
- package/src/llama.cpp/src/models/arwkv7.cpp +86 -0
- package/src/llama.cpp/src/models/baichuan.cpp +122 -0
- package/src/llama.cpp/src/models/bailingmoe.cpp +144 -0
- package/src/llama.cpp/src/models/bailingmoe2.cpp +135 -0
- package/src/llama.cpp/src/models/bert.cpp +176 -0
- package/src/llama.cpp/src/models/bitnet.cpp +160 -0
- package/src/llama.cpp/src/models/bloom.cpp +101 -0
- package/src/llama.cpp/src/models/chameleon.cpp +178 -0
- package/src/llama.cpp/src/models/chatglm.cpp +132 -0
- package/src/llama.cpp/src/models/codeshell.cpp +111 -0
- package/src/llama.cpp/src/models/cogvlm.cpp +100 -0
- package/src/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
- package/src/llama.cpp/src/models/command-r.cpp +122 -0
- package/src/llama.cpp/src/models/dbrx.cpp +123 -0
- package/src/llama.cpp/src/models/deci.cpp +135 -0
- package/src/llama.cpp/src/models/deepseek.cpp +144 -0
- package/src/llama.cpp/src/models/deepseek2.cpp +236 -0
- package/src/llama.cpp/src/models/dots1.cpp +134 -0
- package/src/llama.cpp/src/models/dream.cpp +105 -0
- package/src/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/src/llama.cpp/src/models/ernie4-5.cpp +111 -0
- package/src/llama.cpp/src/models/exaone.cpp +114 -0
- package/src/llama.cpp/src/models/exaone4.cpp +123 -0
- package/src/llama.cpp/src/models/falcon-h1.cpp +113 -0
- package/src/llama.cpp/src/models/falcon.cpp +120 -0
- package/src/llama.cpp/src/models/gemma-embedding.cpp +120 -0
- package/src/llama.cpp/src/models/gemma.cpp +112 -0
- package/src/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
- package/src/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
- package/src/llama.cpp/src/models/glm4-moe.cpp +153 -0
- package/src/llama.cpp/src/models/glm4.cpp +127 -0
- package/src/llama.cpp/src/models/gpt2.cpp +105 -0
- package/src/llama.cpp/src/models/gptneox.cpp +144 -0
- package/src/llama.cpp/src/models/granite-hybrid.cpp +196 -0
- package/src/llama.cpp/src/models/granite.cpp +211 -0
- package/src/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
- package/src/llama.cpp/src/models/grok.cpp +159 -0
- package/src/llama.cpp/src/models/grovemoe.cpp +141 -0
- package/src/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
- package/src/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
- package/src/llama.cpp/src/models/internlm2.cpp +120 -0
- package/src/llama.cpp/src/models/jais.cpp +86 -0
- package/src/llama.cpp/src/models/jamba.cpp +106 -0
- package/src/llama.cpp/src/models/lfm2.cpp +173 -0
- package/src/llama.cpp/src/models/llada-moe.cpp +122 -0
- package/src/llama.cpp/src/models/llada.cpp +99 -0
- package/src/llama.cpp/src/models/llama-iswa.cpp +174 -0
- package/src/llama.cpp/src/models/llama.cpp +155 -0
- package/src/llama.cpp/src/models/mamba.cpp +55 -0
- package/src/llama.cpp/src/models/minicpm3.cpp +199 -0
- package/src/llama.cpp/src/models/minimax-m2.cpp +124 -0
- package/src/llama.cpp/src/models/models.h +481 -0
- package/src/llama.cpp/src/models/mpt.cpp +126 -0
- package/src/llama.cpp/src/models/nemotron-h.cpp +121 -0
- package/src/llama.cpp/src/models/nemotron.cpp +122 -0
- package/src/llama.cpp/src/models/neo-bert.cpp +104 -0
- package/src/llama.cpp/src/models/olmo.cpp +121 -0
- package/src/llama.cpp/src/models/olmo2.cpp +150 -0
- package/src/llama.cpp/src/models/olmoe.cpp +124 -0
- package/src/llama.cpp/src/models/openai-moe-iswa.cpp +123 -0
- package/src/llama.cpp/src/models/openelm.cpp +124 -0
- package/src/llama.cpp/src/models/orion.cpp +123 -0
- package/src/llama.cpp/src/models/pangu-embedded.cpp +121 -0
- package/src/llama.cpp/src/models/phi2.cpp +121 -0
- package/src/llama.cpp/src/models/phi3.cpp +152 -0
- package/src/llama.cpp/src/models/plamo.cpp +110 -0
- package/src/llama.cpp/src/models/plamo2.cpp +316 -0
- package/src/llama.cpp/src/models/plm.cpp +168 -0
- package/src/llama.cpp/src/models/qwen.cpp +108 -0
- package/src/llama.cpp/src/models/qwen2.cpp +117 -0
- package/src/llama.cpp/src/models/qwen2moe.cpp +151 -0
- package/src/llama.cpp/src/models/qwen2vl.cpp +117 -0
- package/src/llama.cpp/src/models/qwen3.cpp +117 -0
- package/src/llama.cpp/src/models/qwen3moe.cpp +124 -0
- package/src/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
- package/src/llama.cpp/src/models/qwen3vl.cpp +141 -0
- package/src/llama.cpp/src/models/refact.cpp +94 -0
- package/src/llama.cpp/src/models/rwkv6-base.cpp +162 -0
- package/src/llama.cpp/src/models/rwkv6.cpp +94 -0
- package/src/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/src/llama.cpp/src/models/rwkv7-base.cpp +135 -0
- package/src/llama.cpp/src/models/rwkv7.cpp +90 -0
- package/src/llama.cpp/src/models/seed-oss.cpp +124 -0
- package/src/llama.cpp/src/models/smallthinker.cpp +120 -0
- package/src/llama.cpp/src/models/smollm3.cpp +128 -0
- package/src/llama.cpp/src/models/stablelm.cpp +146 -0
- package/src/llama.cpp/src/models/starcoder.cpp +100 -0
- package/src/llama.cpp/src/models/starcoder2.cpp +121 -0
- package/src/llama.cpp/src/models/t5-dec.cpp +166 -0
- package/src/llama.cpp/src/models/t5-enc.cpp +96 -0
- package/src/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/src/llama.cpp/src/models/xverse.cpp +108 -0

package/src/llama.cpp/common/arg.h

@@ -59,8 +59,8 @@ struct common_arg {
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
     bool is_exclude(enum llama_example ex);
-    bool get_value_from_env(std::string & output);
-    bool has_value_from_env();
+    bool get_value_from_env(std::string & output) const;
+    bool has_value_from_env() const;
     std::string to_string();
 };
 

package/src/llama.cpp/common/chat.cpp

@@ -6,8 +6,11 @@
 #include "log.h"
 #include "regex-partial.h"
 
+#include <algorithm>
 #include <cstdio>
+#include <cctype>
 #include <exception>
+#include <functional>
 #include <iostream>
 #include <optional>
 #include <stdexcept>
@@ -297,7 +300,6 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
         }
         if (!msg.reasoning_content.empty()) {
             jmsg["reasoning_content"] = msg.reasoning_content;
-            jmsg["thinking"] = msg.reasoning_content; // gpt-oss
         }
         if (!msg.tool_name.empty()) {
             jmsg["name"] = msg.tool_name;
@@ -627,6 +629,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
         case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
         case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
+        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -794,6 +797,7 @@ static std::string apply(
     if (additional_context) {
         tmpl_inputs.extra_context.merge_patch(*additional_context);
     }
+    // TODO: add flag to control date/time, if only for testing purposes.
     tmpl_inputs.now = inputs.now;
 
     minja::chat_template_options tmpl_opts;
@@ -972,6 +976,126 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
     return data;
 }
 
+
+// Case-insensitive find
+static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
+    auto it = std::search(
+        haystack.begin() + pos, haystack.end(),
+        needle.begin(), needle.end(),
+        [](char a, char b) { return std::tolower(a) == std::tolower(b); }
+    );
+    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
+}
+
+static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+    const auto is_json_schema_provided = !inputs.json_schema.is_null();
+    const auto is_grammar_provided = !inputs.grammar.empty();
+    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
+
+    // the logic requires potentially modifying the messages
+    auto tweaked_messages = inputs.messages;
+
+    auto replace_json_schema_marker = [](json & messages) -> bool {
+        static std::string marker1 = "force json schema.\n";
+        static std::string marker2 = "force json schema.";
+
+        if (messages.empty() || messages.at(0).at("role") != "system") {
+            return false;
+        }
+
+        std::string content = messages.at(0).at("content");
+
+        for (const auto & marker : {marker1, marker2}) {
+            const auto pos = ifind_string(content, marker);
+            if (pos != std::string::npos) {
+                content.replace(pos, marker.length(), "");
+                // inject modified content back into the messages
+                messages.at(0).at("content") = content;
+                return true;
+            }
+        }
+
+        return false;
+    };
+
+    // Lfm2 model does not natively work with json, but can generally understand the tools structure
+    //
+    // Example of the pytorch dialog structure:
+    // <|startoftext|><|im_start|>system
+    // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
+    // <|im_start|>user
+    // What is the current status of candidate ID 12345?<|im_end|>
+    // <|im_start|>assistant
+    // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
+    // <|im_start|>tool
+    // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
+    // <|im_start|>assistant
+    // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
+    //
+    // For the llama server compatibility with json tools semantic,
+    // the client can add "Follow json schema." line into the system message prompt to force the json output.
+    //
+    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
+        // server/utils.hpp prohibits that branch for the custom grammar anyways
+        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
+    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
+        LOG_INF("%s: Using tools to build a grammar\n", __func__);
+
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            auto schemas = json::array();
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                schemas.push_back({
+                    {"type", "object"},
+                    {"properties", {
+                        {"name", {
+                            {"type", "string"},
+                            {"const", function.at("name")},
+                        }},
+                        {"arguments", function.at("parameters")},
+                    }},
+                    {"required", json::array({"name", "arguments", "id"})},
+                });
+            });
+            auto schema = json {
+                {"type", "array"},
+                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
+                {"minItems", 1},
+            };
+            if (!inputs.parallel_tool_calls) {
+                schema["maxItems"] = 1;
+            }
+
+            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
+        });
+        // model has no concept of tool selection mode choice,
+        // if the system prompt rendered correctly it will produce a tool call
+        // the grammar goes inside the tool call body
+        data.grammar_lazy = true;
+        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
+        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
+        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
+    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
+        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
+        // output those tokens
+        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
+    } else if (is_json_schema_provided) {
+        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
+        data.grammar = json_schema_to_grammar(inputs.json_schema);
+    } else if (is_grammar_provided) {
+        LOG_INF("%s: Using provided grammar\n", __func__);
+        data.grammar = inputs.grammar;
+    } else {
+        LOG_INF("%s: Using content relying on the template\n", __func__);
+    }
+
+    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
+    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
+
+    return data;
+}
+
 static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     data.prompt = apply(tmpl, inputs);
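
(For context: when the marker is present in the system prompt, the grammar built above constrains the assistant to the same wire format that the new common_chat_parse_lfm2 parser, added later in this file, consumes; an illustrative completion would look like <|tool_call_start|>[{"name": "get_candidate_status", "arguments": {"candidate_id": "12345"}}]<|tool_call_end|>.)
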
@@ -1672,7 +1796,23 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
 
 static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
-    auto prompt = apply(tmpl, inputs);
+
+    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
+    auto adjusted_messages = json::array();
+    for (const auto & msg : inputs.messages) {
+        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
+        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+
+        if (has_reasoning_content && has_tool_calls) {
+            auto adjusted_message = msg;
+            adjusted_message["thinking"] = msg.at("reasoning_content");
+            adjusted_messages.push_back(adjusted_message);
+        } else {
+            adjusted_messages.push_back(msg);
+        }
+    }
+
+    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
 
     // Check if we need to replace the return token with end token during
     // inference and without generation prompt. For more details see:
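
(For context: together with the -297,7 hunk above, which removes the unconditional jmsg["thinking"] copy from common_chat_msgs_to_json_oaicompat, reasoning is now forwarded to the gpt-oss template only for assistant messages that also carry tool calls; such a message is rendered as if it contained an extra "thinking" field mirroring its "reasoning_content", and all other messages pass through unchanged.)
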
@@ -2485,6 +2625,71 @@ static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
     builder.add_content(builder.consume_rest());
 }
 
+
+static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
+    if (!builder.syntax().parse_tool_calls) {
+        builder.add_content(builder.consume_rest());
+        return;
+    }
+
+    // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
+    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
+    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
+
+    // Loop through all tool calls
+    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
+        builder.move_to(res->groups[0].end);
+
+        // Parse JSON array format: [{"name": "...", "arguments": {...}}]
+        auto tool_calls_data = builder.consume_json();
+
+        // Consume end marker
+        builder.consume_spaces();
+        if (!builder.try_consume_regex(tool_call_end_regex)) {
+            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
+        }
+
+        // Process each tool call in the array
+        if (tool_calls_data.json.is_array()) {
+            for (const auto & tool_call : tool_calls_data.json) {
+                if (!tool_call.is_object()) {
+                    throw common_chat_msg_partial_exception("Tool call must be an object");
+                }
+
+                if (!tool_call.contains("name")) {
+                    throw common_chat_msg_partial_exception("Tool call missing 'name' field");
+                }
+
+                std::string function_name = tool_call.at("name");
+                std::string arguments = "{}";
+
+                if (tool_call.contains("arguments")) {
+                    if (tool_call.at("arguments").is_object()) {
+                        arguments = tool_call.at("arguments").dump();
+                    } else if (tool_call.at("arguments").is_string()) {
+                        arguments = tool_call.at("arguments");
+                    }
+                }
+
+                if (!builder.add_tool_call(function_name, "", arguments)) {
+                    throw common_chat_msg_partial_exception("Incomplete tool call");
+                }
+            }
+        } else {
+            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
+        }
+
+        // Consume any trailing whitespace after this tool call
+        builder.consume_spaces();
+    }
+
+    // Consume any remaining content after all tool calls
+    auto remaining = builder.consume_rest();
+    if (!string_strip(remaining).empty()) {
+        builder.add_content(remaining);
+    }
+}
+
 static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
     // Parse thinking tags first - this handles the main reasoning content
     builder.try_parse_reasoning("<seed:think>", "</seed:think>");
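
(For context: on a completed segment in the format shown in the comment above, the loop emits one tool call per array element, re-serializing object-valued "arguments" to a JSON string via dump(); any text outside the <|tool_call_start|>/<|tool_call_end|> pair is kept as ordinary message content.)
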
@@ -2734,6 +2939,12 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_apertus(tmpl, params);
     }
 
+    // LFM2 (w/ tools)
+    if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
+        src.find("]<|tool_list_end|>") != std::string::npos) {
+        return common_chat_params_init_lfm2(tmpl, params);
+    }
+
     // Use generic handler when mixing tools + JSON schema.
     // TODO: support that mix in handlers below.
     if ((params.tools.is_array() && params.json_schema.is_object())) {
@@ -2912,6 +3123,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_APERTUS:
             common_chat_parse_apertus(builder);
             break;
+        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
+            common_chat_parse_lfm2(builder);
+            break;
         default:
             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }

package/src/llama.cpp/common/common.cpp

@@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) {
     return cache_directory + filename;
 }
 
+std::vector<common_file_info> fs_list_files(const std::string & path) {
+    std::vector<common_file_info> files;
+    if (path.empty()) return files;
+
+    std::filesystem::path dir(path);
+    if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
+        return files;
+    }
+
+    for (const auto & entry : std::filesystem::directory_iterator(dir)) {
+        try {
+            // Only include regular files (skip directories)
+            const auto & p = entry.path();
+            if (std::filesystem::is_regular_file(p)) {
+                common_file_info info;
+                info.path = p.string();
+                info.name = p.filename().string();
+                try {
+                    info.size = static_cast<size_t>(std::filesystem::file_size(p));
+                } catch (const std::filesystem::filesystem_error &) {
+                    info.size = 0;
+                }
+                files.push_back(std::move(info));
+            }
+        } catch (const std::filesystem::filesystem_error &) {
+            // skip entries we cannot inspect
+            continue;
+        }
+    }
+
+    return files;
+}
+
 
 //
 // Model utils
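
A minimal usage sketch for the new helper (hypothetical caller, not part of the package; the include comments name assumptions):

    #include <cstdio>
    #include <string>
    #include "common.h" // declares common_file_info and fs_list_files()

    // Print the name and size of every regular file in a directory;
    // fs_list_files() returns regular files only and skips unreadable entries.
    static void print_dir_listing(const std::string & dir) {
        for (const common_file_info & f : fs_list_files(dir)) {
            printf("%-40s %zu bytes\n", f.name.c_str(), f.size);
        }
    }
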

package/src/llama.cpp/common/common.h

@@ -407,6 +407,8 @@ struct common_params {
     bool mmproj_use_gpu = true; // use GPU for multimodal model
     bool no_mmproj = false; // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
+    int image_min_tokens = -1;
+    int image_max_tokens = -1;
 
     // finetune
     struct lr_opt lr;
@@ -506,6 +508,10 @@ struct common_params {
     // return false from callback to abort model loading or true to continue
     llama_progress_callback load_progress_callback = NULL;
     void * load_progress_callback_user_data = NULL;
+
+    bool has_speculative() const {
+        return !speculative.model.path.empty() || !speculative.model.hf_repo.empty();
+    }
 };
 
 // call once at the start of a program if it uses libcommon
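
(For context: this is a small convenience for call sites that need to know whether a draft model was configured, either as a local path or as a Hugging Face repo, e.g. if (params.has_speculative()) { /* set up speculative decoding */ }.)
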
@@ -606,6 +612,13 @@ bool fs_create_directory_with_parents(const std::string & path);
 std::string fs_get_cache_directory();
 std::string fs_get_cache_file(const std::string & filename);
 
+struct common_file_info {
+    std::string path;
+    std::string name;
+    size_t size = 0; // in bytes
+};
+std::vector<common_file_info> fs_list_files(const std::string & path);
+
 //
 // Model utils
 //