@fugood/llama.node 1.4.2 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -1
- package/lib/binding.js +3 -0
- package/lib/binding.ts +10 -0
- package/lib/index.js +9 -0
- package/lib/index.ts +10 -0
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +25 -11
- package/src/LlamaContext.cpp +24 -0
- package/src/LlamaContext.h +3 -0
- package/src/llama.cpp/CMakeLists.txt +21 -6
- package/src/llama.cpp/common/CMakeLists.txt +6 -0
- package/src/llama.cpp/common/arg.cpp +83 -22
- package/src/llama.cpp/common/chat-parser.cpp +40 -0
- package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
- package/src/llama.cpp/common/chat-peg-parser.h +105 -0
- package/src/llama.cpp/common/chat.cpp +40 -29
- package/src/llama.cpp/common/chat.h +10 -1
- package/src/llama.cpp/common/common.cpp +70 -7
- package/src/llama.cpp/common/common.h +23 -5
- package/src/llama.cpp/common/download.cpp +18 -8
- package/src/llama.cpp/common/download.h +3 -1
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +18 -27
- package/src/llama.cpp/common/log.h +19 -12
- package/src/llama.cpp/common/peg-parser.cpp +1712 -0
- package/src/llama.cpp/common/peg-parser.h +459 -0
- package/src/llama.cpp/common/unicode.cpp +64 -0
- package/src/llama.cpp/common/unicode.h +22 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +52 -48
- package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -2
- package/src/llama.cpp/ggml/include/ggml-zendnn.h +22 -0
- package/src/llama.cpp/ggml/include/ggml.h +29 -2
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +51 -125
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +98 -12
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +30 -1
- package/src/llama.cpp/src/llama-arch.h +3 -0
- package/src/llama.cpp/src/llama-graph.cpp +3 -6
- package/src/llama.cpp/src/llama-hparams.h +2 -2
- package/src/llama.cpp/src/llama-impl.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +54 -6
- package/src/llama.cpp/src/llama-quant.cpp +0 -29
- package/src/llama.cpp/src/llama-vocab.cpp +1 -2
- package/src/llama.cpp/src/models/deepseek2.cpp +18 -0
- package/src/llama.cpp/src/models/mistral3.cpp +160 -0
- package/src/llama.cpp/src/models/models.h +4 -0
- package/src/llama.cpp/src/unicode.cpp +2 -2
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#include "chat-peg-parser.h"
|
|
2
|
+
|
|
3
|
+
// Returns a view of `sv` with all trailing whitespace characters (as
// classified by std::isspace) removed. No characters are copied; the result
// refers to the same underlying buffer as the argument.
static std::string_view trim_trailing_space(std::string_view sv) {
    auto keep = sv.size();
    // Scan backwards for the last non-whitespace character. The cast to
    // unsigned char keeps the std::isspace call well-defined for bytes >= 0x80.
    while (keep > 0 && std::isspace(static_cast<unsigned char>(sv[keep - 1]))) {
        --keep;
    }
    return sv.substr(0, keep);
}
|
|
9
|
+
|
|
10
|
+
// Asks the arena to visit the given parse result and forwards every node
// handed to the callback to map().
// Note: the `result` parameter shadows the `result` member inside this function.
void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
    auto forward_to_map = [this](const common_peg_ast_node & visited) {
        map(visited);
    };
    arena.visit(result, forward_to_map);
}
|
|
15
|
+
|
|
16
|
+
void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
|
|
17
|
+
bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
|
|
18
|
+
bool is_content = node.tag == common_chat_peg_builder::CONTENT;
|
|
19
|
+
|
|
20
|
+
if (is_reasoning) {
|
|
21
|
+
result.reasoning_content = std::string(trim_trailing_space(node.text));
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
if (is_content) {
|
|
25
|
+
result.content = std::string(trim_trailing_space(node.text));
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
|
|
30
|
+
common_chat_peg_mapper::map(node);
|
|
31
|
+
|
|
32
|
+
bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
|
|
33
|
+
bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
|
|
34
|
+
bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
|
|
35
|
+
bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
|
|
36
|
+
|
|
37
|
+
if (is_tool_open) {
|
|
38
|
+
result.tool_calls.emplace_back();
|
|
39
|
+
current_tool = &result.tool_calls.back();
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
if (is_tool_id && current_tool) {
|
|
43
|
+
current_tool->id = std::string(trim_trailing_space(node.text));
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
if (is_tool_name && current_tool) {
|
|
47
|
+
current_tool->name = std::string(trim_trailing_space(node.text));
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
if (is_tool_args && current_tool) {
|
|
51
|
+
current_tool->arguments = std::string(trim_trailing_space(node.text));
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
|
|
56
|
+
common_chat_peg_mapper::map(node);
|
|
57
|
+
|
|
58
|
+
bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
|
|
59
|
+
bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
|
|
60
|
+
bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
|
|
61
|
+
bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
|
|
62
|
+
bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
|
|
63
|
+
bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
|
|
64
|
+
bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
|
|
65
|
+
bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
|
|
66
|
+
|
|
67
|
+
if (is_tool_open) {
|
|
68
|
+
result.tool_calls.emplace_back();
|
|
69
|
+
current_tool = &result.tool_calls.back();
|
|
70
|
+
arg_count = 0;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (is_tool_name) {
|
|
74
|
+
current_tool->name = std::string(node.text);
|
|
75
|
+
current_tool->arguments = "{";
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
if (is_arg_open) {
|
|
79
|
+
needs_closing_quote = false;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if (is_arg_name && current_tool) {
|
|
83
|
+
if (arg_count > 0) {
|
|
84
|
+
current_tool->arguments += ",";
|
|
85
|
+
}
|
|
86
|
+
current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
|
|
87
|
+
++arg_count;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (is_arg_string && current_tool) {
|
|
91
|
+
// Serialize to JSON, but exclude the end quote
|
|
92
|
+
std::string dumped = json(node.text).dump();
|
|
93
|
+
current_tool->arguments += dumped.substr(0, dumped.size() - 1);
|
|
94
|
+
needs_closing_quote = true;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (is_arg_close && current_tool) {
|
|
98
|
+
if (needs_closing_quote) {
|
|
99
|
+
current_tool->arguments += "\"";
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
if (is_arg_json && current_tool) {
|
|
104
|
+
current_tool->arguments += std::string(trim_trailing_space(node.text));
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
if (is_tool_close && current_tool) {
|
|
108
|
+
current_tool->arguments += "}";
|
|
109
|
+
}
|
|
110
|
+
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "chat.h"
|
|
4
|
+
#include "peg-parser.h"
|
|
5
|
+
|
|
6
|
+
class common_chat_peg_builder : public common_peg_parser_builder {
|
|
7
|
+
public:
|
|
8
|
+
static constexpr const char * REASONING_BLOCK = "reasoning-block";
|
|
9
|
+
static constexpr const char * REASONING = "reasoning";
|
|
10
|
+
static constexpr const char * CONTENT = "content";
|
|
11
|
+
|
|
12
|
+
common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
|
|
13
|
+
common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
|
|
14
|
+
common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
// Creates a common_chat_peg_builder, lets `fn` construct the root parser with
// it, installs that parser as the root and returns the built arena.
inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
    common_chat_peg_builder peg;
    peg.set_root(fn(peg));
    return peg.build();
}
|
|
22
|
+
|
|
23
|
+
class common_chat_peg_mapper {
|
|
24
|
+
public:
|
|
25
|
+
common_chat_msg & result;
|
|
26
|
+
|
|
27
|
+
common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
|
|
28
|
+
|
|
29
|
+
virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
|
|
30
|
+
virtual void map(const common_peg_ast_node & node);
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
class common_chat_peg_native_builder : public common_chat_peg_builder {
|
|
34
|
+
public:
|
|
35
|
+
static constexpr const char * TOOL = "tool";
|
|
36
|
+
static constexpr const char * TOOL_OPEN = "tool-open";
|
|
37
|
+
static constexpr const char * TOOL_CLOSE = "tool-close";
|
|
38
|
+
static constexpr const char * TOOL_ID = "tool-id";
|
|
39
|
+
static constexpr const char * TOOL_NAME = "tool-name";
|
|
40
|
+
static constexpr const char * TOOL_ARGS = "tool-args";
|
|
41
|
+
|
|
42
|
+
common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
|
|
43
|
+
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
|
|
44
|
+
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
|
|
45
|
+
common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
|
|
46
|
+
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
|
|
47
|
+
common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
class common_chat_peg_native_mapper : public common_chat_peg_mapper {
|
|
51
|
+
common_chat_tool_call * current_tool;
|
|
52
|
+
|
|
53
|
+
public:
|
|
54
|
+
common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
|
|
55
|
+
|
|
56
|
+
void map(const common_peg_ast_node & node) override;
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
// Creates a common_chat_peg_native_builder, lets `fn` construct the root
// parser with it, installs that parser as the root and returns the built arena.
inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
    common_chat_peg_native_builder peg;
    peg.set_root(fn(peg));
    return peg.build();
}
|
|
64
|
+
|
|
65
|
+
class common_chat_peg_constructed_builder : public common_chat_peg_builder {
|
|
66
|
+
public:
|
|
67
|
+
static constexpr const char * TOOL = "tool";
|
|
68
|
+
static constexpr const char * TOOL_OPEN = "tool-open";
|
|
69
|
+
static constexpr const char * TOOL_CLOSE = "tool-close";
|
|
70
|
+
static constexpr const char * TOOL_NAME = "tool-name";
|
|
71
|
+
static constexpr const char * TOOL_ARG = "tool-arg";
|
|
72
|
+
static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
|
|
73
|
+
static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
|
|
74
|
+
static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
|
|
75
|
+
static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
|
|
76
|
+
static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
|
|
77
|
+
|
|
78
|
+
common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
|
|
79
|
+
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
|
|
80
|
+
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
|
|
81
|
+
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
|
|
82
|
+
common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
|
|
83
|
+
common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
|
|
84
|
+
common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
|
|
85
|
+
common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
|
|
86
|
+
common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
|
|
87
|
+
common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
|
|
91
|
+
common_chat_tool_call * current_tool;
|
|
92
|
+
int arg_count = 0;
|
|
93
|
+
bool needs_closing_quote = false;
|
|
94
|
+
|
|
95
|
+
public:
|
|
96
|
+
common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
|
|
97
|
+
|
|
98
|
+
void map(const common_peg_ast_node & node) override;
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
// Creates a common_chat_peg_constructed_builder, lets `fn` construct the root
// parser with it, installs that parser as the root and returns the built arena.
inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
    common_chat_peg_constructed_builder peg;
    peg.set_root(fn(peg));
    return peg.build();
}
|
|
@@ -82,29 +82,36 @@ json common_chat_msg::to_json_oaicompat() const
|
|
|
82
82
|
return message;
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg &
|
|
85
|
+
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
|
|
86
86
|
std::vector<common_chat_msg_diff> diffs;
|
|
87
|
-
if (
|
|
87
|
+
if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
|
|
88
|
+
diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
|
|
89
|
+
} else {
|
|
90
|
+
diffs.reserve(3);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// TODO: these can become expensive for long messages - how to optimize?
|
|
94
|
+
if (msg_prv.reasoning_content != msg_new.reasoning_content) {
|
|
88
95
|
auto & diff = diffs.emplace_back();
|
|
89
|
-
diff.reasoning_content_delta = string_diff(
|
|
96
|
+
diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
|
|
90
97
|
}
|
|
91
|
-
if (
|
|
98
|
+
if (msg_prv.content != msg_new.content) {
|
|
92
99
|
auto & diff = diffs.emplace_back();
|
|
93
|
-
diff.content_delta = string_diff(
|
|
100
|
+
diff.content_delta = string_diff(msg_prv.content, msg_new.content);
|
|
94
101
|
}
|
|
95
102
|
|
|
96
|
-
if (
|
|
103
|
+
if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
|
|
97
104
|
throw std::runtime_error("Invalid diff: now finding less tool calls!");
|
|
98
105
|
}
|
|
99
106
|
|
|
100
|
-
if (!
|
|
101
|
-
auto idx =
|
|
102
|
-
const auto & pref =
|
|
103
|
-
const auto & newf =
|
|
107
|
+
if (!msg_prv.tool_calls.empty()) {
|
|
108
|
+
const auto idx = msg_prv.tool_calls.size() - 1;
|
|
109
|
+
const auto & pref = msg_prv.tool_calls[idx];
|
|
110
|
+
const auto & newf = msg_new.tool_calls[idx];
|
|
104
111
|
if (pref.name != newf.name) {
|
|
105
112
|
throw std::runtime_error("Invalid diff: tool call mismatch!");
|
|
106
113
|
}
|
|
107
|
-
auto args_diff = string_diff(pref.arguments, newf.arguments);
|
|
114
|
+
const auto args_diff = string_diff(pref.arguments, newf.arguments);
|
|
108
115
|
if (!args_diff.empty() || pref.id != newf.id) {
|
|
109
116
|
auto & diff = diffs.emplace_back();
|
|
110
117
|
diff.tool_call_index = idx;
|
|
@@ -115,11 +122,12 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
|
|
|
115
122
|
diff.tool_call_delta.arguments = args_diff;
|
|
116
123
|
}
|
|
117
124
|
}
|
|
118
|
-
for (size_t idx =
|
|
125
|
+
for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
|
|
119
126
|
auto & diff = diffs.emplace_back();
|
|
120
127
|
diff.tool_call_index = idx;
|
|
121
|
-
diff.tool_call_delta =
|
|
128
|
+
diff.tool_call_delta = msg_new.tool_calls[idx];
|
|
122
129
|
}
|
|
130
|
+
|
|
123
131
|
return diffs;
|
|
124
132
|
}
|
|
125
133
|
|
|
@@ -150,7 +158,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
|
150
158
|
if (tool_choice == "required") {
|
|
151
159
|
return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
152
160
|
}
|
|
153
|
-
throw std::
|
|
161
|
+
throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
|
|
154
162
|
}
|
|
155
163
|
|
|
156
164
|
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
|
|
@@ -173,17 +181,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
173
181
|
try {
|
|
174
182
|
|
|
175
183
|
if (!messages.is_array()) {
|
|
176
|
-
throw std::
|
|
184
|
+
throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
|
|
177
185
|
}
|
|
178
186
|
|
|
179
187
|
for (const auto & message : messages) {
|
|
180
188
|
if (!message.is_object()) {
|
|
181
|
-
throw std::
|
|
189
|
+
throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
|
|
182
190
|
}
|
|
183
191
|
|
|
184
192
|
common_chat_msg msg;
|
|
185
193
|
if (!message.contains("role")) {
|
|
186
|
-
throw std::
|
|
194
|
+
throw std::invalid_argument("Missing 'role' in message: " + message.dump());
|
|
187
195
|
}
|
|
188
196
|
msg.role = message.at("role");
|
|
189
197
|
|
|
@@ -196,11 +204,11 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
196
204
|
} else if (content.is_array()) {
|
|
197
205
|
for (const auto & part : content) {
|
|
198
206
|
if (!part.contains("type")) {
|
|
199
|
-
throw std::
|
|
207
|
+
throw std::invalid_argument("Missing content part type: " + part.dump());
|
|
200
208
|
}
|
|
201
209
|
const auto & type = part.at("type");
|
|
202
210
|
if (type != "text") {
|
|
203
|
-
throw std::
|
|
211
|
+
throw std::invalid_argument("Unsupported content part type: " + type.dump());
|
|
204
212
|
}
|
|
205
213
|
common_chat_msg_content_part msg_part;
|
|
206
214
|
msg_part.type = type;
|
|
@@ -208,25 +216,25 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
208
216
|
msg.content_parts.push_back(msg_part);
|
|
209
217
|
}
|
|
210
218
|
} else if (!content.is_null()) {
|
|
211
|
-
throw std::
|
|
219
|
+
throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
|
|
212
220
|
}
|
|
213
221
|
}
|
|
214
222
|
if (has_tool_calls) {
|
|
215
223
|
for (const auto & tool_call : message.at("tool_calls")) {
|
|
216
224
|
common_chat_tool_call tc;
|
|
217
225
|
if (!tool_call.contains("type")) {
|
|
218
|
-
throw std::
|
|
226
|
+
throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
|
|
219
227
|
}
|
|
220
228
|
const auto & type = tool_call.at("type");
|
|
221
229
|
if (type != "function") {
|
|
222
|
-
throw std::
|
|
230
|
+
throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
|
|
223
231
|
}
|
|
224
232
|
if (!tool_call.contains("function")) {
|
|
225
|
-
throw std::
|
|
233
|
+
throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
|
|
226
234
|
}
|
|
227
235
|
const auto & fc = tool_call.at("function");
|
|
228
236
|
if (!fc.contains("name")) {
|
|
229
|
-
throw std::
|
|
237
|
+
throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
|
|
230
238
|
}
|
|
231
239
|
tc.name = fc.at("name");
|
|
232
240
|
tc.arguments = fc.at("arguments");
|
|
@@ -237,7 +245,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
237
245
|
}
|
|
238
246
|
}
|
|
239
247
|
if (!has_content && !has_tool_calls) {
|
|
240
|
-
throw std::
|
|
248
|
+
throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
|
|
241
249
|
}
|
|
242
250
|
if (message.contains("reasoning_content")) {
|
|
243
251
|
msg.reasoning_content = message.at("reasoning_content");
|
|
@@ -340,18 +348,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
|
|
|
340
348
|
try {
|
|
341
349
|
if (!tools.is_null()) {
|
|
342
350
|
if (!tools.is_array()) {
|
|
343
|
-
throw std::
|
|
351
|
+
throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
|
|
344
352
|
}
|
|
345
353
|
for (const auto & tool : tools) {
|
|
346
354
|
if (!tool.contains("type")) {
|
|
347
|
-
throw std::
|
|
355
|
+
throw std::invalid_argument("Missing tool type: " + tool.dump());
|
|
348
356
|
}
|
|
349
357
|
const auto & type = tool.at("type");
|
|
350
358
|
if (!type.is_string() || type != "function") {
|
|
351
|
-
throw std::
|
|
359
|
+
throw std::invalid_argument("Unsupported tool type: " + tool.dump());
|
|
352
360
|
}
|
|
353
361
|
if (!tool.contains("function")) {
|
|
354
|
-
throw std::
|
|
362
|
+
throw std::invalid_argument("Missing tool function: " + tool.dump());
|
|
355
363
|
}
|
|
356
364
|
|
|
357
365
|
const auto & function = tool.at("function");
|
|
@@ -636,6 +644,9 @@ const char * common_chat_format_name(common_chat_format format) {
|
|
|
636
644
|
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
|
|
637
645
|
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
|
|
638
646
|
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
|
|
647
|
+
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
|
|
648
|
+
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
|
|
649
|
+
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
|
|
639
650
|
default:
|
|
640
651
|
throw std::runtime_error("Unknown chat format");
|
|
641
652
|
}
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
#pragma once
|
|
4
4
|
|
|
5
5
|
#include "common.h"
|
|
6
|
+
#include "peg-parser.h"
|
|
6
7
|
#include <functional>
|
|
7
8
|
#include <chrono>
|
|
8
9
|
#include <string>
|
|
@@ -87,7 +88,7 @@ struct common_chat_msg_diff {
|
|
|
87
88
|
size_t tool_call_index = std::string::npos;
|
|
88
89
|
common_chat_tool_call tool_call_delta;
|
|
89
90
|
|
|
90
|
-
static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg &
|
|
91
|
+
static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
|
|
91
92
|
|
|
92
93
|
bool operator==(const common_chat_msg_diff & other) const {
|
|
93
94
|
return content_delta == other.content_delta
|
|
@@ -135,6 +136,11 @@ enum common_chat_format {
|
|
|
135
136
|
COMMON_CHAT_FORMAT_APRIEL_1_5,
|
|
136
137
|
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
|
|
137
138
|
|
|
139
|
+
// These are intended to be parsed by the PEG parser
|
|
140
|
+
COMMON_CHAT_FORMAT_PEG_SIMPLE,
|
|
141
|
+
COMMON_CHAT_FORMAT_PEG_NATIVE,
|
|
142
|
+
COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
|
|
143
|
+
|
|
138
144
|
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
|
|
139
145
|
};
|
|
140
146
|
|
|
@@ -165,6 +171,7 @@ struct common_chat_params {
|
|
|
165
171
|
std::vector<common_grammar_trigger> grammar_triggers;
|
|
166
172
|
std::vector<std::string> preserved_tokens;
|
|
167
173
|
std::vector<std::string> additional_stops;
|
|
174
|
+
std::string parser;
|
|
168
175
|
};
|
|
169
176
|
|
|
170
177
|
struct common_chat_syntax {
|
|
@@ -174,6 +181,7 @@ struct common_chat_syntax {
|
|
|
174
181
|
bool reasoning_in_content = false;
|
|
175
182
|
bool thinking_forced_open = false;
|
|
176
183
|
bool parse_tool_calls = true;
|
|
184
|
+
common_peg_arena parser = {};
|
|
177
185
|
};
|
|
178
186
|
|
|
179
187
|
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
|
@@ -217,6 +225,7 @@ const char* common_chat_format_name(common_chat_format format);
|
|
|
217
225
|
const char* common_reasoning_format_name(common_reasoning_format format);
|
|
218
226
|
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
|
|
219
227
|
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
|
228
|
+
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
|
220
229
|
|
|
221
230
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
|
|
222
231
|
|
|
@@ -694,7 +694,7 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
|
|
|
694
694
|
|
|
695
695
|
// Validate if a filename is safe to use
|
|
696
696
|
// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
|
|
697
|
-
bool fs_validate_filename(const std::string & filename) {
|
|
697
|
+
bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|
|
698
698
|
if (!filename.length()) {
|
|
699
699
|
// Empty filename invalid
|
|
700
700
|
return false;
|
|
@@ -754,10 +754,14 @@ bool fs_validate_filename(const std::string & filename) {
|
|
|
754
754
|
|| (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
|
|
755
755
|
|| c == 0xFFFD // Replacement Character (UTF-8)
|
|
756
756
|
|| c == 0xFEFF // Byte Order Mark (BOM)
|
|
757
|
-
|| c == '
|
|
757
|
+
|| c == ':' || c == '*' // Illegal characters
|
|
758
758
|
|| c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
|
|
759
759
|
return false;
|
|
760
760
|
}
|
|
761
|
+
if (!allow_subdirs && (c == '/' || c == '\\')) {
|
|
762
|
+
// Subdirectories not allowed, reject path separators
|
|
763
|
+
return false;
|
|
764
|
+
}
|
|
761
765
|
}
|
|
762
766
|
|
|
763
767
|
// Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
|
|
@@ -782,11 +786,29 @@ bool fs_validate_filename(const std::string & filename) {
|
|
|
782
786
|
#include <iostream>
|
|
783
787
|
|
|
784
788
|
|
|
789
|
+
#ifdef _WIN32
|
|
790
|
+
// Converts a UTF-8 encoded string to a wide string using MultiByteToWideChar
// with CP_UTF8. Returns an empty wide string for empty input or when the
// required-length query reports a non-positive size.
static std::wstring utf8_to_wstring(const std::string & str) {
    std::wstring wide;
    if (str.empty()) {
        return wide;
    }

    const int n_bytes = static_cast<int>(str.size());

    // First call: no output buffer, only asks how many wide characters are needed.
    const int n_wide = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), n_bytes, nullptr, 0);
    if (n_wide <= 0) {
        return wide;
    }

    // Second call: performs the conversion into the pre-sized buffer.
    wide.resize(n_wide, 0);
    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), n_bytes, &wide[0], n_wide);

    return wide;
}
|
|
806
|
+
#endif
|
|
807
|
+
|
|
785
808
|
// returns true if successful, false otherwise
|
|
786
809
|
bool fs_create_directory_with_parents(const std::string & path) {
|
|
787
810
|
#ifdef _WIN32
|
|
788
|
-
std::
|
|
789
|
-
std::wstring wpath = converter.from_bytes(path);
|
|
811
|
+
std::wstring wpath = utf8_to_wstring(path);
|
|
790
812
|
|
|
791
813
|
// if the path already exists, check whether it's a directory
|
|
792
814
|
const DWORD attributes = GetFileAttributesW(wpath.c_str());
|
|
@@ -859,6 +881,11 @@ bool fs_create_directory_with_parents(const std::string & path) {
|
|
|
859
881
|
#endif // _WIN32
|
|
860
882
|
}
|
|
861
883
|
|
|
884
|
+
// Returns true if `path` exists and refers to a directory.
bool fs_is_directory(const std::string & path) {
    const std::filesystem::path candidate(path);
    // Query the file status once and test both properties on it.
    const std::filesystem::file_status st = std::filesystem::status(candidate);
    return std::filesystem::exists(st) && std::filesystem::is_directory(st);
}
|
|
888
|
+
|
|
862
889
|
std::string fs_get_cache_directory() {
|
|
863
890
|
std::string cache_directory = "";
|
|
864
891
|
auto ensure_trailing_slash = [](std::string p) {
|
|
@@ -893,6 +920,8 @@ std::string fs_get_cache_directory() {
|
|
|
893
920
|
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
|
|
894
921
|
#elif defined(_WIN32)
|
|
895
922
|
cache_directory = std::getenv("LOCALAPPDATA");
|
|
923
|
+
#elif defined(__EMSCRIPTEN__)
|
|
924
|
+
GGML_ABORT("not implemented on this platform");
|
|
896
925
|
#else
|
|
897
926
|
# error Unknown architecture
|
|
898
927
|
#endif
|
|
@@ -912,7 +941,7 @@ std::string fs_get_cache_file(const std::string & filename) {
|
|
|
912
941
|
return cache_directory + filename;
|
|
913
942
|
}
|
|
914
943
|
|
|
915
|
-
std::vector<common_file_info>
|
|
944
|
+
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
|
|
916
945
|
std::vector<common_file_info> files;
|
|
917
946
|
if (path.empty()) return files;
|
|
918
947
|
|
|
@@ -927,14 +956,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
|
|
|
927
956
|
const auto & p = entry.path();
|
|
928
957
|
if (std::filesystem::is_regular_file(p)) {
|
|
929
958
|
common_file_info info;
|
|
930
|
-
info.path
|
|
931
|
-
info.name
|
|
959
|
+
info.path = p.string();
|
|
960
|
+
info.name = p.filename().string();
|
|
961
|
+
info.is_dir = false;
|
|
932
962
|
try {
|
|
933
963
|
info.size = static_cast<size_t>(std::filesystem::file_size(p));
|
|
934
964
|
} catch (const std::filesystem::filesystem_error &) {
|
|
935
965
|
info.size = 0;
|
|
936
966
|
}
|
|
937
967
|
files.push_back(std::move(info));
|
|
968
|
+
} else if (include_directories && std::filesystem::is_directory(p)) {
|
|
969
|
+
common_file_info info;
|
|
970
|
+
info.path = p.string();
|
|
971
|
+
info.name = p.filename().string();
|
|
972
|
+
info.size = 0; // Directories have no size
|
|
973
|
+
info.is_dir = true;
|
|
974
|
+
files.push_back(std::move(info));
|
|
938
975
|
}
|
|
939
976
|
} catch (const std::filesystem::filesystem_error &) {
|
|
940
977
|
// skip entries we cannot inspect
|
|
@@ -945,6 +982,32 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
|
|
|
945
982
|
return files;
|
|
946
983
|
}
|
|
947
984
|
|
|
985
|
+
//
|
|
986
|
+
// TTY utils
|
|
987
|
+
//
|
|
988
|
+
|
|
989
|
+
// Decides whether colored output may be used:
//   - false if NO_COLOR is set to a non-empty value (https://no-color.org/)
//   - false if TERM is "dumb"
//   - otherwise true when stdout or stderr is attached to a terminal
bool tty_can_use_colors() {
    const char * no_color = std::getenv("NO_COLOR");
    if (no_color != nullptr && no_color[0] != '\0') {
        return false;
    }

    const char * term = std::getenv("TERM");
    if (term != nullptr && std::strcmp(term, "dumb") == 0) {
        return false;
    }

    // Both streams are checked because log messages can go to either.
    const bool out_is_tty = isatty(fileno(stdout));
    const bool err_is_tty = isatty(fileno(stderr));

    return out_is_tty || err_is_tty;
}
|
|
948
1011
|
|
|
949
1012
|
//
|
|
950
1013
|
// Model utils
|
|
@@ -12,6 +12,10 @@
|
|
|
12
12
|
#include <vector>
|
|
13
13
|
#include <map>
|
|
14
14
|
|
|
15
|
+
#if defined(_WIN32) && !defined(_WIN32_WINNT)
|
|
16
|
+
#define _WIN32_WINNT 0x0A00
|
|
17
|
+
#endif
|
|
18
|
+
|
|
15
19
|
#ifdef _WIN32
|
|
16
20
|
#define DIRECTORY_SEPARATOR '\\'
|
|
17
21
|
#else
|
|
@@ -26,8 +30,6 @@
|
|
|
26
30
|
fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
|
|
27
31
|
} while(0)
|
|
28
32
|
|
|
29
|
-
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
|
|
30
|
-
|
|
31
33
|
struct common_time_meas {
|
|
32
34
|
common_time_meas(int64_t & t_acc, bool disable = false);
|
|
33
35
|
~common_time_meas();
|
|
@@ -223,6 +225,7 @@ struct common_params_model {
|
|
|
223
225
|
std::string hf_repo = ""; // HF repo // NOLINT
|
|
224
226
|
std::string hf_file = ""; // HF file // NOLINT
|
|
225
227
|
std::string docker_repo = ""; // Docker repo // NOLINT
|
|
228
|
+
std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
|
|
226
229
|
};
|
|
227
230
|
|
|
228
231
|
struct common_params_speculative {
|
|
@@ -370,7 +373,7 @@ struct common_params {
|
|
|
370
373
|
|
|
371
374
|
std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
|
|
372
375
|
|
|
373
|
-
int32_t verbosity =
|
|
376
|
+
int32_t verbosity = 3; // LOG_LEVEL_INFO
|
|
374
377
|
int32_t control_vector_layer_start = -1; // layer range for control vector
|
|
375
378
|
int32_t control_vector_layer_end = -1; // layer range for control vector
|
|
376
379
|
bool offline = false;
|
|
@@ -479,9 +482,15 @@ struct common_params {
|
|
|
479
482
|
bool endpoint_props = false; // only control POST requests, not GET
|
|
480
483
|
bool endpoint_metrics = false;
|
|
481
484
|
|
|
485
|
+
// router server configs
|
|
486
|
+
std::string models_dir = ""; // directory containing models for the router server
|
|
487
|
+
int models_max = 4; // maximum number of models to load simultaneously
|
|
488
|
+
bool models_autoload = true; // automatically load models when requested via the router server
|
|
489
|
+
|
|
482
490
|
bool log_json = false;
|
|
483
491
|
|
|
484
492
|
std::string slot_save_path;
|
|
493
|
+
std::string media_path; // path to directory for loading media files
|
|
485
494
|
|
|
486
495
|
float slot_prompt_similarity = 0.1f;
|
|
487
496
|
|
|
@@ -632,8 +641,9 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
|
|
|
632
641
|
// Filesystem utils
|
|
633
642
|
//
|
|
634
643
|
|
|
635
|
-
bool fs_validate_filename(const std::string & filename);
|
|
644
|
+
bool fs_validate_filename(const std::string & filename, bool allow_subdirs = false);
|
|
636
645
|
bool fs_create_directory_with_parents(const std::string & path);
|
|
646
|
+
bool fs_is_directory(const std::string & path);
|
|
637
647
|
|
|
638
648
|
std::string fs_get_cache_directory();
|
|
639
649
|
std::string fs_get_cache_file(const std::string & filename);
|
|
@@ -642,8 +652,16 @@ struct common_file_info {
|
|
|
642
652
|
std::string path;
|
|
643
653
|
std::string name;
|
|
644
654
|
size_t size = 0; // in bytes
|
|
655
|
+
bool is_dir = false;
|
|
645
656
|
};
|
|
646
|
-
std::vector<common_file_info>
|
|
657
|
+
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
|
658
|
+
|
|
659
|
+
//
|
|
660
|
+
// TTY utils
|
|
661
|
+
//
|
|
662
|
+
|
|
663
|
+
// Auto-detect if colors can be enabled based on terminal and environment
|
|
664
|
+
bool tty_can_use_colors();
|
|
647
665
|
|
|
648
666
|
//
|
|
649
667
|
// Model utils
|