@fugood/llama.node 1.4.14 → 1.5.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/lib/binding.ts +13 -6
  2. package/lib/index.js +2 -2
  3. package/lib/index.ts +8 -3
  4. package/package.json +15 -15
  5. package/scripts/llama.cpp.patch +77 -65
  6. package/src/LlamaContext.cpp +31 -34
  7. package/src/llama.cpp/CMakeLists.txt +24 -8
  8. package/src/llama.cpp/common/CMakeLists.txt +15 -34
  9. package/src/llama.cpp/common/arg.cpp +59 -10
  10. package/src/llama.cpp/common/chat-parser.cpp +115 -0
  11. package/src/llama.cpp/common/chat.cpp +356 -34
  12. package/src/llama.cpp/common/chat.h +17 -13
  13. package/src/llama.cpp/common/common.cpp +0 -1
  14. package/src/llama.cpp/common/common.h +30 -25
  15. package/src/llama.cpp/common/debug.cpp +165 -0
  16. package/src/llama.cpp/common/debug.h +43 -0
  17. package/src/llama.cpp/common/download.cpp +12 -342
  18. package/src/llama.cpp/common/download.h +6 -0
  19. package/src/llama.cpp/common/jinja/caps.cpp +237 -0
  20. package/src/llama.cpp/common/jinja/caps.h +24 -0
  21. package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
  22. package/src/llama.cpp/common/jinja/lexer.h +157 -0
  23. package/src/llama.cpp/common/jinja/parser.cpp +591 -0
  24. package/src/llama.cpp/common/jinja/parser.h +21 -0
  25. package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
  26. package/src/llama.cpp/common/jinja/runtime.h +628 -0
  27. package/src/llama.cpp/common/jinja/string.cpp +207 -0
  28. package/src/llama.cpp/common/jinja/string.h +58 -0
  29. package/src/llama.cpp/common/jinja/utils.h +49 -0
  30. package/src/llama.cpp/common/jinja/value.cpp +1221 -0
  31. package/src/llama.cpp/common/jinja/value.h +464 -0
  32. package/src/llama.cpp/common/preset.cpp +12 -2
  33. package/src/llama.cpp/common/sampling.cpp +52 -19
  34. package/src/llama.cpp/ggml/include/ggml.h +39 -7
  35. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
  36. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
  37. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
  39. package/src/llama.cpp/include/llama-cpp.h +3 -1
  40. package/src/llama.cpp/include/llama.h +29 -2
  41. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  42. package/src/llama.cpp/src/llama-adapter.cpp +7 -13
  43. package/src/llama.cpp/src/llama-adapter.h +1 -3
  44. package/src/llama.cpp/src/llama-arch.cpp +35 -0
  45. package/src/llama.cpp/src/llama-arch.h +1 -0
  46. package/src/llama.cpp/src/llama-chat.cpp +20 -0
  47. package/src/llama.cpp/src/llama-chat.h +1 -0
  48. package/src/llama.cpp/src/llama-context.cpp +232 -144
  49. package/src/llama.cpp/src/llama-context.h +10 -0
  50. package/src/llama.cpp/src/llama-cparams.h +2 -0
  51. package/src/llama.cpp/src/llama-graph.cpp +31 -43
  52. package/src/llama.cpp/src/llama-hparams.cpp +0 -36
  53. package/src/llama.cpp/src/llama-hparams.h +38 -1
  54. package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
  55. package/src/llama.cpp/src/llama-kv-cache.h +0 -2
  56. package/src/llama.cpp/src/llama-mmap.cpp +13 -6
  57. package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
  58. package/src/llama.cpp/src/llama-model.cpp +215 -97
  59. package/src/llama.cpp/src/llama-model.h +3 -2
  60. package/src/llama.cpp/src/llama-sampling.cpp +170 -13
  61. package/src/llama.cpp/src/llama-vocab.cpp +37 -24
  62. package/src/llama.cpp/src/llama-vocab.h +1 -0
  63. package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
  64. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
  65. package/src/llama.cpp/src/models/models.h +13 -2
  66. package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
@@ -0,0 +1,341 @@
1
+ #include "lexer.h"
2
+ #include "runtime.h"
3
+
4
+ #include <cctype>
5
+ #include <functional>
6
+ #include <map>
7
+ #include <string>
8
+ #include <vector>
9
+
10
+ #define FILENAME "jinja-lexer"
11
+
12
+ namespace jinja {
13
+
14
// Remove, in place, every leading character of `s` that appears in `chars`.
// If `s` consists only of such characters it becomes empty.
static void string_lstrip(std::string & s, const char * chars) {
    // find_first_not_of() yields npos when nothing survives; erase(0, npos)
    // then wipes the whole string, so no separate branch is required.
    s.erase(0, s.find_first_not_of(chars));
}
22
+
23
// Remove, in place, every trailing character of `s` that appears in `chars`.
// If `s` consists only of such characters it becomes empty.
static void string_rstrip(std::string & s, const char * chars) {
    const size_t last_kept = s.find_last_not_of(chars);
    // Keep everything up to and including the last surviving character,
    // or nothing at all when no character survives.
    s.resize(last_kept == std::string::npos ? 0 : last_kept + 1);
}
31
+
32
+ lexer_result lexer::tokenize(const std::string & source) {
33
+ std::vector<token> tokens;
34
+
35
+ // NOTE: do NOT transform the source string (i.e. preprocessing), as we need to keep
36
+ // the original character positions for error reporting etc.
37
+ std::string src = source;
38
+
39
+ if (source.empty()) {
40
+ return {tokens, src};
41
+ }
42
+
43
+ // Normalize \r\n or \r to \n
44
+ for (std::string::size_type pos = 0; (pos = src.find("\r\n", pos)) != std::string::npos; ) {
45
+ src.erase(pos, 1);
46
+ ++pos;
47
+ }
48
+ for (std::string::size_type pos = 0; (pos = src.find("\r", pos)) != std::string::npos; ) {
49
+ src.replace(pos, 1, 1, '\n');
50
+ ++pos;
51
+ }
52
+
53
+ // In the default configuration:
54
+ // - a single trailing newline is stripped if present
55
+ // - other whitespace (spaces, tabs, newlines etc.) is returned unchanged
56
+ if (source.back() == '\n') {
57
+ src.pop_back();
58
+ }
59
+
60
+ size_t pos = 0;
61
+ size_t start_pos = 0;
62
+ size_t curly_bracket_depth = 0;
63
+
64
+ using pred = std::function<bool(char)>;
65
+ auto consume_while = [&](const pred & predicate) -> std::string {
66
+ std::string str;
67
+ while (predicate(src[pos])) {
68
+ // check for escape char
69
+ if (src[pos] == '\\') {
70
+ // consume backslash
71
+ ++pos;
72
+ // check for end of input
73
+ if (pos >= src.size()) {
74
+ throw lexer_exception("unexpected end of input after escape character", source, pos);
75
+ }
76
+ // add escaped char
77
+ char escaped_char = src[pos++];
78
+ if (escape_chars.find(escaped_char) == escape_chars.end()) {
79
+ throw lexer_exception(std::string("unknown escape character \\") + escaped_char, source, pos);
80
+ }
81
+ char unescaped_char = escape_chars.at(escaped_char);
82
+ str += unescaped_char;
83
+ continue;
84
+ }
85
+
86
+ str += src[pos++];
87
+ if (pos > src.size()) {
88
+ throw lexer_exception("unexpected end of input during consume_while", source, pos);
89
+ }
90
+ }
91
+ return str;
92
+ };
93
+
94
+ auto consume_numeric = [&]() -> std::string {
95
+ std::string num = consume_while(is_integer);
96
+ if (pos < src.size() && src[pos] == '.' && pos + 1 < src.size() && is_integer(src[pos + 1])) {
97
+ ++pos; // Consume '.'
98
+ std::string frac = consume_while(is_integer);
99
+ num += "." + frac;
100
+ }
101
+ return num;
102
+ };
103
+
104
+ auto next_pos_is = [&](std::initializer_list<char> chars, size_t n = 1) -> bool {
105
+ if (pos + n >= src.size()) return false;
106
+ for (char c : chars) {
107
+ if (src[pos + n] == c) return true;
108
+ }
109
+ return false;
110
+ };
111
+
112
+ // note: default config for chat template: lstrip_blocks = true, trim_blocks = true
113
+
114
+ // text\n[space]{block} --> text\n{block}
115
+ bool opt_lstrip_blocks = true;
116
+
117
+ // {block}\n[space]text --> {block}[space]text
118
+ bool opt_trim_blocks = true;
119
+
120
+ // options set dynamically based on current/last block
121
+ bool is_lstrip_block = false; // example: {%-
122
+ bool is_rstrip_block = false; // example: -%}
123
+
124
+ while (pos < src.size()) {
125
+ start_pos = pos;
126
+ // JJ_DEBUG("lexer main loop at pos %zu: '%s...'", pos, src.substr(pos, 10).c_str());
127
+
128
+ // First, consume all text that is outside of a Jinja statement or expression
129
+ token::type last_token_type = tokens.empty()
130
+ ? token::close_statement // initial state
131
+ : tokens.back().t;
132
+ if (last_token_type == token::close_statement ||
133
+ last_token_type == token::close_expression ||
134
+ last_token_type == token::comment) {
135
+
136
+ bool last_block_can_rm_newline = false;
137
+ is_rstrip_block = false;
138
+ if (pos > 3) {
139
+ char c0 = src[pos - 3];
140
+ char c1 = src[pos - 2];
141
+ char c2 = src[pos - 1];
142
+ // strip if: -[%}#]}text
143
+ is_rstrip_block = c0 == '-'
144
+ && (c1 == '%' || c1 == '}' || c1 == '#')
145
+ && c2 == '}';
146
+ // match behavior of hf.js: exclude {{ and }} cases, regex: ([#%-]})
147
+ last_block_can_rm_newline = (c1 == '#' || c1 == '%' || c1 == '-') && c2 == '}';
148
+ }
149
+
150
+ size_t start = pos;
151
+ size_t end = start;
152
+ while (pos < src.size() &&
153
+ // Keep going until we hit the next Jinja statement or expression
154
+ !(
155
+ src[pos] == '{' &&
156
+ next_pos_is( {'%', '{', '#'} )
157
+ )) {
158
+ end = ++pos;
159
+ }
160
+
161
+ // equivalent to hf.js code: template.replace(/^[ \t]*({[#%-])/gm, "$1");
162
+ if (opt_lstrip_blocks && src[pos] == '{' && next_pos_is({'%', '#', '-'})) {
163
+ size_t current = end;
164
+ while (current > start) {
165
+ char c = src[current - 1];
166
+ if (current == 1) {
167
+ end = 0; // Trim from the start of the string
168
+ break;
169
+ }
170
+ if (c == '\n') {
171
+ end = current; // Trim from the start of the line
172
+ break;
173
+ }
174
+ if (!std::isspace(static_cast<unsigned char>(c))) {
175
+ break; // Found non-whitespace before newline, keep
176
+ }
177
+ --current;
178
+ }
179
+ }
180
+
181
+ std::string text = src.substr(start, end - start);
182
+
183
+ // equivalent to hf.js code: template.replace(/([#%-]})\n/g, "$1");
184
+ if (opt_trim_blocks && last_block_can_rm_newline) {
185
+ if (!text.empty() && text.front() == '\n') {
186
+ text.erase(text.begin());
187
+ }
188
+ }
189
+
190
+ if (is_rstrip_block) {
191
+ // example: {last_block}[space]text
192
+ // doing lstrip on text, effectively rstrip the LAST block
193
+ // JJ_DEBUG("RSTRIP block detected, current text: '%s'", text.c_str());
194
+ string_lstrip(text, " \t\r\n");
195
+ }
196
+
197
+ is_lstrip_block = src[pos] == '{' && next_pos_is({'{', '%', '#'}) && next_pos_is({'-'}, 2);
198
+ if (is_lstrip_block) {
199
+ // example: text[space]{current_block}
200
+ // doing rstrip on text, effectively lstrip the CURRENT block
201
+ // JJ_DEBUG("LSTRIP block detected, current text: '%s'", text.c_str());
202
+ string_rstrip(text, " \t\r\n");
203
+ }
204
+
205
+ if (!text.empty()) {
206
+ // JJ_DEBUG("consumed text: '%s'", text.c_str());
207
+ tokens.push_back({token::text, text, start_pos});
208
+ continue;
209
+ }
210
+ }
211
+
212
+ // Possibly consume a comment
213
+ // TODO: handle lstrip/rstrip for comments? (not important for now)
214
+ if (src[pos] == '{' && next_pos_is( {'#'} )) {
215
+ start_pos = pos;
216
+ pos += 2; // Skip the opening {#
217
+ std::string comment;
218
+ while (!(src[pos] == '#' && next_pos_is( {'}'} ))) {
219
+ if (pos + 2 >= src.size()) {
220
+ throw lexer_exception("missing end of comment tag", source, pos);
221
+ }
222
+ comment += src[pos++];
223
+ }
224
+ JJ_DEBUG("consumed comment: '%s'", comment.c_str());
225
+ tokens.push_back({token::comment, comment, start_pos});
226
+ pos += 2; // Skip the closing #}
227
+ continue;
228
+ }
229
+
230
+ if (src[pos] == '-' && (
231
+ last_token_type == token::open_expression ||
232
+ last_token_type == token::open_statement)
233
+ ) {
234
+ JJ_DEBUG("lexer main loop at pos %zu: '%s...'", pos, src.substr(pos, 10).c_str());
235
+ pos++; // consume '-' in {%- or {{-
236
+ if (pos >= src.size()) break;
237
+ }
238
+
239
+ // Consume (and ignore) all whitespace inside Jinja statements or expressions
240
+ consume_while([](char c) { return std::isspace(static_cast<unsigned char>(c)); });
241
+
242
+ if (pos >= src.size()) break;
243
+
244
+ char ch = src[pos];
245
+
246
+ bool is_closing_block = ch == '-' && next_pos_is( {'%', '}'} );
247
+
248
+ // Check for unary operators
249
+ if (!is_closing_block && (ch == '-' || ch == '+')) {
250
+ start_pos = pos;
251
+ token::type last_token_type = tokens.empty() ? token::eof : tokens.back().t;
252
+ if (last_token_type == token::text || last_token_type == token::eof) {
253
+ throw lexer_exception(std::string("unexpected character: ") + ch, source, pos);
254
+ }
255
+ switch (last_token_type) {
256
+ case token::identifier:
257
+ case token::numeric_literal:
258
+ case token::string_literal:
259
+ case token::close_paren:
260
+ case token::close_square_bracket:
261
+ // Part of a binary operator
262
+ // a - 1, 1 - 1, true - 1, "apple" - 1, (1) - 1, a[1] - 1
263
+ // Continue parsing normally
264
+ break;
265
+ default: {
266
+ // Is part of a unary operator
267
+ // (-1), [-1], (1 + -1), not -1, -apple
268
+ ++pos; // Consume the operator
269
+
270
+ // Check for numbers following the unary operator
271
+ std::string num = consume_numeric();
272
+ std::string value = std::string(1, ch) + num;
273
+ token::type t = num.empty() ? token::unary_operator : token::numeric_literal;
274
+ // JJ_DEBUG("consumed unary operator or numeric literal: '%s'", value.c_str());
275
+ tokens.push_back({t, value, start_pos});
276
+ continue;
277
+ }
278
+ }
279
+ }
280
+
281
+ // Try to match one of the tokens in the mapping table
282
+ bool matched = false;
283
+ for (const auto & [seq, typ] : ordered_mapping_table) {
284
+ start_pos = pos;
285
+ // Inside an object literal, don't treat "}}" as expression-end
286
+ if (seq == "}}" && curly_bracket_depth > 0) {
287
+ continue;
288
+ }
289
+ if (pos + seq.size() <= src.size() && src.substr(pos, seq.size()) == seq) {
290
+ tokens.push_back({typ, seq, start_pos});
291
+ if (typ == token::open_expression) {
292
+ curly_bracket_depth = 0;
293
+ } else if (typ == token::open_curly_bracket) {
294
+ ++curly_bracket_depth;
295
+ } else if (typ == token::close_curly_bracket) {
296
+ --curly_bracket_depth;
297
+ }
298
+
299
+ pos += seq.size();
300
+ matched = true;
301
+ break; // continue main loop
302
+ }
303
+ }
304
+ if (matched) continue; // continue main loop
305
+
306
+ // Strings
307
+ if (ch == '\'' || ch == '"') {
308
+ start_pos = pos;
309
+ ++pos; // Skip opening quote
310
+ std::string str = consume_while([ch](char c) { return c != ch; });
311
+ // JJ_DEBUG("consumed string literal: '%s'", str.c_str());
312
+ tokens.push_back({token::string_literal, str, start_pos});
313
+ ++pos; // Skip closing quote
314
+ continue;
315
+ }
316
+
317
+ // Numbers
318
+ if (is_integer(ch)) {
319
+ start_pos = pos;
320
+ std::string num = consume_numeric();
321
+ // JJ_DEBUG("consumed numeric literal: '%s'", num.c_str());
322
+ tokens.push_back({token::numeric_literal, num, start_pos});
323
+ continue;
324
+ }
325
+
326
+ // Identifiers
327
+ if (is_word(ch)) {
328
+ start_pos = pos;
329
+ std::string word = consume_while(is_word);
330
+ // JJ_DEBUG("consumed identifier: '%s'", word.c_str());
331
+ tokens.push_back({token::identifier, word, start_pos});
332
+ continue;
333
+ }
334
+
335
+ throw lexer_exception(std::string("unexpected character: ") + ch, source, pos);
336
+ }
337
+
338
+ return {std::move(tokens), src};
339
+ }
340
+
341
+ } // namespace jinja
@@ -0,0 +1,157 @@
1
+ #pragma once
2
+
3
+ #include "utils.h"
4
+
5
+ #include <cctype>
6
+ #include <map>
7
+ #include <stdexcept>
8
+ #include <string>
9
+ #include <vector>
10
+
11
+ namespace jinja {
12
+
13
// One lexical unit produced by the lexer.
// Kept as a plain aggregate so it can be brace-initialized as {type, value, pos}.
struct token {
    // Kind of token. The order of the enumerators is part of the definition;
    // new kinds should be appended rather than inserted.
    enum type {
        eof,                            // end of source
        text,                           // raw text between Jinja statements or expressions

        numeric_literal,                // e.g. 123, 1.0
        string_literal,                 // 'string'
        identifier,                     // variables, functions, statements, booleans, etc.
        equals,                         // =
        open_paren,                     // (
        close_paren,                    // )
        open_statement,                 // {%
        close_statement,                // %}
        open_expression,                // {{
        close_expression,               // }}
        open_square_bracket,            // [
        close_square_bracket,           // ]
        open_curly_bracket,             // {
        close_curly_bracket,            // }
        comma,                          // ,
        dot,                            // .
        colon,                          // :
        pipe,                           // |

        call_operator,                  // ()
        additive_binary_operator,       // + - ~
        multiplicative_binary_operator, // * / %
        comparison_binary_operator,     // < > <= >= == !=
        unary_operator,                 // ! - +
        comment,                        // {# ... #}
    };

    type        t;     // which kind of token this is
    std::string value; // text carried by the token
    size_t      pos;   // offset associated with the token in the lexed source
};
48
+
49
+ static std::string type_to_string(token::type t) {
50
+ switch (t) {
51
+ case token::eof: return "eof";
52
+ case token::text: return "text";
53
+ case token::numeric_literal: return "numeric_literal";
54
+ case token::string_literal: return "string_literal";
55
+ case token::identifier: return "identifier";
56
+ case token::equals: return "equals";
57
+ case token::open_paren: return "open_paren";
58
+ case token::close_paren: return "close_paren";
59
+ case token::open_statement: return "open_statement";
60
+ case token::close_statement: return "close_statement";
61
+ case token::open_expression: return "open_expression";
62
+ case token::close_expression: return "close_expression";
63
+ case token::open_square_bracket: return "open_square_bracket";
64
+ case token::close_square_bracket: return "close_square_bracket";
65
+ case token::open_curly_bracket: return "open_curly_bracket";
66
+ case token::close_curly_bracket: return "close_curly_bracket";
67
+ case token::comma: return "comma";
68
+ case token::dot: return "dot";
69
+ case token::colon: return "colon";
70
+ case token::pipe: return "pipe";
71
+ case token::call_operator: return "call_operator";
72
+ case token::additive_binary_operator: return "additive_binary_operator";
73
+ case token::multiplicative_binary_operator: return "multiplicative_binary_operator";
74
+ case token::comparison_binary_operator: return "comparison_binary_operator";
75
+ case token::unary_operator: return "unary_operator";
76
+ case token::comment: return "comment";
77
+ default: return "unknown";
78
+ }
79
+ }
80
+
81
+ struct lexer_result {
82
+ std::vector<token> tokens;
83
+ std::string source;
84
+ };
85
+
86
+ struct lexer {
87
+ const std::map<char, char> escape_chars = {
88
+ {'n', '\n'},
89
+ {'t', '\t'},
90
+ {'r', '\r'},
91
+ {'b', '\b'},
92
+ {'f', '\f'},
93
+ {'v', '\v'},
94
+ {'\\', '\\'},
95
+ {'\'', '\''},
96
+ {'\"', '\"'},
97
+ };
98
+
99
+ static bool is_word(char c) {
100
+ return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
101
+ }
102
+
103
+ static bool is_integer(char c) {
104
+ return std::isdigit(static_cast<unsigned char>(c));
105
+ }
106
+
107
+ const std::vector<std::pair<std::string, token::type>> ordered_mapping_table = {
108
+ // Trimmed control sequences
109
+ {"{%-", token::open_statement},
110
+ {"-%}", token::close_statement},
111
+ {"{{-", token::open_expression},
112
+ {"-}}", token::close_expression},
113
+ // Control sequences
114
+ {"{%", token::open_statement},
115
+ {"%}", token::close_statement},
116
+ {"{{", token::open_expression},
117
+ {"}}", token::close_expression},
118
+ // Single character tokens
119
+ {"(", token::open_paren},
120
+ {")", token::close_paren},
121
+ {"{", token::open_curly_bracket},
122
+ {"}", token::close_curly_bracket},
123
+ {"[", token::open_square_bracket},
124
+ {"]", token::close_square_bracket},
125
+ {",", token::comma},
126
+ {".", token::dot},
127
+ {":", token::colon},
128
+ {"|", token::pipe},
129
+ // Comparison operators
130
+ {"<=", token::comparison_binary_operator},
131
+ {">=", token::comparison_binary_operator},
132
+ {"==", token::comparison_binary_operator},
133
+ {"!=", token::comparison_binary_operator},
134
+ {"<", token::comparison_binary_operator},
135
+ {">", token::comparison_binary_operator},
136
+ // Arithmetic operators
137
+ {"+", token::additive_binary_operator},
138
+ {"-", token::additive_binary_operator},
139
+ {"~", token::additive_binary_operator},
140
+ {"*", token::multiplicative_binary_operator},
141
+ {"/", token::multiplicative_binary_operator},
142
+ {"%", token::multiplicative_binary_operator},
143
+ // Assignment operator
144
+ {"=", token::equals},
145
+ };
146
+
147
+ // tokenize the source string into a list of tokens
148
+ // may throw lexer_exception on error
149
+ lexer_result tokenize(const std::string & source);
150
+ };
151
+
152
+ struct lexer_exception : public std::runtime_error {
153
+ lexer_exception(const std::string & msg, const std::string & source, size_t pos)
154
+ : std::runtime_error(fmt_error_with_source("lexer", msg, source, pos)) {}
155
+ };
156
+
157
+ } // namespace jinja