cui-llama.rn 1.3.6 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/README.md +22 -1
  2. package/android/src/main/CMakeLists.txt +25 -26
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +31 -9
  4. package/android/src/main/java/com/rnllama/RNLlama.java +98 -0
  5. package/android/src/main/jni-utils.h +94 -0
  6. package/android/src/main/jni.cpp +133 -63
  7. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +15 -0
  8. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +15 -0
  9. package/cpp/common.cpp +2085 -1982
  10. package/cpp/common.h +696 -664
  11. package/cpp/ggml-alloc.c +1042 -1037
  12. package/cpp/ggml-backend-impl.h +255 -256
  13. package/cpp/ggml-backend-reg.cpp +582 -582
  14. package/cpp/ggml-backend.cpp +2002 -2002
  15. package/cpp/ggml-backend.h +354 -352
  16. package/cpp/ggml-common.h +1853 -1853
  17. package/cpp/ggml-cpp.h +39 -39
  18. package/cpp/ggml-cpu-aarch64.cpp +4247 -4247
  19. package/cpp/ggml-cpu-aarch64.h +8 -8
  20. package/cpp/ggml-cpu-impl.h +386 -386
  21. package/cpp/ggml-cpu-quants.c +10920 -10839
  22. package/cpp/ggml-cpu-traits.cpp +36 -36
  23. package/cpp/ggml-cpu-traits.h +38 -38
  24. package/cpp/ggml-cpu.c +14391 -14122
  25. package/cpp/ggml-cpu.cpp +635 -627
  26. package/cpp/ggml-cpu.h +135 -135
  27. package/cpp/ggml-impl.h +567 -567
  28. package/cpp/ggml-metal-impl.h +288 -0
  29. package/cpp/ggml-metal.m +4884 -4884
  30. package/cpp/ggml-opt.cpp +854 -0
  31. package/cpp/ggml-opt.h +216 -0
  32. package/cpp/ggml-quants.c +5238 -5238
  33. package/cpp/ggml-threading.h +14 -14
  34. package/cpp/ggml.c +6514 -6448
  35. package/cpp/ggml.h +2194 -2163
  36. package/cpp/gguf.cpp +1329 -1325
  37. package/cpp/gguf.h +202 -202
  38. package/cpp/json-schema-to-grammar.cpp +1045 -1045
  39. package/cpp/json-schema-to-grammar.h +8 -8
  40. package/cpp/json.hpp +24766 -24766
  41. package/cpp/llama-adapter.cpp +347 -346
  42. package/cpp/llama-adapter.h +74 -73
  43. package/cpp/llama-arch.cpp +1487 -1434
  44. package/cpp/llama-arch.h +400 -395
  45. package/cpp/llama-batch.cpp +368 -368
  46. package/cpp/llama-batch.h +88 -88
  47. package/cpp/llama-chat.cpp +578 -567
  48. package/cpp/llama-chat.h +52 -51
  49. package/cpp/llama-context.cpp +1775 -1771
  50. package/cpp/llama-context.h +128 -128
  51. package/cpp/llama-cparams.cpp +1 -1
  52. package/cpp/llama-cparams.h +37 -37
  53. package/cpp/llama-cpp.h +30 -30
  54. package/cpp/llama-grammar.cpp +1139 -1139
  55. package/cpp/llama-grammar.h +143 -143
  56. package/cpp/llama-hparams.cpp +71 -71
  57. package/cpp/llama-hparams.h +139 -140
  58. package/cpp/llama-impl.cpp +167 -167
  59. package/cpp/llama-impl.h +61 -61
  60. package/cpp/llama-kv-cache.cpp +718 -718
  61. package/cpp/llama-kv-cache.h +218 -218
  62. package/cpp/llama-mmap.cpp +590 -589
  63. package/cpp/llama-mmap.h +67 -67
  64. package/cpp/llama-model-loader.cpp +1124 -1011
  65. package/cpp/llama-model-loader.h +167 -158
  66. package/cpp/llama-model.cpp +3997 -2202
  67. package/cpp/llama-model.h +370 -391
  68. package/cpp/llama-sampling.cpp +2408 -2406
  69. package/cpp/llama-sampling.h +32 -48
  70. package/cpp/llama-vocab.cpp +3247 -1982
  71. package/cpp/llama-vocab.h +125 -182
  72. package/cpp/llama.cpp +10077 -12544
  73. package/cpp/llama.h +1323 -1285
  74. package/cpp/log.cpp +401 -401
  75. package/cpp/log.h +121 -121
  76. package/cpp/rn-llama.hpp +123 -116
  77. package/cpp/sampling.cpp +505 -500
  78. package/cpp/sgemm.cpp +2597 -2597
  79. package/cpp/sgemm.h +14 -14
  80. package/cpp/speculative.cpp +277 -274
  81. package/cpp/speculative.h +28 -28
  82. package/cpp/unicode.cpp +2 -3
  83. package/ios/RNLlama.mm +47 -0
  84. package/ios/RNLlamaContext.h +3 -1
  85. package/ios/RNLlamaContext.mm +71 -14
  86. package/jest/mock.js +15 -3
  87. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  88. package/lib/commonjs/index.js +33 -37
  89. package/lib/commonjs/index.js.map +1 -1
  90. package/lib/module/NativeRNLlama.js.map +1 -1
  91. package/lib/module/index.js +31 -35
  92. package/lib/module/index.js.map +1 -1
  93. package/lib/typescript/NativeRNLlama.d.ts +26 -6
  94. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  95. package/lib/typescript/index.d.ts +21 -36
  96. package/lib/typescript/index.d.ts.map +1 -1
  97. package/llama-rn.podspec +4 -18
  98. package/package.json +2 -3
  99. package/src/NativeRNLlama.ts +32 -13
  100. package/src/index.ts +52 -47
  101. package/cpp/llama.cpp.rej +0 -23
@@ -1,1139 +1,1139 @@
1
- #include "llama-grammar.h"
2
-
3
- #include "llama-impl.h"
4
- #include "llama-vocab.h"
5
- #include "llama-sampling.h"
6
-
7
- #include <cmath>
8
- #include <algorithm>
9
- #include <stdexcept>
10
-
11
- //
12
- // helpers
13
- //
14
-
// Decodes a single UTF-8 sequence starting at `src`.
// NOTE: the input is assumed to be valid UTF-8; the only protection is that decoding
// stops at a NUL byte, so a truncated sequence never reads past the terminator.
// Returns the decoded value and a pointer to the first byte that was not consumed.
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
    // total sequence length, indexed by the upper four bits of the lead byte
    static const int seq_len_by_nibble[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };

    const uint8_t lead         = static_cast<uint8_t>(*src);
    const int     seq_len      = seq_len_by_nibble[lead >> 4];
    const uint8_t payload_mask = (1 << (8 - seq_len)) - 1;

    uint32_t           code_point = lead & payload_mask;
    const char *       cursor     = src + 1;
    const char * const limit      = src + seq_len; // may lie beyond the terminator!

    while (cursor < limit && *cursor) {
        // each following byte contributes its low six bits
        code_point = (code_point << 6) + (static_cast<uint8_t>(*cursor) & 0x3F);
        ++cursor;
    }
    return std::make_pair(code_point, cursor);
}
30
-
31
- static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
32
- const std::string & src,
33
- llama_partial_utf8 partial_start) {
34
- static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
35
- const char * pos = src.c_str();
36
- std::vector<uint32_t> code_points;
37
-
38
- // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
39
- code_points.reserve(src.size() + 1);
40
- uint32_t value = partial_start.value;
41
- int n_remain = partial_start.n_remain;
42
-
43
- // continue previous decode, if applicable
44
- while (*pos != 0 && n_remain > 0) {
45
- uint8_t next_byte = static_cast<uint8_t>(*pos);
46
- if ((next_byte >> 6) != 2) {
47
- // invalid sequence, abort
48
- code_points.push_back(0);
49
- return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, -1 });
50
- }
51
- value = (value << 6) + (next_byte & 0x3F);
52
- ++pos;
53
- --n_remain;
54
- }
55
-
56
- if (partial_start.n_remain > 0 && n_remain == 0) {
57
- code_points.push_back(value);
58
- }
59
-
60
- // decode any subsequent utf-8 sequences, which may end in an incomplete one
61
- while (*pos != 0) {
62
- uint8_t first_byte = static_cast<uint8_t>(*pos);
63
- uint8_t highbits = first_byte >> 4;
64
- n_remain = lookup[highbits] - 1;
65
-
66
- if (n_remain < 0) {
67
- // invalid sequence, abort
68
- code_points.clear();
69
- code_points.push_back(0);
70
- return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, n_remain });
71
- }
72
-
73
- uint8_t mask = (1 << (7 - n_remain)) - 1;
74
- value = first_byte & mask;
75
-
76
- ++pos;
77
- while (*pos != 0 && n_remain > 0) {
78
- value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
79
- ++pos;
80
- --n_remain;
81
- }
82
- if (n_remain == 0) {
83
- code_points.push_back(value);
84
- }
85
- }
86
- code_points.push_back(0);
87
-
88
- return std::make_pair(std::move(code_points), llama_partial_utf8{ value, n_remain });
89
- }
90
-
// True when `c` is one of the ASCII decimal digits '0'..'9'.
static bool is_digit_char(char c) {
    return c >= '0' && c <= '9';
}
94
-
// True when `c` may appear in a name: an ASCII letter, an ASCII digit or '-'.
static bool is_word_char(char c) {
    if (c == '-') {
        return true;
    }
    const bool is_lower = c >= 'a' && c <= 'z';
    const bool is_upper = c >= 'A' && c <= 'Z';
    const bool is_digit = c >= '0' && c <= '9';
    return is_lower || is_upper || is_digit;
}
98
-
// Reads exactly `size` hexadecimal digits (either case) starting at `src`.
// Returns the accumulated value and a pointer just past the digits.
// Throws std::runtime_error when fewer than `size` hex digits are available.
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
    uint32_t accum  = 0;
    int      n_read = 0;

    while (n_read < size) {
        const char ch = src[n_read];
        uint32_t nibble;
        if (ch >= '0' && ch <= '9') {
            nibble = ch - '0';
        } else if (ch >= 'a' && ch <= 'f') {
            nibble = ch - 'a' + 10;
        } else if (ch >= 'A' && ch <= 'F') {
            nibble = ch - 'A' + 10;
        } else {
            break; // not a hex digit (this also covers the NUL terminator)
        }
        accum = (accum << 4) + nibble;
        ++n_read;
    }

    if (n_read != size) {
        throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
    }
    return std::make_pair(accum, src + n_read);
}
121
-
// Skips blanks, tabs and '#' comments starting at `src`; line breaks are skipped too
// when `newline_ok` is set. A comment extends up to, but not including, the next
// '\r' / '\n' (or the end of the input). Returns the first position not skipped.
static const char * parse_space(const char * src, bool newline_ok) {
    const char * cur = src;
    for (;;) {
        switch (*cur) {
            case ' ':
            case '\t':
                ++cur;
                continue;
            case '\r':
            case '\n':
                if (!newline_ok) {
                    return cur;
                }
                ++cur;
                continue;
            case '#':
                // consume the comment body, leaving the line break for the next iteration
                while (*cur != '\0' && *cur != '\r' && *cur != '\n') {
                    ++cur;
                }
                continue;
            default:
                return cur;
        }
    }
}
136
-
137
- static const char * parse_name(const char * src) {
138
- const char * pos = src;
139
- while (is_word_char(*pos)) {
140
- pos++;
141
- }
142
- if (pos == src) {
143
- throw std::runtime_error(std::string("expecting name at ") + src);
144
- }
145
- return pos;
146
- }
147
-
148
- static const char * parse_int(const char * src) {
149
- const char * pos = src;
150
- while (is_digit_char(*pos)) {
151
- pos++;
152
- }
153
- if (pos == src) {
154
- throw std::runtime_error(std::string("expecting integer at ") + src);
155
- }
156
- return pos;
157
- }
158
-
159
- static std::pair<uint32_t, const char *> parse_char(const char * src) {
160
- if (*src == '\\') {
161
- switch (src[1]) {
162
- case 'x': return parse_hex(src + 2, 2);
163
- case 'u': return parse_hex(src + 2, 4);
164
- case 'U': return parse_hex(src + 2, 8);
165
- case 't': return std::make_pair('\t', src + 2);
166
- case 'r': return std::make_pair('\r', src + 2);
167
- case 'n': return std::make_pair('\n', src + 2);
168
- case '\\':
169
- case '"':
170
- case '[':
171
- case ']':
172
- return std::make_pair(src[1], src + 2);
173
- default:
174
- throw std::runtime_error(std::string("unknown escape at ") + src);
175
- }
176
- } else if (*src) {
177
- return decode_utf8(src);
178
- }
179
- throw std::runtime_error("unexpected end of input");
180
- }
181
-
// Writes the code point `c` to `file`.
// Printable ASCII (0x20..0x7E) is written verbatim; everything else is written as
// <U+XXXX> instead of being UTF-8 encoded. DEL (0x7F) is a control character, so it
// is escaped as well rather than being emitted raw.
static void print_grammar_char(FILE * file, uint32_t c) {
    const bool printable_ascii = c >= 0x20 && c < 0x7f;
    if (printable_ascii) {
        fprintf(file, "%c", static_cast<char>(c));
    } else {
        // cop out of encoding UTF-8
        fprintf(file, "<U+%04X>", c);
    }
}
190
-
191
- static bool is_char_element(llama_grammar_element elem) {
192
- switch (elem.type) {
193
- case LLAMA_GRETYPE_CHAR: return true;
194
- case LLAMA_GRETYPE_CHAR_NOT: return true;
195
- case LLAMA_GRETYPE_CHAR_ALT: return true;
196
- case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
197
- case LLAMA_GRETYPE_CHAR_ANY: return true;
198
- default: return false;
199
- }
200
- }
201
-
202
- static void print_rule_binary(FILE * file, const llama_grammar_rule & rule) {
203
- for (auto elem : rule) {
204
- switch (elem.type) {
205
- case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
206
- case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break;
207
- case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break;
208
- case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break;
209
- case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
210
- case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
211
- case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
212
- case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
213
- }
214
- switch (elem.type) {
215
- case LLAMA_GRETYPE_END:
216
- case LLAMA_GRETYPE_ALT:
217
- case LLAMA_GRETYPE_RULE_REF:
218
- fprintf(file, "(%u) ", elem.value);
219
- break;
220
- case LLAMA_GRETYPE_CHAR:
221
- case LLAMA_GRETYPE_CHAR_NOT:
222
- case LLAMA_GRETYPE_CHAR_RNG_UPPER:
223
- case LLAMA_GRETYPE_CHAR_ALT:
224
- case LLAMA_GRETYPE_CHAR_ANY:
225
- fprintf(file, "(\"");
226
- print_grammar_char(file, elem.value);
227
- fprintf(file, "\") ");
228
- break;
229
- }
230
- }
231
- fprintf(file, "\n");
232
- }
233
-
234
- static void print_rule(
235
- FILE * file,
236
- uint32_t rule_id,
237
- const llama_grammar_rule & rule,
238
- const std::map<uint32_t, std::string> & symbol_id_names) {
239
- if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
240
- throw std::runtime_error(
241
- "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
242
- }
243
- fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
244
- for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
245
- llama_grammar_element elem = rule[i];
246
- switch (elem.type) {
247
- case LLAMA_GRETYPE_END:
248
- throw std::runtime_error(
249
- "unexpected end of rule: " + std::to_string(rule_id) + "," +
250
- std::to_string(i));
251
- case LLAMA_GRETYPE_ALT:
252
- fprintf(file, "| ");
253
- break;
254
- case LLAMA_GRETYPE_RULE_REF:
255
- fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
256
- break;
257
- case LLAMA_GRETYPE_CHAR:
258
- fprintf(file, "[");
259
- print_grammar_char(file, elem.value);
260
- break;
261
- case LLAMA_GRETYPE_CHAR_NOT:
262
- fprintf(file, "[^");
263
- print_grammar_char(file, elem.value);
264
- break;
265
- case LLAMA_GRETYPE_CHAR_RNG_UPPER:
266
- if (i == 0 || !is_char_element(rule[i - 1])) {
267
- throw std::runtime_error(
268
- "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
269
- std::to_string(rule_id) + "," + std::to_string(i));
270
- }
271
- fprintf(file, "-");
272
- print_grammar_char(file, elem.value);
273
- break;
274
- case LLAMA_GRETYPE_CHAR_ALT:
275
- if (i == 0 || !is_char_element(rule[i - 1])) {
276
- throw std::runtime_error(
277
- "LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
278
- std::to_string(rule_id) + "," + std::to_string(i));
279
- }
280
- print_grammar_char(file, elem.value);
281
- break;
282
- case LLAMA_GRETYPE_CHAR_ANY:
283
- fprintf(file, ".");
284
- break;
285
- }
286
- if (is_char_element(elem)) {
287
- switch (rule[i + 1].type) {
288
- case LLAMA_GRETYPE_CHAR_ALT:
289
- case LLAMA_GRETYPE_CHAR_RNG_UPPER:
290
- case LLAMA_GRETYPE_CHAR_ANY:
291
- break;
292
- default:
293
- fprintf(file, "] ");
294
- }
295
- }
296
- }
297
- fprintf(file, "\n");
298
- }
299
-
300
- //
301
- // implementation
302
- //
303
-
304
- uint32_t llama_grammar_parser::get_symbol_id(const char * src, size_t len) {
305
- uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
306
- auto result = symbol_ids.emplace(std::string(src, len), next_id);
307
- return result.first->second;
308
- }
309
-
310
- uint32_t llama_grammar_parser::generate_symbol_id(const std::string & base_name) {
311
- uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
312
- symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
313
- return next_id;
314
- }
315
-
316
- void llama_grammar_parser::add_rule(uint32_t rule_id, const llama_grammar_rule & rule) {
317
- if (rules.size() <= rule_id) {
318
- rules.resize(rule_id + 1);
319
- }
320
- rules[rule_id] = rule;
321
- }
322
-
323
- const char * llama_grammar_parser::parse_alternates(
324
- const char * src,
325
- const std::string & rule_name,
326
- uint32_t rule_id,
327
- bool is_nested) {
328
- llama_grammar_rule rule;
329
- const char * pos = parse_sequence(src, rule_name, rule, is_nested);
330
- while (*pos == '|') {
331
- rule.push_back({LLAMA_GRETYPE_ALT, 0});
332
- pos = parse_space(pos + 1, true);
333
- pos = parse_sequence(pos, rule_name, rule, is_nested);
334
- }
335
- rule.push_back({LLAMA_GRETYPE_END, 0});
336
- add_rule(rule_id, rule);
337
- return pos;
338
- }
339
-
340
- const char * llama_grammar_parser::parse_sequence(
341
- const char * src,
342
- const std::string & rule_name,
343
- llama_grammar_rule & rule,
344
- bool is_nested) {
345
- size_t last_sym_start = rule.size();
346
- const char * pos = src;
347
-
348
- auto handle_repetitions = [&](int min_times, int max_times) {
349
-
350
- if (last_sym_start == rule.size()) {
351
- throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
352
- }
353
-
354
- // apply transformation to previous symbol (last_sym_start to end) according to
355
- // the following rewrite rules:
356
- // S{m,n} --> S S S (m times) S'(n-m)
357
- // S'(x) ::= S S'(x-1) |
358
- // (... n-m definitions of these S' rules ...)
359
- // S'(1) ::= S |
360
- // S{m,} --> S S S (m times) S'
361
- // S' ::= S S' |
362
- // S* --> S{0,}
363
- // --> S' ::= S S' |
364
- // S+ --> S{1,}
365
- // --> S S'
366
- // S' ::= S S' |
367
- // S? --> S{0,1}
368
- // --> S'
369
- // S' ::= S |
370
-
371
- llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
372
- if (min_times == 0) {
373
- rule.resize(last_sym_start);
374
- } else {
375
- // Repeat the previous elements (min_times - 1) times
376
- for (int i = 1; i < min_times; i++) {
377
- rule.insert(rule.end(), prev_rule.begin(), prev_rule.end());
378
- }
379
- }
380
-
381
- uint32_t last_rec_rule_id = 0;
382
- auto n_opt = max_times < 0 ? 1 : max_times - min_times;
383
-
384
- llama_grammar_rule rec_rule(prev_rule);
385
- for (int i = 0; i < n_opt; i++) {
386
- rec_rule.resize(prev_rule.size());
387
- uint32_t rec_rule_id = generate_symbol_id( rule_name);
388
- if (i > 0 || max_times < 0) {
389
- rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
390
- }
391
- rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
392
- rec_rule.push_back({LLAMA_GRETYPE_END, 0});
393
- add_rule( rec_rule_id, rec_rule);
394
- last_rec_rule_id = rec_rule_id;
395
- }
396
- if (n_opt > 0) {
397
- rule.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
398
- }
399
- };
400
-
401
- while (*pos) {
402
- if (*pos == '"') { // literal string
403
- pos++;
404
- last_sym_start = rule.size();
405
- while (*pos != '"') {
406
- if (!*pos) {
407
- throw std::runtime_error("unexpected end of input");
408
- }
409
- auto char_pair = parse_char(pos);
410
- pos = char_pair.second;
411
- rule.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
412
- }
413
- pos = parse_space(pos + 1, is_nested);
414
- } else if (*pos == '[') { // char range(s)
415
- pos++;
416
- enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
417
- if (*pos == '^') {
418
- pos++;
419
- start_type = LLAMA_GRETYPE_CHAR_NOT;
420
- }
421
- last_sym_start = rule.size();
422
- while (*pos != ']') {
423
- if (!*pos) {
424
- throw std::runtime_error("unexpected end of input");
425
- }
426
- auto char_pair = parse_char(pos);
427
- pos = char_pair.second;
428
- enum llama_gretype type = last_sym_start < rule.size()
429
- ? LLAMA_GRETYPE_CHAR_ALT
430
- : start_type;
431
-
432
- rule.push_back({type, char_pair.first});
433
- if (pos[0] == '-' && pos[1] != ']') {
434
- if (!pos[1]) {
435
- throw std::runtime_error("unexpected end of input");
436
- }
437
- auto endchar_pair = parse_char(pos + 1);
438
- pos = endchar_pair.second;
439
- rule.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
440
- }
441
- }
442
- pos = parse_space(pos + 1, is_nested);
443
- } else if (is_word_char(*pos)) { // rule reference
444
- const char * name_end = parse_name(pos);
445
- uint32_t ref_rule_id = get_symbol_id(pos, name_end - pos);
446
- pos = parse_space(name_end, is_nested);
447
- last_sym_start = rule.size();
448
- rule.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
449
- } else if (*pos == '(') { // grouping
450
- // parse nested alternates into synthesized rule
451
- pos = parse_space(pos + 1, true);
452
- uint32_t sub_rule_id = generate_symbol_id(rule_name);
453
- pos = parse_alternates(pos, rule_name, sub_rule_id, true);
454
- last_sym_start = rule.size();
455
- // output reference to synthesized rule
456
- rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
457
- if (*pos != ')') {
458
- throw std::runtime_error(std::string("expecting ')' at ") + pos);
459
- }
460
- pos = parse_space(pos + 1, is_nested);
461
- } else if (*pos == '.') { // any char
462
- last_sym_start = rule.size();
463
- rule.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
464
- pos = parse_space(pos + 1, is_nested);
465
- } else if (*pos == '*') {
466
- pos = parse_space(pos + 1, is_nested);
467
- handle_repetitions(0, -1);
468
- } else if (*pos == '+') {
469
- pos = parse_space(pos + 1, is_nested);
470
- handle_repetitions(1, -1);
471
- } else if (*pos == '?') {
472
- pos = parse_space(pos + 1, is_nested);
473
- handle_repetitions(0, 1);
474
- } else if (*pos == '{') {
475
- pos = parse_space(pos + 1, is_nested);
476
-
477
- if (!is_digit_char(*pos)) {
478
- throw std::runtime_error(std::string("expecting an int at ") + pos);
479
- }
480
- const char * int_end = parse_int(pos);
481
- int min_times = std::stoul(std::string(pos, int_end - pos));
482
- pos = parse_space(int_end, is_nested);
483
-
484
- int max_times = -1;
485
-
486
- if (*pos == '}') {
487
- max_times = min_times;
488
- pos = parse_space(pos + 1, is_nested);
489
- } else if (*pos == ',') {
490
- pos = parse_space(pos + 1, is_nested);
491
-
492
- if (is_digit_char(*pos)) {
493
- const char * int_end = parse_int(pos);
494
- max_times = std::stoul(std::string(pos, int_end - pos));
495
- pos = parse_space(int_end, is_nested);
496
- }
497
-
498
- if (*pos != '}') {
499
- throw std::runtime_error(std::string("expecting '}' at ") + pos);
500
- }
501
- pos = parse_space(pos + 1, is_nested);
502
- } else {
503
- throw std::runtime_error(std::string("expecting ',' at ") + pos);
504
- }
505
- handle_repetitions(min_times, max_times);
506
- } else {
507
- break;
508
- }
509
- }
510
- return pos;
511
- }
512
-
513
- const char * llama_grammar_parser::parse_rule(const char * src) {
514
- const char * name_end = parse_name(src);
515
- const char * pos = parse_space(name_end, false);
516
- size_t name_len = name_end - src;
517
- uint32_t rule_id = get_symbol_id(src, name_len);
518
- const std::string name(src, name_len);
519
-
520
- if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
521
- throw std::runtime_error(std::string("expecting ::= at ") + pos);
522
- }
523
- pos = parse_space(pos + 3, true);
524
-
525
- pos = parse_alternates(pos, name, rule_id, false);
526
-
527
- if (*pos == '\r') {
528
- pos += pos[1] == '\n' ? 2 : 1;
529
- } else if (*pos == '\n') {
530
- pos++;
531
- } else if (*pos) {
532
- throw std::runtime_error(std::string("expecting newline or end at ") + pos);
533
- }
534
- return parse_space(pos, true);
535
- }
536
-
537
- bool llama_grammar_parser::parse(const char * src) {
538
- try {
539
- const char * pos = parse_space(src, true);
540
- while (*pos) {
541
- pos = parse_rule(pos);
542
- }
543
- // Validate the state to ensure that all rules are defined
544
- for (const auto & rule : rules) {
545
- if (rule.empty()) {
546
- throw std::runtime_error("Undefined rule");
547
- }
548
- for (const auto & elem : rule) {
549
- if (elem.type == LLAMA_GRETYPE_RULE_REF) {
550
- // Ensure that the rule at that location exists
551
- if (elem.value >= rules.size() || rules[elem.value].empty()) {
552
- // Get the name of the rule that is missing
553
- for (const auto & kv : symbol_ids) {
554
- if (kv.second == elem.value) {
555
- throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
556
- }
557
- }
558
- }
559
- }
560
- }
561
- }
562
- } catch (const std::exception & err) {
563
- fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
564
- rules.clear();
565
- return false;
566
- }
567
-
568
- return true;
569
- }
570
-
571
- void llama_grammar_parser::print(FILE * file) {
572
- try {
573
- std::map<uint32_t, std::string> symbol_id_names;
574
- for (const auto & kv : symbol_ids) {
575
- symbol_id_names[kv.second] = kv.first;
576
- }
577
- for (size_t i = 0, end = rules.size(); i < end; i++) {
578
- // fprintf(file, "%zu: ", i);
579
- // print_rule_binary(file, rules[i]);
580
- print_rule(file, uint32_t(i), rules[i], symbol_id_names);
581
- // fprintf(file, "\n");
582
- }
583
- } catch (const std::exception & err) {
584
- fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
585
- }
586
- }
587
-
588
- llama_grammar_stack llama_grammar_parser::c_rules() const {
589
- llama_grammar_stack ret;
590
- ret.reserve(rules.size());
591
- for (const auto & rule : rules) {
592
- ret.push_back(rule.data());
593
- }
594
- return ret;
595
- }
596
-
597
- // returns true iff pos points to the end of one of the definitions of a rule
598
- static bool llama_grammar_is_end_of_sequence(const llama_grammar_element * pos) {
599
- switch (pos->type) {
600
- case LLAMA_GRETYPE_END: return true; // NOLINT
601
- case LLAMA_GRETYPE_ALT: return true; // NOLINT
602
- default: return false;
603
- }
604
- }
605
-
606
- // returns true iff chr satisfies the char range at pos (regular or inverse range)
607
- // asserts that pos is pointing to a char range element
608
- static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
609
- const llama_grammar_element * pos,
610
- const uint32_t chr) {
611
- bool found = false;
612
- bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
613
-
614
- LM_GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
615
-
616
- do {
617
- if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
618
- // inclusive range, e.g. [a-z]
619
- found = found || (pos->value <= chr && chr <= pos[1].value);
620
- pos += 2;
621
- } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
622
- // Any character matches "."
623
- found = true;
624
- pos += 1;
625
- } else {
626
- // exact char match, e.g. [a] or "a"
627
- found = found || pos->value == chr;
628
- pos += 1;
629
- }
630
- } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
631
-
632
- return std::make_pair(found == is_positive_char, pos);
633
- }
634
-
635
- // returns true iff some continuation of the given partial UTF-8 sequence could satisfy the char
636
- // range at pos (regular or inverse range)
637
- // asserts that pos is pointing to a char range element
638
- static bool llama_grammar_match_partial_char(
639
- const llama_grammar_element * pos,
640
- const llama_partial_utf8 partial_utf8) {
641
- bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
642
- LM_GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
643
-
644
- uint32_t partial_value = partial_utf8.value;
645
- int n_remain = partial_utf8.n_remain;
646
-
647
- // invalid sequence or 7-bit char split across 2 bytes (overlong)
648
- if (n_remain < 0 || (n_remain == 1 && partial_value < 2)) {
649
- return false;
650
- }
651
-
652
- // range of possible code points this partial UTF-8 sequence could complete to
653
- uint32_t low = partial_value << (n_remain * 6);
654
- uint32_t high = low | ((1 << (n_remain * 6)) - 1);
655
-
656
- if (low == 0) {
657
- if (n_remain == 2) {
658
- low = 1 << 11;
659
- } else if (n_remain == 3) {
660
- low = 1 << 16;
661
- }
662
- }
663
-
664
- do {
665
- if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
666
- // inclusive range, e.g. [a-z]
667
- if (pos->value <= high && low <= pos[1].value) {
668
- return is_positive_char;
669
- }
670
- pos += 2;
671
- } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
672
- // Any character matches "."
673
- return true;
674
- } else {
675
- // exact char match, e.g. [a] or "a"
676
- if (low <= pos->value && pos->value <= high) {
677
- return is_positive_char;
678
- }
679
- pos += 1;
680
- }
681
- } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
682
-
683
- return !is_positive_char;
684
- }
685
-
686
- // transforms a grammar pushdown stack into N possible stacks, all ending
687
- // at a character range (terminal element)
688
- static void llama_grammar_advance_stack(
689
- const llama_grammar_rules & rules,
690
- const llama_grammar_stack & stack,
691
- llama_grammar_stacks & new_stacks) {
692
- if (stack.empty()) {
693
- if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
694
- new_stacks.emplace_back(stack);
695
- }
696
- return;
697
- }
698
-
699
- const llama_grammar_element * pos = stack.back();
700
-
701
- switch (pos->type) {
702
- case LLAMA_GRETYPE_RULE_REF: {
703
- const size_t rule_id = static_cast<size_t>(pos->value);
704
- const llama_grammar_element * subpos = rules[rule_id].data();
705
- do {
706
- // init new stack without the top (pos)
707
- llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
708
- if (!llama_grammar_is_end_of_sequence(pos + 1)) {
709
- // if this rule ref is followed by another element, add that to stack
710
- new_stack.push_back(pos + 1);
711
- }
712
- if (!llama_grammar_is_end_of_sequence(subpos)) {
713
- // if alternate is nonempty, add to stack
714
- new_stack.push_back(subpos);
715
- }
716
- llama_grammar_advance_stack(rules, new_stack, new_stacks);
717
- while (!llama_grammar_is_end_of_sequence(subpos)) {
718
- // scan to end of alternate def
719
- subpos++;
720
- }
721
- if (subpos->type == LLAMA_GRETYPE_ALT) {
722
- // there's another alternate def of this rule to process
723
- subpos++;
724
- } else {
725
- break;
726
- }
727
- } while (true);
728
- break;
729
- }
730
- case LLAMA_GRETYPE_CHAR:
731
- case LLAMA_GRETYPE_CHAR_NOT:
732
- case LLAMA_GRETYPE_CHAR_ANY:
733
- if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
734
- // only add the stack if it's not a duplicate of one we already have
735
- new_stacks.emplace_back(stack);
736
- }
737
- break;
738
- default:
739
- // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
740
- // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
741
- // those
742
- LM_GGML_ABORT("fatal error");
743
- }
744
- }
745
-
746
- static llama_grammar_candidates llama_grammar_reject_candidates(
747
- const llama_grammar_rules & rules,
748
- const llama_grammar_stacks & stacks,
749
- const llama_grammar_candidates & candidates) {
750
- LM_GGML_ASSERT(!stacks.empty()); // REVIEW
751
-
752
- if (candidates.empty()) {
753
- return {};
754
- }
755
-
756
- auto rejects = llama_grammar_reject_candidates_for_stack(rules, stacks.front(), candidates);
757
-
758
- for (size_t i = 1, size = stacks.size(); i < size; ++i) {
759
- rejects = llama_grammar_reject_candidates_for_stack(rules, stacks[i], rejects);
760
- }
761
-
762
- return rejects;
763
- }
764
-
765
- static bool llama_grammar_detect_left_recursion(
766
- const llama_grammar_rules & rules,
767
- size_t rule_index,
768
- std::vector<bool> * rules_visited,
769
- std::vector<bool> * rules_in_progress,
770
- std::vector<bool> * rules_may_be_empty) {
771
- if ((*rules_in_progress)[rule_index]) {
772
- return true;
773
- }
774
-
775
- (*rules_in_progress)[rule_index] = true;
776
-
777
- const llama_grammar_rule & rule = rules[rule_index];
778
-
779
- // First check if the rule might produce the empty string. This could be done combined with the second
780
- // step but it's more readable as two steps.
781
- bool at_rule_start = true;
782
- for (size_t i = 0; i < rule.size(); i++) {
783
- if (llama_grammar_is_end_of_sequence(&rule[i])) {
784
- if (at_rule_start) {
785
- (*rules_may_be_empty)[rule_index] = true;
786
- break;
787
- }
788
- at_rule_start = true;
789
- } else {
790
- at_rule_start = false;
791
- }
792
- }
793
-
794
- // Second, recurse into leftmost nonterminals (or next-leftmost as long as the previous nonterminal may
795
- // be empty)
796
- bool recurse_into_nonterminal = true;
797
- for (size_t i = 0; i < rule.size(); i++) {
798
- if (rule[i].type == LLAMA_GRETYPE_RULE_REF && recurse_into_nonterminal) {
799
- if (llama_grammar_detect_left_recursion(rules, (size_t)rule[i].value, rules_visited, rules_in_progress, rules_may_be_empty)) {
800
- return true;
801
- }
802
- if (!((*rules_may_be_empty)[(size_t)rule[i].value])) {
803
- recurse_into_nonterminal = false;
804
- }
805
- } else if (llama_grammar_is_end_of_sequence(&rule[i])) {
806
- recurse_into_nonterminal = true;
807
- } else {
808
- recurse_into_nonterminal = false;
809
- }
810
- }
811
-
812
- (*rules_in_progress)[rule_index] = false;
813
- (*rules_visited)[rule_index] = true;
814
-
815
- return false;
816
- }
817
-
818
- const llama_grammar_rules & llama_grammar_get_rules(const struct llama_grammar * grammar) {
819
- return grammar->rules;
820
- }
821
-
822
- llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar) {
823
- return grammar->stacks;
824
- }
825
-
826
- void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
827
- llama_grammar_stacks stacks_new;
828
- stacks_new.reserve(grammar->stacks.size());
829
-
830
- for (const auto & stack : grammar->stacks) {
831
- if (stack.empty()) {
832
- continue;
833
- }
834
-
835
- auto match = llama_grammar_match_char(stack.back(), chr);
836
- if (match.first) {
837
- const llama_grammar_element * pos = match.second;
838
-
839
- // update top of stack to next element, if any
840
- llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
841
- if (!llama_grammar_is_end_of_sequence(pos)) {
842
- new_stack.push_back(pos);
843
- }
844
- llama_grammar_advance_stack(grammar->rules, new_stack, stacks_new);
845
- }
846
- }
847
-
848
- grammar->stacks = std::move(stacks_new);
849
- }
850
-
851
- llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
852
- const llama_grammar_rules & rules,
853
- const llama_grammar_stack & stack,
854
- const llama_grammar_candidates & candidates) {
855
-
856
- llama_grammar_candidates rejects;
857
- rejects.reserve(candidates.size());
858
-
859
- if (stack.empty()) {
860
- for (const auto & tok : candidates) {
861
- if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
862
- rejects.push_back(tok);
863
- }
864
- }
865
- return rejects;
866
- }
867
-
868
- const llama_grammar_element * stack_pos = stack.back();
869
-
870
- llama_grammar_candidates next_candidates;
871
- next_candidates.reserve(candidates.size());
872
-
873
- for (const auto & tok : candidates) {
874
- if (*tok.code_points == 0) {
875
- // reached end of full codepoints in token, reject iff it ended in a partial sequence
876
- // that cannot satisfy this position in grammar
877
- if (tok.partial_utf8.n_remain != 0 &&
878
- !llama_grammar_match_partial_char(stack_pos, tok.partial_utf8)) {
879
- rejects.push_back(tok);
880
- }
881
- } else if (llama_grammar_match_char(stack_pos, *tok.code_points).first) {
882
- next_candidates.push_back({ tok.index, tok.code_points + 1, tok.partial_utf8 });
883
- } else {
884
- rejects.push_back(tok);
885
- }
886
- }
887
-
888
- const auto * stack_pos_after = llama_grammar_match_char(stack_pos, 0).second;
889
-
890
- // update top of stack to next element, if any
891
- llama_grammar_stack stack_after(stack.begin(), stack.end() - 1);
892
- if (!llama_grammar_is_end_of_sequence(stack_pos_after)) {
893
- stack_after.push_back(stack_pos_after);
894
- }
895
- llama_grammar_stacks next_stacks;
896
- llama_grammar_advance_stack(rules, stack_after, next_stacks);
897
-
898
- auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
899
- for (const auto & tok : next_rejects) {
900
- rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
901
- }
902
-
903
- return rejects;
904
- }
905
-
906
- ////////////////////
907
-
908
- struct llama_grammar * llama_grammar_init_impl(
909
- const struct llama_vocab * vocab,
910
- const llama_grammar_element ** rules,
911
- size_t n_rules,
912
- size_t start_rule_index) {
913
- const llama_grammar_element * pos;
914
-
915
- // copy rule definitions into vectors
916
- llama_grammar_rules vec_rules(n_rules);
917
- for (size_t i = 0; i < n_rules; i++) {
918
- for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
919
- vec_rules[i].push_back(*pos);
920
- }
921
- vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
922
- }
923
-
924
- // Check for left recursion
925
- std::vector<bool> rules_visited(n_rules);
926
- std::vector<bool> rules_in_progress(n_rules);
927
- std::vector<bool> rules_may_be_empty(n_rules);
928
- for (size_t i = 0; i < n_rules; i++) {
929
- if (rules_visited[i]) {
930
- continue;
931
- }
932
- if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
933
- LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
934
- return nullptr;
935
- }
936
- }
937
-
938
- // loop over alternates of start rule to build initial stacks
939
- llama_grammar_stacks stacks;
940
- pos = vec_rules[start_rule_index].data();
941
- do {
942
- llama_grammar_stack stack;
943
- if (!llama_grammar_is_end_of_sequence(pos)) {
944
- // if alternate is nonempty, add to stack
945
- stack.push_back(pos);
946
- }
947
- llama_grammar_advance_stack(vec_rules, stack, stacks);
948
- while (!llama_grammar_is_end_of_sequence(pos)) {
949
- // scan to end of alternate def
950
- pos++;
951
- }
952
- if (pos->type == LLAMA_GRETYPE_ALT) {
953
- // there's another alternate def of this rule to process
954
- pos++;
955
- } else {
956
- break;
957
- }
958
- } while (true);
959
-
960
- // Important: vec_rules has to be moved here, not copied, because stacks contains
961
- // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
962
- // then the pointers would be invalidated when the local vec_rules goes out of scope.
963
- return new llama_grammar { vocab, std::move(vec_rules), std::move(stacks), {}, };
964
- }
965
-
966
- struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root) {
967
- llama_grammar_parser parser;
968
-
969
- // if there is a grammar, parse it
970
- if (!parser.parse(grammar_str)) {
971
- return nullptr;
972
- }
973
-
974
- // will be empty (default) if there are parse errors
975
- if (parser.rules.empty()) {
976
- fprintf(stderr, "%s: failed to parse grammar\n", __func__);
977
- return nullptr;
978
- }
979
-
980
- // Ensure that there is a "root" node.
981
- if (parser.symbol_ids.find("root") == parser.symbol_ids.end()) {
982
- fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
983
- return nullptr;
984
- }
985
-
986
- std::vector<const llama_grammar_element *> grammar_rules(parser.c_rules());
987
-
988
- const size_t n_rules = grammar_rules.size();
989
- const size_t start_rule_index = parser.symbol_ids.at(grammar_root);
990
-
991
- const llama_grammar_element * pos;
992
-
993
- // copy rule definitions into vectors
994
- llama_grammar_rules vec_rules(n_rules);
995
- for (size_t i = 0; i < n_rules; i++) {
996
- for (pos = grammar_rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
997
- vec_rules[i].push_back(*pos);
998
- }
999
- vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
1000
- }
1001
-
1002
- // Check for left recursion
1003
- std::vector<bool> rules_visited(n_rules);
1004
- std::vector<bool> rules_in_progress(n_rules);
1005
- std::vector<bool> rules_may_be_empty(n_rules);
1006
- for (size_t i = 0; i < n_rules; i++) {
1007
- if (rules_visited[i]) {
1008
- continue;
1009
- }
1010
- if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
1011
- LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
1012
- return nullptr;
1013
- }
1014
- }
1015
-
1016
- // loop over alternates of start rule to build initial stacks
1017
- llama_grammar_stacks stacks;
1018
- pos = vec_rules[start_rule_index].data();
1019
- do {
1020
- llama_grammar_stack stack;
1021
- if (!llama_grammar_is_end_of_sequence(pos)) {
1022
- // if alternate is nonempty, add to stack
1023
- stack.push_back(pos);
1024
- }
1025
- llama_grammar_advance_stack(vec_rules, stack, stacks);
1026
- while (!llama_grammar_is_end_of_sequence(pos)) {
1027
- // scan to end of alternate def
1028
- pos++;
1029
- }
1030
- if (pos->type == LLAMA_GRETYPE_ALT) {
1031
- // there's another alternate def of this rule to process
1032
- pos++;
1033
- } else {
1034
- break;
1035
- }
1036
- } while (true);
1037
-
1038
- // Important: vec_rules has to be moved here, not copied, because stacks contains
1039
- // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
1040
- // then the pointers would be invalidated when the local vec_rules goes out of scope.
1041
- return new llama_grammar { vocab, std::move(vec_rules), std::move(stacks), {}, };
1042
- }
1043
-
1044
// Destroys a grammar created with new; a null pointer is ignored.
void llama_grammar_free_impl(struct llama_grammar * grammar) {
    if (grammar != nullptr) {
        delete grammar;
    }
}
1051
-
1052
- struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
1053
- llama_grammar * result = new llama_grammar {
1054
- grammar.vocab,
1055
- grammar.rules,
1056
- grammar.stacks,
1057
- grammar.partial_utf8,
1058
- };
1059
-
1060
- // redirect elements in stacks to point to new rules
1061
- for (size_t is = 0; is < result->stacks.size(); is++) {
1062
- for (size_t ie = 0; ie < result->stacks[is].size(); ie++) {
1063
- for (size_t ir0 = 0; ir0 < grammar.rules.size(); ir0++) {
1064
- for (size_t ir1 = 0; ir1 < grammar.rules[ir0].size(); ir1++) {
1065
- if (grammar.stacks[is][ie] == &grammar.rules[ir0][ir1]) {
1066
- result->stacks[is][ie] = &result->rules[ir0][ir1];
1067
- }
1068
- }
1069
- }
1070
- }
1071
- }
1072
-
1073
- return result;
1074
- }
1075
-
1076
- void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
1077
- LM_GGML_ASSERT(grammar.vocab != nullptr);
1078
-
1079
- bool allow_eog = false;
1080
- for (const auto & stack : grammar.stacks) {
1081
- if (stack.empty()) {
1082
- allow_eog = true;
1083
- break;
1084
- }
1085
- }
1086
-
1087
- std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
1088
- candidates_decoded.reserve(cur_p->size);
1089
-
1090
- llama_grammar_candidates candidates_grammar;
1091
- candidates_grammar.reserve(cur_p->size);
1092
-
1093
- for (size_t i = 0; i < cur_p->size; ++i) {
1094
- const llama_token id = cur_p->data[i].id;
1095
- const std::string & piece = grammar.vocab->cache_token_to_piece.at(id);
1096
-
1097
- if (llama_token_is_eog_impl(*grammar.vocab, id)) {
1098
- if (!allow_eog) {
1099
- cur_p->data[i].logit = -INFINITY;
1100
- }
1101
- } else if (piece.empty() || piece[0] == 0) {
1102
- cur_p->data[i].logit = -INFINITY;
1103
- } else {
1104
- candidates_decoded.push_back(decode_utf8(piece, grammar.partial_utf8));
1105
- candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
1106
- }
1107
- }
1108
-
1109
- const auto rejects = llama_grammar_reject_candidates(grammar.rules, grammar.stacks, candidates_grammar);
1110
- for (const auto & reject : rejects) {
1111
- cur_p->data[reject.index].logit = -INFINITY;
1112
- }
1113
- }
1114
-
1115
- void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
1116
- LM_GGML_ASSERT(grammar.vocab != nullptr);
1117
-
1118
- if (llama_token_is_eog_impl(*grammar.vocab, token)) {
1119
- for (const auto & stack : grammar.stacks) {
1120
- if (stack.empty()) {
1121
- return;
1122
- }
1123
- }
1124
- LM_GGML_ABORT("fatal error");
1125
- }
1126
-
1127
- const std::string & piece = grammar.vocab->cache_token_to_piece.at(token);
1128
-
1129
- // Note terminating 0 in decoded string
1130
- const auto decoded = decode_utf8(piece, grammar.partial_utf8);
1131
- const auto & code_points = decoded.first;
1132
-
1133
- for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
1134
- llama_grammar_accept(&grammar, *it);
1135
- }
1136
-
1137
- grammar.partial_utf8 = decoded.second;
1138
- LM_GGML_ASSERT(!grammar.stacks.empty());
1139
- }
1
+ #include "llama-grammar.h"
2
+
3
+ #include "llama-impl.h"
4
+ #include "llama-vocab.h"
5
+ #include "llama-sampling.h"
6
+
7
+ #include <cmath>
8
+ #include <algorithm>
9
+ #include <stdexcept>
10
+
11
+ //
12
+ // helpers
13
+ //
14
+
15
// Decodes the single UTF-8 sequence starting at src and returns the code point
// together with a pointer just past the bytes consumed.
// NOTE: assumes valid UTF-8; the only safeguard is that decoding stops early at
// a 0 byte, so a truncated sequence never reads past the string terminator.
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
    // sequence length, indexed by the high nibble of the lead byte
    static const int seq_len_by_high_nibble[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };

    const uint8_t lead    = static_cast<uint8_t>(src[0]);
    const int     n_bytes = seq_len_by_high_nibble[lead >> 4];

    // keep the low (8 - n_bytes) bits of the lead byte
    const uint8_t payload_mask = (1 << (8 - n_bytes)) - 1;
    uint32_t      code_point   = lead & payload_mask;

    const char * cursor   = src + 1;
    int          consumed = 1;
    while (consumed < n_bytes && *cursor != '\0') {
        code_point = (code_point << 6) + (static_cast<uint8_t>(*cursor) & 0x3F);
        ++cursor;
        ++consumed;
    }

    return std::make_pair(code_point, cursor);
}
30
+
31
+ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
32
+ const std::string & src,
33
+ llama_partial_utf8 partial_start) {
34
+ static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
35
+ const char * pos = src.c_str();
36
+ std::vector<uint32_t> code_points;
37
+
38
+ // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
39
+ code_points.reserve(src.size() + 1);
40
+ uint32_t value = partial_start.value;
41
+ int n_remain = partial_start.n_remain;
42
+
43
+ // continue previous decode, if applicable
44
+ while (*pos != 0 && n_remain > 0) {
45
+ uint8_t next_byte = static_cast<uint8_t>(*pos);
46
+ if ((next_byte >> 6) != 2) {
47
+ // invalid sequence, abort
48
+ code_points.push_back(0);
49
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, -1 });
50
+ }
51
+ value = (value << 6) + (next_byte & 0x3F);
52
+ ++pos;
53
+ --n_remain;
54
+ }
55
+
56
+ if (partial_start.n_remain > 0 && n_remain == 0) {
57
+ code_points.push_back(value);
58
+ }
59
+
60
+ // decode any subsequent utf-8 sequences, which may end in an incomplete one
61
+ while (*pos != 0) {
62
+ uint8_t first_byte = static_cast<uint8_t>(*pos);
63
+ uint8_t highbits = first_byte >> 4;
64
+ n_remain = lookup[highbits] - 1;
65
+
66
+ if (n_remain < 0) {
67
+ // invalid sequence, abort
68
+ code_points.clear();
69
+ code_points.push_back(0);
70
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, n_remain });
71
+ }
72
+
73
+ uint8_t mask = (1 << (7 - n_remain)) - 1;
74
+ value = first_byte & mask;
75
+
76
+ ++pos;
77
+ while (*pos != 0 && n_remain > 0) {
78
+ value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
79
+ ++pos;
80
+ --n_remain;
81
+ }
82
+ if (n_remain == 0) {
83
+ code_points.push_back(value);
84
+ }
85
+ }
86
+ code_points.push_back(0);
87
+
88
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ value, n_remain });
89
+ }
90
+
91
// True for the ASCII decimal digits '0'..'9'.
static bool is_digit_char(char c) {
    return c >= '0' && c <= '9';
}
94
+
95
// True for characters allowed in a rule name: ASCII letters, digits and '-'.
static bool is_word_char(char c) {
    const bool lower = c >= 'a' && c <= 'z';
    const bool upper = c >= 'A' && c <= 'Z';
    const bool digit = c >= '0' && c <= '9';
    return lower || upper || c == '-' || digit;
}
98
+
99
// Parses exactly `size` hexadecimal digits starting at src.
// Returns the value and a pointer just past the digits; throws
// std::runtime_error if fewer than `size` hex digits are available.
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
    uint32_t result = 0;
    int      n_read = 0;

    for (; n_read < size && src[n_read] != '\0'; ++n_read) {
        const char c = src[n_read];
        uint32_t nibble;
        if (c >= 'a' && c <= 'f') {
            nibble = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            nibble = c - 'A' + 10;
        } else if (c >= '0' && c <= '9') {
            nibble = c - '0';
        } else {
            break;
        }
        result = (result << 4) + nibble;
    }

    if (n_read != size) {
        throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
    }
    return std::make_pair(result, src + n_read);
}
121
+
122
// Skips spaces, tabs and '#' comments (which run to the end of the line), and
// also line breaks when newline_ok is set. Returns a pointer to the first
// character that was not skipped.
static const char * parse_space(const char * src, bool newline_ok) {
    const char * cur = src;
    for (;;) {
        switch (*cur) {
            case ' ':
            case '\t':
                ++cur;
                break;
            case '#':
                // comment: consume up to, but not including, the line break
                while (*cur != '\0' && *cur != '\r' && *cur != '\n') {
                    ++cur;
                }
                break;
            case '\r':
            case '\n':
                if (!newline_ok) {
                    return cur;
                }
                ++cur;
                break;
            default:
                return cur;
        }
    }
}
136
+
137
+ static const char * parse_name(const char * src) {
138
+ const char * pos = src;
139
+ while (is_word_char(*pos)) {
140
+ pos++;
141
+ }
142
+ if (pos == src) {
143
+ throw std::runtime_error(std::string("expecting name at ") + src);
144
+ }
145
+ return pos;
146
+ }
147
+
148
+ static const char * parse_int(const char * src) {
149
+ const char * pos = src;
150
+ while (is_digit_char(*pos)) {
151
+ pos++;
152
+ }
153
+ if (pos == src) {
154
+ throw std::runtime_error(std::string("expecting integer at ") + src);
155
+ }
156
+ return pos;
157
+ }
158
+
159
+ static std::pair<uint32_t, const char *> parse_char(const char * src) {
160
+ if (*src == '\\') {
161
+ switch (src[1]) {
162
+ case 'x': return parse_hex(src + 2, 2);
163
+ case 'u': return parse_hex(src + 2, 4);
164
+ case 'U': return parse_hex(src + 2, 8);
165
+ case 't': return std::make_pair('\t', src + 2);
166
+ case 'r': return std::make_pair('\r', src + 2);
167
+ case 'n': return std::make_pair('\n', src + 2);
168
+ case '\\':
169
+ case '"':
170
+ case '[':
171
+ case ']':
172
+ return std::make_pair(src[1], src + 2);
173
+ default:
174
+ throw std::runtime_error(std::string("unknown escape at ") + src);
175
+ }
176
+ } else if (*src) {
177
+ return decode_utf8(src);
178
+ }
179
+ throw std::runtime_error("unexpected end of input");
180
+ }
181
+
182
// Writes code point c to file: printable ASCII (0x20..0x7E) verbatim, anything
// else as <U+XXXX>. DEL (0x7F) is a control character and is therefore written
// in the escaped form rather than raw.
static void print_grammar_char(FILE * file, uint32_t c) {
    const bool printable_ascii = 0x20 <= c && c <= 0x7e;
    if (printable_ascii) {
        fprintf(file, "%c", static_cast<char>(c));
    } else {
        // cop out of encoding UTF-8
        fprintf(file, "<U+%04X>", c);
    }
}
190
+
191
+ static bool is_char_element(llama_grammar_element elem) {
192
+ switch (elem.type) {
193
+ case LLAMA_GRETYPE_CHAR: return true;
194
+ case LLAMA_GRETYPE_CHAR_NOT: return true;
195
+ case LLAMA_GRETYPE_CHAR_ALT: return true;
196
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
197
+ case LLAMA_GRETYPE_CHAR_ANY: return true;
198
+ default: return false;
199
+ }
200
+ }
201
+
202
+ static void print_rule_binary(FILE * file, const llama_grammar_rule & rule) {
203
+ for (auto elem : rule) {
204
+ switch (elem.type) {
205
+ case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
206
+ case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break;
207
+ case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break;
208
+ case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break;
209
+ case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
210
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
211
+ case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
212
+ case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
213
+ }
214
+ switch (elem.type) {
215
+ case LLAMA_GRETYPE_END:
216
+ case LLAMA_GRETYPE_ALT:
217
+ case LLAMA_GRETYPE_RULE_REF:
218
+ fprintf(file, "(%u) ", elem.value);
219
+ break;
220
+ case LLAMA_GRETYPE_CHAR:
221
+ case LLAMA_GRETYPE_CHAR_NOT:
222
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
223
+ case LLAMA_GRETYPE_CHAR_ALT:
224
+ case LLAMA_GRETYPE_CHAR_ANY:
225
+ fprintf(file, "(\"");
226
+ print_grammar_char(file, elem.value);
227
+ fprintf(file, "\") ");
228
+ break;
229
+ }
230
+ }
231
+ fprintf(file, "\n");
232
+ }
233
+
234
+ static void print_rule(
235
+ FILE * file,
236
+ uint32_t rule_id,
237
+ const llama_grammar_rule & rule,
238
+ const std::map<uint32_t, std::string> & symbol_id_names) {
239
+ if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
240
+ throw std::runtime_error(
241
+ "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
242
+ }
243
+ fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
244
+ for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
245
+ llama_grammar_element elem = rule[i];
246
+ switch (elem.type) {
247
+ case LLAMA_GRETYPE_END:
248
+ throw std::runtime_error(
249
+ "unexpected end of rule: " + std::to_string(rule_id) + "," +
250
+ std::to_string(i));
251
+ case LLAMA_GRETYPE_ALT:
252
+ fprintf(file, "| ");
253
+ break;
254
+ case LLAMA_GRETYPE_RULE_REF:
255
+ fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
256
+ break;
257
+ case LLAMA_GRETYPE_CHAR:
258
+ fprintf(file, "[");
259
+ print_grammar_char(file, elem.value);
260
+ break;
261
+ case LLAMA_GRETYPE_CHAR_NOT:
262
+ fprintf(file, "[^");
263
+ print_grammar_char(file, elem.value);
264
+ break;
265
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
266
+ if (i == 0 || !is_char_element(rule[i - 1])) {
267
+ throw std::runtime_error(
268
+ "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
269
+ std::to_string(rule_id) + "," + std::to_string(i));
270
+ }
271
+ fprintf(file, "-");
272
+ print_grammar_char(file, elem.value);
273
+ break;
274
+ case LLAMA_GRETYPE_CHAR_ALT:
275
+ if (i == 0 || !is_char_element(rule[i - 1])) {
276
+ throw std::runtime_error(
277
+ "LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
278
+ std::to_string(rule_id) + "," + std::to_string(i));
279
+ }
280
+ print_grammar_char(file, elem.value);
281
+ break;
282
+ case LLAMA_GRETYPE_CHAR_ANY:
283
+ fprintf(file, ".");
284
+ break;
285
+ }
286
+ if (is_char_element(elem)) {
287
+ switch (rule[i + 1].type) {
288
+ case LLAMA_GRETYPE_CHAR_ALT:
289
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
290
+ case LLAMA_GRETYPE_CHAR_ANY:
291
+ break;
292
+ default:
293
+ fprintf(file, "] ");
294
+ }
295
+ }
296
+ }
297
+ fprintf(file, "\n");
298
+ }
299
+
300
+ //
301
+ // implementation
302
+ //
303
+
304
+ uint32_t llama_grammar_parser::get_symbol_id(const char * src, size_t len) {
305
+ uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
306
+ auto result = symbol_ids.emplace(std::string(src, len), next_id);
307
+ return result.first->second;
308
+ }
309
+
310
+ uint32_t llama_grammar_parser::generate_symbol_id(const std::string & base_name) {
311
+ uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
312
+ symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
313
+ return next_id;
314
+ }
315
+
316
+ void llama_grammar_parser::add_rule(uint32_t rule_id, const llama_grammar_rule & rule) {
317
+ if (rules.size() <= rule_id) {
318
+ rules.resize(rule_id + 1);
319
+ }
320
+ rules[rule_id] = rule;
321
+ }
322
+
323
+ const char * llama_grammar_parser::parse_alternates(
324
+ const char * src,
325
+ const std::string & rule_name,
326
+ uint32_t rule_id,
327
+ bool is_nested) {
328
+ llama_grammar_rule rule;
329
+ const char * pos = parse_sequence(src, rule_name, rule, is_nested);
330
+ while (*pos == '|') {
331
+ rule.push_back({LLAMA_GRETYPE_ALT, 0});
332
+ pos = parse_space(pos + 1, true);
333
+ pos = parse_sequence(pos, rule_name, rule, is_nested);
334
+ }
335
+ rule.push_back({LLAMA_GRETYPE_END, 0});
336
+ add_rule(rule_id, rule);
337
+ return pos;
338
+ }
339
+
340
+ const char * llama_grammar_parser::parse_sequence(
341
+ const char * src,
342
+ const std::string & rule_name,
343
+ llama_grammar_rule & rule,
344
+ bool is_nested) {
345
+ size_t last_sym_start = rule.size();
346
+ const char * pos = src;
347
+
348
+ auto handle_repetitions = [&](int min_times, int max_times) {
349
+
350
+ if (last_sym_start == rule.size()) {
351
+ throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
352
+ }
353
+
354
+ // apply transformation to previous symbol (last_sym_start to end) according to
355
+ // the following rewrite rules:
356
+ // S{m,n} --> S S S (m times) S'(n-m)
357
+ // S'(x) ::= S S'(x-1) |
358
+ // (... n-m definitions of these S' rules ...)
359
+ // S'(1) ::= S |
360
+ // S{m,} --> S S S (m times) S'
361
+ // S' ::= S S' |
362
+ // S* --> S{0,}
363
+ // --> S' ::= S S' |
364
+ // S+ --> S{1,}
365
+ // --> S S'
366
+ // S' ::= S S' |
367
+ // S? --> S{0,1}
368
+ // --> S'
369
+ // S' ::= S |
370
+
371
+ llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
372
+ if (min_times == 0) {
373
+ rule.resize(last_sym_start);
374
+ } else {
375
+ // Repeat the previous elements (min_times - 1) times
376
+ for (int i = 1; i < min_times; i++) {
377
+ rule.insert(rule.end(), prev_rule.begin(), prev_rule.end());
378
+ }
379
+ }
380
+
381
+ uint32_t last_rec_rule_id = 0;
382
+ auto n_opt = max_times < 0 ? 1 : max_times - min_times;
383
+
384
+ llama_grammar_rule rec_rule(prev_rule);
385
+ for (int i = 0; i < n_opt; i++) {
386
+ rec_rule.resize(prev_rule.size());
387
+ uint32_t rec_rule_id = generate_symbol_id( rule_name);
388
+ if (i > 0 || max_times < 0) {
389
+ rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
390
+ }
391
+ rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
392
+ rec_rule.push_back({LLAMA_GRETYPE_END, 0});
393
+ add_rule( rec_rule_id, rec_rule);
394
+ last_rec_rule_id = rec_rule_id;
395
+ }
396
+ if (n_opt > 0) {
397
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
398
+ }
399
+ };
400
+
401
+ while (*pos) {
402
+ if (*pos == '"') { // literal string
403
+ pos++;
404
+ last_sym_start = rule.size();
405
+ while (*pos != '"') {
406
+ if (!*pos) {
407
+ throw std::runtime_error("unexpected end of input");
408
+ }
409
+ auto char_pair = parse_char(pos);
410
+ pos = char_pair.second;
411
+ rule.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
412
+ }
413
+ pos = parse_space(pos + 1, is_nested);
414
+ } else if (*pos == '[') { // char range(s)
415
+ pos++;
416
+ enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
417
+ if (*pos == '^') {
418
+ pos++;
419
+ start_type = LLAMA_GRETYPE_CHAR_NOT;
420
+ }
421
+ last_sym_start = rule.size();
422
+ while (*pos != ']') {
423
+ if (!*pos) {
424
+ throw std::runtime_error("unexpected end of input");
425
+ }
426
+ auto char_pair = parse_char(pos);
427
+ pos = char_pair.second;
428
+ enum llama_gretype type = last_sym_start < rule.size()
429
+ ? LLAMA_GRETYPE_CHAR_ALT
430
+ : start_type;
431
+
432
+ rule.push_back({type, char_pair.first});
433
+ if (pos[0] == '-' && pos[1] != ']') {
434
+ if (!pos[1]) {
435
+ throw std::runtime_error("unexpected end of input");
436
+ }
437
+ auto endchar_pair = parse_char(pos + 1);
438
+ pos = endchar_pair.second;
439
+ rule.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
440
+ }
441
+ }
442
+ pos = parse_space(pos + 1, is_nested);
443
+ } else if (is_word_char(*pos)) { // rule reference
444
+ const char * name_end = parse_name(pos);
445
+ uint32_t ref_rule_id = get_symbol_id(pos, name_end - pos);
446
+ pos = parse_space(name_end, is_nested);
447
+ last_sym_start = rule.size();
448
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
449
+ } else if (*pos == '(') { // grouping
450
+ // parse nested alternates into synthesized rule
451
+ pos = parse_space(pos + 1, true);
452
+ uint32_t sub_rule_id = generate_symbol_id(rule_name);
453
+ pos = parse_alternates(pos, rule_name, sub_rule_id, true);
454
+ last_sym_start = rule.size();
455
+ // output reference to synthesized rule
456
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
457
+ if (*pos != ')') {
458
+ throw std::runtime_error(std::string("expecting ')' at ") + pos);
459
+ }
460
+ pos = parse_space(pos + 1, is_nested);
461
+ } else if (*pos == '.') { // any char
462
+ last_sym_start = rule.size();
463
+ rule.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
464
+ pos = parse_space(pos + 1, is_nested);
465
+ } else if (*pos == '*') {
466
+ pos = parse_space(pos + 1, is_nested);
467
+ handle_repetitions(0, -1);
468
+ } else if (*pos == '+') {
469
+ pos = parse_space(pos + 1, is_nested);
470
+ handle_repetitions(1, -1);
471
+ } else if (*pos == '?') {
472
+ pos = parse_space(pos + 1, is_nested);
473
+ handle_repetitions(0, 1);
474
+ } else if (*pos == '{') {
475
+ pos = parse_space(pos + 1, is_nested);
476
+
477
+ if (!is_digit_char(*pos)) {
478
+ throw std::runtime_error(std::string("expecting an int at ") + pos);
479
+ }
480
+ const char * int_end = parse_int(pos);
481
+ int min_times = std::stoul(std::string(pos, int_end - pos));
482
+ pos = parse_space(int_end, is_nested);
483
+
484
+ int max_times = -1;
485
+
486
+ if (*pos == '}') {
487
+ max_times = min_times;
488
+ pos = parse_space(pos + 1, is_nested);
489
+ } else if (*pos == ',') {
490
+ pos = parse_space(pos + 1, is_nested);
491
+
492
+ if (is_digit_char(*pos)) {
493
+ const char * int_end = parse_int(pos);
494
+ max_times = std::stoul(std::string(pos, int_end - pos));
495
+ pos = parse_space(int_end, is_nested);
496
+ }
497
+
498
+ if (*pos != '}') {
499
+ throw std::runtime_error(std::string("expecting '}' at ") + pos);
500
+ }
501
+ pos = parse_space(pos + 1, is_nested);
502
+ } else {
503
+ throw std::runtime_error(std::string("expecting ',' at ") + pos);
504
+ }
505
+ handle_repetitions(min_times, max_times);
506
+ } else {
507
+ break;
508
+ }
509
+ }
510
+ return pos;
511
+ }
512
+
513
+ const char * llama_grammar_parser::parse_rule(const char * src) {
514
+ const char * name_end = parse_name(src);
515
+ const char * pos = parse_space(name_end, false);
516
+ size_t name_len = name_end - src;
517
+ uint32_t rule_id = get_symbol_id(src, name_len);
518
+ const std::string name(src, name_len);
519
+
520
+ if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
521
+ throw std::runtime_error(std::string("expecting ::= at ") + pos);
522
+ }
523
+ pos = parse_space(pos + 3, true);
524
+
525
+ pos = parse_alternates(pos, name, rule_id, false);
526
+
527
+ if (*pos == '\r') {
528
+ pos += pos[1] == '\n' ? 2 : 1;
529
+ } else if (*pos == '\n') {
530
+ pos++;
531
+ } else if (*pos) {
532
+ throw std::runtime_error(std::string("expecting newline or end at ") + pos);
533
+ }
534
+ return parse_space(pos, true);
535
+ }
536
+
537
+ bool llama_grammar_parser::parse(const char * src) {
538
+ try {
539
+ const char * pos = parse_space(src, true);
540
+ while (*pos) {
541
+ pos = parse_rule(pos);
542
+ }
543
+ // Validate the state to ensure that all rules are defined
544
+ for (const auto & rule : rules) {
545
+ if (rule.empty()) {
546
+ throw std::runtime_error("Undefined rule");
547
+ }
548
+ for (const auto & elem : rule) {
549
+ if (elem.type == LLAMA_GRETYPE_RULE_REF) {
550
+ // Ensure that the rule at that location exists
551
+ if (elem.value >= rules.size() || rules[elem.value].empty()) {
552
+ // Get the name of the rule that is missing
553
+ for (const auto & kv : symbol_ids) {
554
+ if (kv.second == elem.value) {
555
+ throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
556
+ }
557
+ }
558
+ }
559
+ }
560
+ }
561
+ }
562
+ } catch (const std::exception & err) {
563
+ fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
564
+ rules.clear();
565
+ return false;
566
+ }
567
+
568
+ return true;
569
+ }
570
+
571
+ void llama_grammar_parser::print(FILE * file) {
572
+ try {
573
+ std::map<uint32_t, std::string> symbol_id_names;
574
+ for (const auto & kv : symbol_ids) {
575
+ symbol_id_names[kv.second] = kv.first;
576
+ }
577
+ for (size_t i = 0, end = rules.size(); i < end; i++) {
578
+ // fprintf(file, "%zu: ", i);
579
+ // print_rule_binary(file, rules[i]);
580
+ print_rule(file, uint32_t(i), rules[i], symbol_id_names);
581
+ // fprintf(file, "\n");
582
+ }
583
+ } catch (const std::exception & err) {
584
+ fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
585
+ }
586
+ }
587
+
588
+ llama_grammar_stack llama_grammar_parser::c_rules() const {
589
+ llama_grammar_stack ret;
590
+ ret.reserve(rules.size());
591
+ for (const auto & rule : rules) {
592
+ ret.push_back(rule.data());
593
+ }
594
+ return ret;
595
+ }
596
+
597
+ // returns true iff pos points to the end of one of the definitions of a rule
598
+ static bool llama_grammar_is_end_of_sequence(const llama_grammar_element * pos) {
599
+ switch (pos->type) {
600
+ case LLAMA_GRETYPE_END: return true; // NOLINT
601
+ case LLAMA_GRETYPE_ALT: return true; // NOLINT
602
+ default: return false;
603
+ }
604
+ }
605
+
606
+ // returns true iff chr satisfies the char range at pos (regular or inverse range)
607
+ // asserts that pos is pointing to a char range element
608
+ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
609
+ const llama_grammar_element * pos,
610
+ const uint32_t chr) {
611
+ bool found = false;
612
+ bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
613
+
614
+ LM_GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
615
+
616
+ do {
617
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
618
+ // inclusive range, e.g. [a-z]
619
+ found = found || (pos->value <= chr && chr <= pos[1].value);
620
+ pos += 2;
621
+ } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
622
+ // Any character matches "."
623
+ found = true;
624
+ pos += 1;
625
+ } else {
626
+ // exact char match, e.g. [a] or "a"
627
+ found = found || pos->value == chr;
628
+ pos += 1;
629
+ }
630
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
631
+
632
+ return std::make_pair(found == is_positive_char, pos);
633
+ }
634
+
635
+ // returns true iff some continuation of the given partial UTF-8 sequence could satisfy the char
636
+ // range at pos (regular or inverse range)
637
+ // asserts that pos is pointing to a char range element
638
+ static bool llama_grammar_match_partial_char(
639
+ const llama_grammar_element * pos,
640
+ const llama_partial_utf8 partial_utf8) {
641
+ bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
642
+ LM_GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
643
+
644
+ uint32_t partial_value = partial_utf8.value;
645
+ int n_remain = partial_utf8.n_remain;
646
+
647
+ // invalid sequence or 7-bit char split across 2 bytes (overlong)
648
+ if (n_remain < 0 || (n_remain == 1 && partial_value < 2)) {
649
+ return false;
650
+ }
651
+
652
+ // range of possible code points this partial UTF-8 sequence could complete to
653
+ uint32_t low = partial_value << (n_remain * 6);
654
+ uint32_t high = low | ((1 << (n_remain * 6)) - 1);
655
+
656
+ if (low == 0) {
657
+ if (n_remain == 2) {
658
+ low = 1 << 11;
659
+ } else if (n_remain == 3) {
660
+ low = 1 << 16;
661
+ }
662
+ }
663
+
664
+ do {
665
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
666
+ // inclusive range, e.g. [a-z]
667
+ if (pos->value <= high && low <= pos[1].value) {
668
+ return is_positive_char;
669
+ }
670
+ pos += 2;
671
+ } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
672
+ // Any character matches "."
673
+ return true;
674
+ } else {
675
+ // exact char match, e.g. [a] or "a"
676
+ if (low <= pos->value && pos->value <= high) {
677
+ return is_positive_char;
678
+ }
679
+ pos += 1;
680
+ }
681
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
682
+
683
+ return !is_positive_char;
684
+ }
685
+
686
+ // transforms a grammar pushdown stack into N possible stacks, all ending
687
+ // at a character range (terminal element)
688
+ static void llama_grammar_advance_stack(
689
+ const llama_grammar_rules & rules,
690
+ const llama_grammar_stack & stack,
691
+ llama_grammar_stacks & new_stacks) {
692
+ if (stack.empty()) {
693
+ if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
694
+ new_stacks.emplace_back(stack);
695
+ }
696
+ return;
697
+ }
698
+
699
+ const llama_grammar_element * pos = stack.back();
700
+
701
+ switch (pos->type) {
702
+ case LLAMA_GRETYPE_RULE_REF: {
703
+ const size_t rule_id = static_cast<size_t>(pos->value);
704
+ const llama_grammar_element * subpos = rules[rule_id].data();
705
+ do {
706
+ // init new stack without the top (pos)
707
+ llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
708
+ if (!llama_grammar_is_end_of_sequence(pos + 1)) {
709
+ // if this rule ref is followed by another element, add that to stack
710
+ new_stack.push_back(pos + 1);
711
+ }
712
+ if (!llama_grammar_is_end_of_sequence(subpos)) {
713
+ // if alternate is nonempty, add to stack
714
+ new_stack.push_back(subpos);
715
+ }
716
+ llama_grammar_advance_stack(rules, new_stack, new_stacks);
717
+ while (!llama_grammar_is_end_of_sequence(subpos)) {
718
+ // scan to end of alternate def
719
+ subpos++;
720
+ }
721
+ if (subpos->type == LLAMA_GRETYPE_ALT) {
722
+ // there's another alternate def of this rule to process
723
+ subpos++;
724
+ } else {
725
+ break;
726
+ }
727
+ } while (true);
728
+ break;
729
+ }
730
+ case LLAMA_GRETYPE_CHAR:
731
+ case LLAMA_GRETYPE_CHAR_NOT:
732
+ case LLAMA_GRETYPE_CHAR_ANY:
733
+ if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
734
+ // only add the stack if it's not a duplicate of one we already have
735
+ new_stacks.emplace_back(stack);
736
+ }
737
+ break;
738
+ default:
739
+ // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
740
+ // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
741
+ // those
742
+ LM_GGML_ABORT("fatal error");
743
+ }
744
+ }
745
+
746
+ static llama_grammar_candidates llama_grammar_reject_candidates(
747
+ const llama_grammar_rules & rules,
748
+ const llama_grammar_stacks & stacks,
749
+ const llama_grammar_candidates & candidates) {
750
+ LM_GGML_ASSERT(!stacks.empty()); // REVIEW
751
+
752
+ if (candidates.empty()) {
753
+ return {};
754
+ }
755
+
756
+ auto rejects = llama_grammar_reject_candidates_for_stack(rules, stacks.front(), candidates);
757
+
758
+ for (size_t i = 1, size = stacks.size(); i < size; ++i) {
759
+ rejects = llama_grammar_reject_candidates_for_stack(rules, stacks[i], rejects);
760
+ }
761
+
762
+ return rejects;
763
+ }
764
+
765
+ static bool llama_grammar_detect_left_recursion(
766
+ const llama_grammar_rules & rules,
767
+ size_t rule_index,
768
+ std::vector<bool> * rules_visited,
769
+ std::vector<bool> * rules_in_progress,
770
+ std::vector<bool> * rules_may_be_empty) {
771
+ if ((*rules_in_progress)[rule_index]) {
772
+ return true;
773
+ }
774
+
775
+ (*rules_in_progress)[rule_index] = true;
776
+
777
+ const llama_grammar_rule & rule = rules[rule_index];
778
+
779
+ // First check if the rule might produce the empty string. This could be done combined with the second
780
+ // step but it's more readable as two steps.
781
+ bool at_rule_start = true;
782
+ for (size_t i = 0; i < rule.size(); i++) {
783
+ if (llama_grammar_is_end_of_sequence(&rule[i])) {
784
+ if (at_rule_start) {
785
+ (*rules_may_be_empty)[rule_index] = true;
786
+ break;
787
+ }
788
+ at_rule_start = true;
789
+ } else {
790
+ at_rule_start = false;
791
+ }
792
+ }
793
+
794
+ // Second, recurse into leftmost nonterminals (or next-leftmost as long as the previous nonterminal may
795
+ // be empty)
796
+ bool recurse_into_nonterminal = true;
797
+ for (size_t i = 0; i < rule.size(); i++) {
798
+ if (rule[i].type == LLAMA_GRETYPE_RULE_REF && recurse_into_nonterminal) {
799
+ if (llama_grammar_detect_left_recursion(rules, (size_t)rule[i].value, rules_visited, rules_in_progress, rules_may_be_empty)) {
800
+ return true;
801
+ }
802
+ if (!((*rules_may_be_empty)[(size_t)rule[i].value])) {
803
+ recurse_into_nonterminal = false;
804
+ }
805
+ } else if (llama_grammar_is_end_of_sequence(&rule[i])) {
806
+ recurse_into_nonterminal = true;
807
+ } else {
808
+ recurse_into_nonterminal = false;
809
+ }
810
+ }
811
+
812
+ (*rules_in_progress)[rule_index] = false;
813
+ (*rules_visited)[rule_index] = true;
814
+
815
+ return false;
816
+ }
817
+
818
+ const llama_grammar_rules & llama_grammar_get_rules(const struct llama_grammar * grammar) {
819
+ return grammar->rules;
820
+ }
821
+
822
+ llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar) {
823
+ return grammar->stacks;
824
+ }
825
+
826
+ void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
827
+ llama_grammar_stacks stacks_new;
828
+ stacks_new.reserve(grammar->stacks.size());
829
+
830
+ for (const auto & stack : grammar->stacks) {
831
+ if (stack.empty()) {
832
+ continue;
833
+ }
834
+
835
+ auto match = llama_grammar_match_char(stack.back(), chr);
836
+ if (match.first) {
837
+ const llama_grammar_element * pos = match.second;
838
+
839
+ // update top of stack to next element, if any
840
+ llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
841
+ if (!llama_grammar_is_end_of_sequence(pos)) {
842
+ new_stack.push_back(pos);
843
+ }
844
+ llama_grammar_advance_stack(grammar->rules, new_stack, stacks_new);
845
+ }
846
+ }
847
+
848
+ grammar->stacks = std::move(stacks_new);
849
+ }
850
+
851
+ llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
852
+ const llama_grammar_rules & rules,
853
+ const llama_grammar_stack & stack,
854
+ const llama_grammar_candidates & candidates) {
855
+
856
+ llama_grammar_candidates rejects;
857
+ rejects.reserve(candidates.size());
858
+
859
+ if (stack.empty()) {
860
+ for (const auto & tok : candidates) {
861
+ if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
862
+ rejects.push_back(tok);
863
+ }
864
+ }
865
+ return rejects;
866
+ }
867
+
868
+ const llama_grammar_element * stack_pos = stack.back();
869
+
870
+ llama_grammar_candidates next_candidates;
871
+ next_candidates.reserve(candidates.size());
872
+
873
+ for (const auto & tok : candidates) {
874
+ if (*tok.code_points == 0) {
875
+ // reached end of full codepoints in token, reject iff it ended in a partial sequence
876
+ // that cannot satisfy this position in grammar
877
+ if (tok.partial_utf8.n_remain != 0 &&
878
+ !llama_grammar_match_partial_char(stack_pos, tok.partial_utf8)) {
879
+ rejects.push_back(tok);
880
+ }
881
+ } else if (llama_grammar_match_char(stack_pos, *tok.code_points).first) {
882
+ next_candidates.push_back({ tok.index, tok.code_points + 1, tok.partial_utf8 });
883
+ } else {
884
+ rejects.push_back(tok);
885
+ }
886
+ }
887
+
888
+ const auto * stack_pos_after = llama_grammar_match_char(stack_pos, 0).second;
889
+
890
+ // update top of stack to next element, if any
891
+ llama_grammar_stack stack_after(stack.begin(), stack.end() - 1);
892
+ if (!llama_grammar_is_end_of_sequence(stack_pos_after)) {
893
+ stack_after.push_back(stack_pos_after);
894
+ }
895
+ llama_grammar_stacks next_stacks;
896
+ llama_grammar_advance_stack(rules, stack_after, next_stacks);
897
+
898
+ auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
899
+ for (const auto & tok : next_rejects) {
900
+ rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
901
+ }
902
+
903
+ return rejects;
904
+ }
905
+
906
+ ////////////////////
907
+
908
+ struct llama_grammar * llama_grammar_init_impl(
909
+ const struct llama_vocab * vocab,
910
+ const llama_grammar_element ** rules,
911
+ size_t n_rules,
912
+ size_t start_rule_index) {
913
+ const llama_grammar_element * pos;
914
+
915
+ // copy rule definitions into vectors
916
+ llama_grammar_rules vec_rules(n_rules);
917
+ for (size_t i = 0; i < n_rules; i++) {
918
+ for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
919
+ vec_rules[i].push_back(*pos);
920
+ }
921
+ vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
922
+ }
923
+
924
+ // Check for left recursion
925
+ std::vector<bool> rules_visited(n_rules);
926
+ std::vector<bool> rules_in_progress(n_rules);
927
+ std::vector<bool> rules_may_be_empty(n_rules);
928
+ for (size_t i = 0; i < n_rules; i++) {
929
+ if (rules_visited[i]) {
930
+ continue;
931
+ }
932
+ if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
933
+ LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
934
+ return nullptr;
935
+ }
936
+ }
937
+
938
+ // loop over alternates of start rule to build initial stacks
939
+ llama_grammar_stacks stacks;
940
+ pos = vec_rules[start_rule_index].data();
941
+ do {
942
+ llama_grammar_stack stack;
943
+ if (!llama_grammar_is_end_of_sequence(pos)) {
944
+ // if alternate is nonempty, add to stack
945
+ stack.push_back(pos);
946
+ }
947
+ llama_grammar_advance_stack(vec_rules, stack, stacks);
948
+ while (!llama_grammar_is_end_of_sequence(pos)) {
949
+ // scan to end of alternate def
950
+ pos++;
951
+ }
952
+ if (pos->type == LLAMA_GRETYPE_ALT) {
953
+ // there's another alternate def of this rule to process
954
+ pos++;
955
+ } else {
956
+ break;
957
+ }
958
+ } while (true);
959
+
960
+ // Important: vec_rules has to be moved here, not copied, because stacks contains
961
+ // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
962
+ // then the pointers would be invalidated when the local vec_rules goes out of scope.
963
+ return new llama_grammar { vocab, std::move(vec_rules), std::move(stacks), {}, };
964
+ }
965
+
966
+ struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root) {
967
+ llama_grammar_parser parser;
968
+
969
+ // if there is a grammar, parse it
970
+ if (!parser.parse(grammar_str)) {
971
+ return nullptr;
972
+ }
973
+
974
+ // will be empty (default) if there are parse errors
975
+ if (parser.rules.empty()) {
976
+ fprintf(stderr, "%s: failed to parse grammar\n", __func__);
977
+ return nullptr;
978
+ }
979
+
980
+ // Ensure that there is a "root" node.
981
+ if (parser.symbol_ids.find("root") == parser.symbol_ids.end()) {
982
+ fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
983
+ return nullptr;
984
+ }
985
+
986
+ std::vector<const llama_grammar_element *> grammar_rules(parser.c_rules());
987
+
988
+ const size_t n_rules = grammar_rules.size();
989
+ const size_t start_rule_index = parser.symbol_ids.at(grammar_root);
990
+
991
+ const llama_grammar_element * pos;
992
+
993
+ // copy rule definitions into vectors
994
+ llama_grammar_rules vec_rules(n_rules);
995
+ for (size_t i = 0; i < n_rules; i++) {
996
+ for (pos = grammar_rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
997
+ vec_rules[i].push_back(*pos);
998
+ }
999
+ vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
1000
+ }
1001
+
1002
+ // Check for left recursion
1003
+ std::vector<bool> rules_visited(n_rules);
1004
+ std::vector<bool> rules_in_progress(n_rules);
1005
+ std::vector<bool> rules_may_be_empty(n_rules);
1006
+ for (size_t i = 0; i < n_rules; i++) {
1007
+ if (rules_visited[i]) {
1008
+ continue;
1009
+ }
1010
+ if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
1011
+ LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
1012
+ return nullptr;
1013
+ }
1014
+ }
1015
+
1016
+ // loop over alternates of start rule to build initial stacks
1017
+ llama_grammar_stacks stacks;
1018
+ pos = vec_rules[start_rule_index].data();
1019
+ do {
1020
+ llama_grammar_stack stack;
1021
+ if (!llama_grammar_is_end_of_sequence(pos)) {
1022
+ // if alternate is nonempty, add to stack
1023
+ stack.push_back(pos);
1024
+ }
1025
+ llama_grammar_advance_stack(vec_rules, stack, stacks);
1026
+ while (!llama_grammar_is_end_of_sequence(pos)) {
1027
+ // scan to end of alternate def
1028
+ pos++;
1029
+ }
1030
+ if (pos->type == LLAMA_GRETYPE_ALT) {
1031
+ // there's another alternate def of this rule to process
1032
+ pos++;
1033
+ } else {
1034
+ break;
1035
+ }
1036
+ } while (true);
1037
+
1038
+ // Important: vec_rules has to be moved here, not copied, because stacks contains
1039
+ // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
1040
+ // then the pointers would be invalidated when the local vec_rules goes out of scope.
1041
+ return new llama_grammar { vocab, std::move(vec_rules), std::move(stacks), {}, };
1042
+ }
1043
+
1044
// Destroys a grammar with `delete`. Passing nullptr is allowed and does nothing.
void llama_grammar_free_impl(struct llama_grammar * grammar) {
    if (grammar != nullptr) {
        delete grammar;
    }
}
1051
+
1052
+ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
1053
+ llama_grammar * result = new llama_grammar {
1054
+ grammar.vocab,
1055
+ grammar.rules,
1056
+ grammar.stacks,
1057
+ grammar.partial_utf8,
1058
+ };
1059
+
1060
+ // redirect elements in stacks to point to new rules
1061
+ for (size_t is = 0; is < result->stacks.size(); is++) {
1062
+ for (size_t ie = 0; ie < result->stacks[is].size(); ie++) {
1063
+ for (size_t ir0 = 0; ir0 < grammar.rules.size(); ir0++) {
1064
+ for (size_t ir1 = 0; ir1 < grammar.rules[ir0].size(); ir1++) {
1065
+ if (grammar.stacks[is][ie] == &grammar.rules[ir0][ir1]) {
1066
+ result->stacks[is][ie] = &result->rules[ir0][ir1];
1067
+ }
1068
+ }
1069
+ }
1070
+ }
1071
+ }
1072
+
1073
+ return result;
1074
+ }
1075
+
1076
+ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
1077
+ LM_GGML_ASSERT(grammar.vocab != nullptr);
1078
+
1079
+ bool allow_eog = false;
1080
+ for (const auto & stack : grammar.stacks) {
1081
+ if (stack.empty()) {
1082
+ allow_eog = true;
1083
+ break;
1084
+ }
1085
+ }
1086
+
1087
+ std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
1088
+ candidates_decoded.reserve(cur_p->size);
1089
+
1090
+ llama_grammar_candidates candidates_grammar;
1091
+ candidates_grammar.reserve(cur_p->size);
1092
+
1093
+ for (size_t i = 0; i < cur_p->size; ++i) {
1094
+ const llama_token id = cur_p->data[i].id;
1095
+ const std::string & piece = grammar.vocab->token_to_piece(id);
1096
+
1097
+ if (grammar.vocab->is_eog(id)) {
1098
+ if (!allow_eog) {
1099
+ cur_p->data[i].logit = -INFINITY;
1100
+ }
1101
+ } else if (piece.empty() || piece[0] == 0) {
1102
+ cur_p->data[i].logit = -INFINITY;
1103
+ } else {
1104
+ candidates_decoded.push_back(decode_utf8(piece, grammar.partial_utf8));
1105
+ candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
1106
+ }
1107
+ }
1108
+
1109
+ const auto rejects = llama_grammar_reject_candidates(grammar.rules, grammar.stacks, candidates_grammar);
1110
+ for (const auto & reject : rejects) {
1111
+ cur_p->data[reject.index].logit = -INFINITY;
1112
+ }
1113
+ }
1114
+
1115
+ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
1116
+ LM_GGML_ASSERT(grammar.vocab != nullptr);
1117
+
1118
+ if (grammar.vocab->is_eog(token)) {
1119
+ for (const auto & stack : grammar.stacks) {
1120
+ if (stack.empty()) {
1121
+ return;
1122
+ }
1123
+ }
1124
+ LM_GGML_ABORT("fatal error");
1125
+ }
1126
+
1127
+ const std::string & piece = grammar.vocab->token_to_piece(token);
1128
+
1129
+ // Note terminating 0 in decoded string
1130
+ const auto decoded = decode_utf8(piece, grammar.partial_utf8);
1131
+ const auto & code_points = decoded.first;
1132
+
1133
+ for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
1134
+ llama_grammar_accept(&grammar, *it);
1135
+ }
1136
+
1137
+ grammar.partial_utf8 = decoded.second;
1138
+ LM_GGML_ASSERT(!grammar.stacks.empty());
1139
+ }