cui-llama.rn 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
@@ -1,1219 +1,1219 @@
1
- #include "llama-grammar.h"
2
-
3
- #include "llama-impl.h"
4
- #include "llama-vocab.h"
5
- #include "llama-sampling.h"
6
-
7
- #include <cmath>
8
- #include <algorithm>
9
- #include <stdexcept>
10
-
11
- //
12
- // helpers
13
- //
14
-
15
- // NOTE: assumes valid utf8 (but checks for overrun)
16
- static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
17
- static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
18
- uint8_t first_byte = static_cast<uint8_t>(*src);
19
- uint8_t highbits = first_byte >> 4;
20
- int len = lookup[highbits];
21
- uint8_t mask = (1 << (8 - len)) - 1;
22
- uint32_t value = first_byte & mask;
23
- const char * end = src + len; // may overrun!
24
- const char * pos = src + 1;
25
- for ( ; pos < end && *pos; pos++) {
26
- value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
27
- }
28
- return std::make_pair(value, pos);
29
- }
30
-
31
- static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
32
- const std::string & src,
33
- llama_partial_utf8 partial_start) {
34
- static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
35
- const char * pos = src.c_str();
36
- std::vector<uint32_t> code_points;
37
-
38
- // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
39
- code_points.reserve(src.size() + 1);
40
- uint32_t value = partial_start.value;
41
- int n_remain = partial_start.n_remain;
42
-
43
- // continue previous decode, if applicable
44
- while (*pos != 0 && n_remain > 0) {
45
- uint8_t next_byte = static_cast<uint8_t>(*pos);
46
- if ((next_byte >> 6) != 2) {
47
- // invalid sequence, abort
48
- code_points.push_back(0);
49
- return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, -1 });
50
- }
51
- value = (value << 6) + (next_byte & 0x3F);
52
- ++pos;
53
- --n_remain;
54
- }
55
-
56
- if (partial_start.n_remain > 0 && n_remain == 0) {
57
- code_points.push_back(value);
58
- }
59
-
60
- // decode any subsequent utf-8 sequences, which may end in an incomplete one
61
- while (*pos != 0) {
62
- uint8_t first_byte = static_cast<uint8_t>(*pos);
63
- uint8_t highbits = first_byte >> 4;
64
- n_remain = lookup[highbits] - 1;
65
-
66
- if (n_remain < 0) {
67
- // invalid sequence, abort
68
- code_points.clear();
69
- code_points.push_back(0);
70
- return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, n_remain });
71
- }
72
-
73
- uint8_t mask = (1 << (7 - n_remain)) - 1;
74
- value = first_byte & mask;
75
-
76
- ++pos;
77
- while (*pos != 0 && n_remain > 0) {
78
- value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
79
- ++pos;
80
- --n_remain;
81
- }
82
- if (n_remain == 0) {
83
- code_points.push_back(value);
84
- }
85
- }
86
- code_points.push_back(0);
87
-
88
- return std::make_pair(std::move(code_points), llama_partial_utf8{ value, n_remain });
89
- }
90
-
91
- static bool is_digit_char(char c) {
92
- return '0' <= c && c <= '9';
93
- }
94
-
95
- static bool is_word_char(char c) {
96
- return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || is_digit_char(c);
97
- }
98
-
99
- static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
100
- const char * pos = src;
101
- const char * end = src + size;
102
- uint32_t value = 0;
103
- for ( ; pos < end && *pos; pos++) {
104
- value <<= 4;
105
- char c = *pos;
106
- if ('a' <= c && c <= 'f') {
107
- value += c - 'a' + 10;
108
- } else if ('A' <= c && c <= 'F') {
109
- value += c - 'A' + 10;
110
- } else if ('0' <= c && c <= '9') {
111
- value += c - '0';
112
- } else {
113
- break;
114
- }
115
- }
116
- if (pos != end) {
117
- throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
118
- }
119
- return std::make_pair(value, pos);
120
- }
121
-
122
- static const char * parse_space(const char * src, bool newline_ok) {
123
- const char * pos = src;
124
- while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
125
- (newline_ok && (*pos == '\r' || *pos == '\n'))) {
126
- if (*pos == '#') {
127
- while (*pos && *pos != '\r' && *pos != '\n') {
128
- pos++;
129
- }
130
- } else {
131
- pos++;
132
- }
133
- }
134
- return pos;
135
- }
136
-
137
- static const char * parse_name(const char * src) {
138
- const char * pos = src;
139
- while (is_word_char(*pos)) {
140
- pos++;
141
- }
142
- if (pos == src) {
143
- throw std::runtime_error(std::string("expecting name at ") + src);
144
- }
145
- return pos;
146
- }
147
-
148
- static const char * parse_int(const char * src) {
149
- const char * pos = src;
150
- while (is_digit_char(*pos)) {
151
- pos++;
152
- }
153
- if (pos == src) {
154
- throw std::runtime_error(std::string("expecting integer at ") + src);
155
- }
156
- return pos;
157
- }
158
-
159
- static std::pair<uint32_t, const char *> parse_char(const char * src) {
160
- if (*src == '\\') {
161
- switch (src[1]) {
162
- case 'x': return parse_hex(src + 2, 2);
163
- case 'u': return parse_hex(src + 2, 4);
164
- case 'U': return parse_hex(src + 2, 8);
165
- case 't': return std::make_pair('\t', src + 2);
166
- case 'r': return std::make_pair('\r', src + 2);
167
- case 'n': return std::make_pair('\n', src + 2);
168
- case '\\':
169
- case '"':
170
- case '[':
171
- case ']':
172
- return std::make_pair(src[1], src + 2);
173
- default:
174
- throw std::runtime_error(std::string("unknown escape at ") + src);
175
- }
176
- } else if (*src) {
177
- return decode_utf8(src);
178
- }
179
- throw std::runtime_error("unexpected end of input");
180
- }
181
-
182
- static void print_grammar_char(FILE * file, uint32_t c) {
183
- if (0x20 <= c && c <= 0x7f) {
184
- fprintf(file, "%c", static_cast<char>(c));
185
- } else {
186
- // cop out of encoding UTF-8
187
- fprintf(file, "<U+%04X>", c);
188
- }
189
- }
190
-
191
- static bool is_char_element(llama_grammar_element elem) {
192
- switch (elem.type) {
193
- case LLAMA_GRETYPE_CHAR: return true;
194
- case LLAMA_GRETYPE_CHAR_NOT: return true;
195
- case LLAMA_GRETYPE_CHAR_ALT: return true;
196
- case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
197
- case LLAMA_GRETYPE_CHAR_ANY: return true;
198
- default: return false;
199
- }
200
- }
201
-
202
- static void print_rule_binary(FILE * file, const llama_grammar_rule & rule) {
203
- for (auto elem : rule) {
204
- switch (elem.type) {
205
- case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
206
- case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break;
207
- case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break;
208
- case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break;
209
- case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
210
- case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
211
- case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
212
- case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
213
- }
214
- switch (elem.type) {
215
- case LLAMA_GRETYPE_END:
216
- case LLAMA_GRETYPE_ALT:
217
- case LLAMA_GRETYPE_RULE_REF:
218
- fprintf(file, "(%u) ", elem.value);
219
- break;
220
- case LLAMA_GRETYPE_CHAR:
221
- case LLAMA_GRETYPE_CHAR_NOT:
222
- case LLAMA_GRETYPE_CHAR_RNG_UPPER:
223
- case LLAMA_GRETYPE_CHAR_ALT:
224
- case LLAMA_GRETYPE_CHAR_ANY:
225
- fprintf(file, "(\"");
226
- print_grammar_char(file, elem.value);
227
- fprintf(file, "\") ");
228
- break;
229
- }
230
- }
231
- fprintf(file, "\n");
232
- }
233
-
234
- static void print_rule(
235
- FILE * file,
236
- uint32_t rule_id,
237
- const llama_grammar_rule & rule,
238
- const std::map<uint32_t, std::string> & symbol_id_names) {
239
- if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
240
- throw std::runtime_error(
241
- "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
242
- }
243
- fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
244
- for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
245
- llama_grammar_element elem = rule[i];
246
- switch (elem.type) {
247
- case LLAMA_GRETYPE_END:
248
- throw std::runtime_error(
249
- "unexpected end of rule: " + std::to_string(rule_id) + "," +
250
- std::to_string(i));
251
- case LLAMA_GRETYPE_ALT:
252
- fprintf(file, "| ");
253
- break;
254
- case LLAMA_GRETYPE_RULE_REF:
255
- fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
256
- break;
257
- case LLAMA_GRETYPE_CHAR:
258
- fprintf(file, "[");
259
- print_grammar_char(file, elem.value);
260
- break;
261
- case LLAMA_GRETYPE_CHAR_NOT:
262
- fprintf(file, "[^");
263
- print_grammar_char(file, elem.value);
264
- break;
265
- case LLAMA_GRETYPE_CHAR_RNG_UPPER:
266
- if (i == 0 || !is_char_element(rule[i - 1])) {
267
- throw std::runtime_error(
268
- "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
269
- std::to_string(rule_id) + "," + std::to_string(i));
270
- }
271
- fprintf(file, "-");
272
- print_grammar_char(file, elem.value);
273
- break;
274
- case LLAMA_GRETYPE_CHAR_ALT:
275
- if (i == 0 || !is_char_element(rule[i - 1])) {
276
- throw std::runtime_error(
277
- "LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
278
- std::to_string(rule_id) + "," + std::to_string(i));
279
- }
280
- print_grammar_char(file, elem.value);
281
- break;
282
- case LLAMA_GRETYPE_CHAR_ANY:
283
- fprintf(file, ".");
284
- break;
285
- }
286
- if (is_char_element(elem)) {
287
- switch (rule[i + 1].type) {
288
- case LLAMA_GRETYPE_CHAR_ALT:
289
- case LLAMA_GRETYPE_CHAR_RNG_UPPER:
290
- case LLAMA_GRETYPE_CHAR_ANY:
291
- break;
292
- default:
293
- fprintf(file, "] ");
294
- }
295
- }
296
- }
297
- fprintf(file, "\n");
298
- }
299
-
300
- //
301
- // implementation
302
- //
303
-
304
- uint32_t llama_grammar_parser::get_symbol_id(const char * src, size_t len) {
305
- uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
306
- auto result = symbol_ids.emplace(std::string(src, len), next_id);
307
- return result.first->second;
308
- }
309
-
310
- uint32_t llama_grammar_parser::generate_symbol_id(const std::string & base_name) {
311
- uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
312
- symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
313
- return next_id;
314
- }
315
-
316
- void llama_grammar_parser::add_rule(uint32_t rule_id, const llama_grammar_rule & rule) {
317
- if (rules.size() <= rule_id) {
318
- rules.resize(rule_id + 1);
319
- }
320
- rules[rule_id] = rule;
321
- }
322
-
323
- const char * llama_grammar_parser::parse_alternates(
324
- const char * src,
325
- const std::string & rule_name,
326
- uint32_t rule_id,
327
- bool is_nested) {
328
- llama_grammar_rule rule;
329
- const char * pos = parse_sequence(src, rule_name, rule, is_nested);
330
- while (*pos == '|') {
331
- rule.push_back({LLAMA_GRETYPE_ALT, 0});
332
- pos = parse_space(pos + 1, true);
333
- pos = parse_sequence(pos, rule_name, rule, is_nested);
334
- }
335
- rule.push_back({LLAMA_GRETYPE_END, 0});
336
- add_rule(rule_id, rule);
337
- return pos;
338
- }
339
-
340
- const char * llama_grammar_parser::parse_sequence(
341
- const char * src,
342
- const std::string & rule_name,
343
- llama_grammar_rule & rule,
344
- bool is_nested) {
345
- size_t last_sym_start = rule.size();
346
- const char * pos = src;
347
-
348
- auto handle_repetitions = [&](int min_times, int max_times) {
349
-
350
- if (last_sym_start == rule.size()) {
351
- throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
352
- }
353
-
354
- // apply transformation to previous symbol (last_sym_start to end) according to
355
- // the following rewrite rules:
356
- // S{m,n} --> S S S (m times) S'(n-m)
357
- // S'(x) ::= S S'(x-1) |
358
- // (... n-m definitions of these S' rules ...)
359
- // S'(1) ::= S |
360
- // S{m,} --> S S S (m times) S'
361
- // S' ::= S S' |
362
- // S* --> S{0,}
363
- // --> S' ::= S S' |
364
- // S+ --> S{1,}
365
- // --> S S'
366
- // S' ::= S S' |
367
- // S? --> S{0,1}
368
- // --> S'
369
- // S' ::= S |
370
-
371
- llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
372
- if (min_times == 0) {
373
- rule.resize(last_sym_start);
374
- } else {
375
- // Repeat the previous elements (min_times - 1) times
376
- for (int i = 1; i < min_times; i++) {
377
- rule.insert(rule.end(), prev_rule.begin(), prev_rule.end());
378
- }
379
- }
380
-
381
- uint32_t last_rec_rule_id = 0;
382
- auto n_opt = max_times < 0 ? 1 : max_times - min_times;
383
-
384
- llama_grammar_rule rec_rule(prev_rule);
385
- for (int i = 0; i < n_opt; i++) {
386
- rec_rule.resize(prev_rule.size());
387
- uint32_t rec_rule_id = generate_symbol_id( rule_name);
388
- if (i > 0 || max_times < 0) {
389
- rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
390
- }
391
- rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
392
- rec_rule.push_back({LLAMA_GRETYPE_END, 0});
393
- add_rule( rec_rule_id, rec_rule);
394
- last_rec_rule_id = rec_rule_id;
395
- }
396
- if (n_opt > 0) {
397
- rule.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
398
- }
399
- };
400
-
401
- while (*pos) {
402
- if (*pos == '"') { // literal string
403
- pos++;
404
- last_sym_start = rule.size();
405
- while (*pos != '"') {
406
- if (!*pos) {
407
- throw std::runtime_error("unexpected end of input");
408
- }
409
- auto char_pair = parse_char(pos);
410
- pos = char_pair.second;
411
- rule.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
412
- }
413
- pos = parse_space(pos + 1, is_nested);
414
- } else if (*pos == '[') { // char range(s)
415
- pos++;
416
- enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
417
- if (*pos == '^') {
418
- pos++;
419
- start_type = LLAMA_GRETYPE_CHAR_NOT;
420
- }
421
- last_sym_start = rule.size();
422
- while (*pos != ']') {
423
- if (!*pos) {
424
- throw std::runtime_error("unexpected end of input");
425
- }
426
- auto char_pair = parse_char(pos);
427
- pos = char_pair.second;
428
- enum llama_gretype type = last_sym_start < rule.size()
429
- ? LLAMA_GRETYPE_CHAR_ALT
430
- : start_type;
431
-
432
- rule.push_back({type, char_pair.first});
433
- if (pos[0] == '-' && pos[1] != ']') {
434
- if (!pos[1]) {
435
- throw std::runtime_error("unexpected end of input");
436
- }
437
- auto endchar_pair = parse_char(pos + 1);
438
- pos = endchar_pair.second;
439
- rule.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
440
- }
441
- }
442
- pos = parse_space(pos + 1, is_nested);
443
- } else if (is_word_char(*pos)) { // rule reference
444
- const char * name_end = parse_name(pos);
445
- uint32_t ref_rule_id = get_symbol_id(pos, name_end - pos);
446
- pos = parse_space(name_end, is_nested);
447
- last_sym_start = rule.size();
448
- rule.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
449
- } else if (*pos == '(') { // grouping
450
- // parse nested alternates into synthesized rule
451
- pos = parse_space(pos + 1, true);
452
- uint32_t sub_rule_id = generate_symbol_id(rule_name);
453
- pos = parse_alternates(pos, rule_name, sub_rule_id, true);
454
- last_sym_start = rule.size();
455
- // output reference to synthesized rule
456
- rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
457
- if (*pos != ')') {
458
- throw std::runtime_error(std::string("expecting ')' at ") + pos);
459
- }
460
- pos = parse_space(pos + 1, is_nested);
461
- } else if (*pos == '.') { // any char
462
- last_sym_start = rule.size();
463
- rule.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
464
- pos = parse_space(pos + 1, is_nested);
465
- } else if (*pos == '*') {
466
- pos = parse_space(pos + 1, is_nested);
467
- handle_repetitions(0, -1);
468
- } else if (*pos == '+') {
469
- pos = parse_space(pos + 1, is_nested);
470
- handle_repetitions(1, -1);
471
- } else if (*pos == '?') {
472
- pos = parse_space(pos + 1, is_nested);
473
- handle_repetitions(0, 1);
474
- } else if (*pos == '{') {
475
- pos = parse_space(pos + 1, is_nested);
476
-
477
- if (!is_digit_char(*pos)) {
478
- throw std::runtime_error(std::string("expecting an int at ") + pos);
479
- }
480
- const char * int_end = parse_int(pos);
481
- int min_times = std::stoul(std::string(pos, int_end - pos));
482
- pos = parse_space(int_end, is_nested);
483
-
484
- int max_times = -1;
485
-
486
- if (*pos == '}') {
487
- max_times = min_times;
488
- pos = parse_space(pos + 1, is_nested);
489
- } else if (*pos == ',') {
490
- pos = parse_space(pos + 1, is_nested);
491
-
492
- if (is_digit_char(*pos)) {
493
- const char * int_end = parse_int(pos);
494
- max_times = std::stoul(std::string(pos, int_end - pos));
495
- pos = parse_space(int_end, is_nested);
496
- }
497
-
498
- if (*pos != '}') {
499
- throw std::runtime_error(std::string("expecting '}' at ") + pos);
500
- }
501
- pos = parse_space(pos + 1, is_nested);
502
- } else {
503
- throw std::runtime_error(std::string("expecting ',' at ") + pos);
504
- }
505
- handle_repetitions(min_times, max_times);
506
- } else {
507
- break;
508
- }
509
- }
510
- return pos;
511
- }
512
-
513
- const char * llama_grammar_parser::parse_rule(const char * src) {
514
- const char * name_end = parse_name(src);
515
- const char * pos = parse_space(name_end, false);
516
- size_t name_len = name_end - src;
517
- uint32_t rule_id = get_symbol_id(src, name_len);
518
- const std::string name(src, name_len);
519
-
520
- if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
521
- throw std::runtime_error(std::string("expecting ::= at ") + pos);
522
- }
523
- pos = parse_space(pos + 3, true);
524
-
525
- pos = parse_alternates(pos, name, rule_id, false);
526
-
527
- if (*pos == '\r') {
528
- pos += pos[1] == '\n' ? 2 : 1;
529
- } else if (*pos == '\n') {
530
- pos++;
531
- } else if (*pos) {
532
- throw std::runtime_error(std::string("expecting newline or end at ") + pos);
533
- }
534
- return parse_space(pos, true);
535
- }
536
-
537
- bool llama_grammar_parser::parse(const char * src) {
538
- try {
539
- const char * pos = parse_space(src, true);
540
- while (*pos) {
541
- pos = parse_rule(pos);
542
- }
543
- // Validate the state to ensure that all rules are defined
544
- for (const auto & rule : rules) {
545
- if (rule.empty()) {
546
- throw std::runtime_error("Undefined rule");
547
- }
548
- for (const auto & elem : rule) {
549
- if (elem.type == LLAMA_GRETYPE_RULE_REF) {
550
- // Ensure that the rule at that location exists
551
- if (elem.value >= rules.size() || rules[elem.value].empty()) {
552
- // Get the name of the rule that is missing
553
- for (const auto & kv : symbol_ids) {
554
- if (kv.second == elem.value) {
555
- throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
556
- }
557
- }
558
- }
559
- }
560
- }
561
- }
562
- } catch (const std::exception & err) {
563
- fprintf(stderr, "%s: error parsing grammar: %s\n\n%s\n", __func__, err.what(), src);
564
- rules.clear();
565
- return false;
566
- }
567
-
568
- return true;
569
- }
570
-
571
- void llama_grammar_parser::print(FILE * file) {
572
- try {
573
- std::map<uint32_t, std::string> symbol_id_names;
574
- for (const auto & kv : symbol_ids) {
575
- symbol_id_names[kv.second] = kv.first;
576
- }
577
- for (size_t i = 0, end = rules.size(); i < end; i++) {
578
- // fprintf(file, "%zu: ", i);
579
- // print_rule_binary(file, rules[i]);
580
- print_rule(file, uint32_t(i), rules[i], symbol_id_names);
581
- // fprintf(file, "\n");
582
- }
583
- } catch (const std::exception & err) {
584
- fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
585
- }
586
- }
587
-
588
- llama_grammar_stack llama_grammar_parser::c_rules() const {
589
- llama_grammar_stack ret;
590
- ret.reserve(rules.size());
591
- for (const auto & rule : rules) {
592
- ret.push_back(rule.data());
593
- }
594
- return ret;
595
- }
596
-
597
- // returns true iff pos points to the end of one of the definitions of a rule
598
- static bool llama_grammar_is_end_of_sequence(const llama_grammar_element * pos) {
599
- switch (pos->type) {
600
- case LLAMA_GRETYPE_END: return true; // NOLINT
601
- case LLAMA_GRETYPE_ALT: return true; // NOLINT
602
- default: return false;
603
- }
604
- }
605
-
606
- // returns true iff chr satisfies the char range at pos (regular or inverse range)
607
- // asserts that pos is pointing to a char range element
608
- static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
609
- const llama_grammar_element * pos,
610
- const uint32_t chr) {
611
- bool found = false;
612
- bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
613
-
614
- LM_GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
615
-
616
- do {
617
- if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
618
- // inclusive range, e.g. [a-z]
619
- found = found || (pos->value <= chr && chr <= pos[1].value);
620
- pos += 2;
621
- } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
622
- // Any character matches "."
623
- found = true;
624
- pos += 1;
625
- } else {
626
- // exact char match, e.g. [a] or "a"
627
- found = found || pos->value == chr;
628
- pos += 1;
629
- }
630
- } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
631
-
632
- return std::make_pair(found == is_positive_char, pos);
633
- }
634
-
635
- // returns true iff some continuation of the given partial UTF-8 sequence could satisfy the char
636
- // range at pos (regular or inverse range)
637
- // asserts that pos is pointing to a char range element
638
- static bool llama_grammar_match_partial_char(
639
- const llama_grammar_element * pos,
640
- const llama_partial_utf8 partial_utf8) {
641
- bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
642
- LM_GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
643
-
644
- uint32_t partial_value = partial_utf8.value;
645
- int n_remain = partial_utf8.n_remain;
646
-
647
- // invalid sequence or 7-bit char split across 2 bytes (overlong)
648
- if (n_remain < 0 || (n_remain == 1 && partial_value < 2)) {
649
- return false;
650
- }
651
-
652
- // range of possible code points this partial UTF-8 sequence could complete to
653
- uint32_t low = partial_value << (n_remain * 6);
654
- uint32_t high = low | ((1 << (n_remain * 6)) - 1);
655
-
656
- if (low == 0) {
657
- if (n_remain == 2) {
658
- low = 1 << 11;
659
- } else if (n_remain == 3) {
660
- low = 1 << 16;
661
- }
662
- }
663
-
664
- do {
665
- if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
666
- // inclusive range, e.g. [a-z]
667
- if (pos->value <= high && low <= pos[1].value) {
668
- return is_positive_char;
669
- }
670
- pos += 2;
671
- } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
672
- // Any character matches "."
673
- return true;
674
- } else {
675
- // exact char match, e.g. [a] or "a"
676
- if (low <= pos->value && pos->value <= high) {
677
- return is_positive_char;
678
- }
679
- pos += 1;
680
- }
681
- } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
682
-
683
- return !is_positive_char;
684
- }
685
-
686
- // transforms a grammar pushdown stack into N possible stacks, all ending
687
- // at a character range (terminal element)
688
- static void llama_grammar_advance_stack(
689
- const llama_grammar_rules & rules,
690
- const llama_grammar_stack & stack,
691
- llama_grammar_stacks & new_stacks) {
692
- if (stack.empty()) {
693
- if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
694
- new_stacks.emplace_back(stack);
695
- }
696
- return;
697
- }
698
-
699
- const llama_grammar_element * pos = stack.back();
700
-
701
- switch (pos->type) {
702
- case LLAMA_GRETYPE_RULE_REF: {
703
- const size_t rule_id = static_cast<size_t>(pos->value);
704
- const llama_grammar_element * subpos = rules[rule_id].data();
705
- do {
706
- // init new stack without the top (pos)
707
- llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
708
- if (!llama_grammar_is_end_of_sequence(pos + 1)) {
709
- // if this rule ref is followed by another element, add that to stack
710
- new_stack.push_back(pos + 1);
711
- }
712
- if (!llama_grammar_is_end_of_sequence(subpos)) {
713
- // if alternate is nonempty, add to stack
714
- new_stack.push_back(subpos);
715
- }
716
- llama_grammar_advance_stack(rules, new_stack, new_stacks);
717
- while (!llama_grammar_is_end_of_sequence(subpos)) {
718
- // scan to end of alternate def
719
- subpos++;
720
- }
721
- if (subpos->type == LLAMA_GRETYPE_ALT) {
722
- // there's another alternate def of this rule to process
723
- subpos++;
724
- } else {
725
- break;
726
- }
727
- } while (true);
728
- break;
729
- }
730
- case LLAMA_GRETYPE_CHAR:
731
- case LLAMA_GRETYPE_CHAR_NOT:
732
- case LLAMA_GRETYPE_CHAR_ANY:
733
- if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
734
- // only add the stack if it's not a duplicate of one we already have
735
- new_stacks.emplace_back(stack);
736
- }
737
- break;
738
- default:
739
- // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
740
- // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
741
- // those
742
- LM_GGML_ABORT("fatal error");
743
- }
744
- }
745
-
746
- static llama_grammar_candidates llama_grammar_reject_candidates(
747
- const llama_grammar_rules & rules,
748
- const llama_grammar_stacks & stacks,
749
- const llama_grammar_candidates & candidates) {
750
- LM_GGML_ASSERT(!stacks.empty()); // REVIEW
751
-
752
- if (candidates.empty()) {
753
- return {};
754
- }
755
-
756
- auto rejects = llama_grammar_reject_candidates_for_stack(rules, stacks.front(), candidates);
757
-
758
- for (size_t i = 1, size = stacks.size(); i < size; ++i) {
759
- rejects = llama_grammar_reject_candidates_for_stack(rules, stacks[i], rejects);
760
- }
761
-
762
- return rejects;
763
- }
764
-
765
- static bool llama_grammar_detect_left_recursion(
766
- const llama_grammar_rules & rules,
767
- size_t rule_index,
768
- std::vector<bool> * rules_visited,
769
- std::vector<bool> * rules_in_progress,
770
- std::vector<bool> * rules_may_be_empty) {
771
- if ((*rules_in_progress)[rule_index]) {
772
- return true;
773
- }
774
-
775
- (*rules_in_progress)[rule_index] = true;
776
-
777
- const llama_grammar_rule & rule = rules[rule_index];
778
-
779
- // First check if the rule might produce the empty string. This could be done combined with the second
780
- // step but it's more readable as two steps.
781
- bool at_rule_start = true;
782
- for (size_t i = 0; i < rule.size(); i++) {
783
- if (llama_grammar_is_end_of_sequence(&rule[i])) {
784
- if (at_rule_start) {
785
- (*rules_may_be_empty)[rule_index] = true;
786
- break;
787
- }
788
- at_rule_start = true;
789
- } else {
790
- at_rule_start = false;
791
- }
792
- }
793
-
794
- // Second, recurse into leftmost nonterminals (or next-leftmost as long as the previous nonterminal may
795
- // be empty)
796
- bool recurse_into_nonterminal = true;
797
- for (size_t i = 0; i < rule.size(); i++) {
798
- if (rule[i].type == LLAMA_GRETYPE_RULE_REF && recurse_into_nonterminal) {
799
- if (llama_grammar_detect_left_recursion(rules, (size_t)rule[i].value, rules_visited, rules_in_progress, rules_may_be_empty)) {
800
- return true;
801
- }
802
- if (!((*rules_may_be_empty)[(size_t)rule[i].value])) {
803
- recurse_into_nonterminal = false;
804
- }
805
- } else if (llama_grammar_is_end_of_sequence(&rule[i])) {
806
- recurse_into_nonterminal = true;
807
- } else {
808
- recurse_into_nonterminal = false;
809
- }
810
- }
811
-
812
- (*rules_in_progress)[rule_index] = false;
813
- (*rules_visited)[rule_index] = true;
814
-
815
- return false;
816
- }
817
-
818
- const llama_grammar_rules & llama_grammar_get_rules(const struct llama_grammar * grammar) {
819
- return grammar->rules;
820
- }
821
-
822
- llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar) {
823
- return grammar->stacks;
824
- }
825
-
826
- void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
827
- llama_grammar_stacks stacks_new;
828
- stacks_new.reserve(grammar->stacks.size());
829
-
830
- for (const auto & stack : grammar->stacks) {
831
- if (stack.empty()) {
832
- continue;
833
- }
834
-
835
- auto match = llama_grammar_match_char(stack.back(), chr);
836
- if (match.first) {
837
- const llama_grammar_element * pos = match.second;
838
-
839
- // update top of stack to next element, if any
840
- llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
841
- if (!llama_grammar_is_end_of_sequence(pos)) {
842
- new_stack.push_back(pos);
843
- }
844
- llama_grammar_advance_stack(grammar->rules, new_stack, stacks_new);
845
- }
846
- }
847
-
848
- grammar->stacks = std::move(stacks_new);
849
- }
850
-
851
- llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
852
- const llama_grammar_rules & rules,
853
- const llama_grammar_stack & stack,
854
- const llama_grammar_candidates & candidates) {
855
-
856
- llama_grammar_candidates rejects;
857
- rejects.reserve(candidates.size());
858
-
859
- if (stack.empty()) {
860
- for (const auto & tok : candidates) {
861
- if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
862
- rejects.push_back(tok);
863
- }
864
- }
865
- return rejects;
866
- }
867
-
868
- const llama_grammar_element * stack_pos = stack.back();
869
-
870
- llama_grammar_candidates next_candidates;
871
- next_candidates.reserve(candidates.size());
872
-
873
- for (const auto & tok : candidates) {
874
- if (*tok.code_points == 0) {
875
- // reached end of full codepoints in token, reject iff it ended in a partial sequence
876
- // that cannot satisfy this position in grammar
877
- if (tok.partial_utf8.n_remain != 0 &&
878
- !llama_grammar_match_partial_char(stack_pos, tok.partial_utf8)) {
879
- rejects.push_back(tok);
880
- }
881
- } else if (llama_grammar_match_char(stack_pos, *tok.code_points).first) {
882
- next_candidates.push_back({ tok.index, tok.code_points + 1, tok.partial_utf8 });
883
- } else {
884
- rejects.push_back(tok);
885
- }
886
- }
887
-
888
- const auto * stack_pos_after = llama_grammar_match_char(stack_pos, 0).second;
889
-
890
- // update top of stack to next element, if any
891
- llama_grammar_stack stack_after(stack.begin(), stack.end() - 1);
892
- if (!llama_grammar_is_end_of_sequence(stack_pos_after)) {
893
- stack_after.push_back(stack_pos_after);
894
- }
895
- llama_grammar_stacks next_stacks;
896
- llama_grammar_advance_stack(rules, stack_after, next_stacks);
897
-
898
- auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
899
- for (const auto & tok : next_rejects) {
900
- rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
901
- }
902
-
903
- return rejects;
904
- }
905
-
906
- ////////////////////
907
-
908
- struct llama_grammar * llama_grammar_init_impl(
909
- const struct llama_vocab * vocab,
910
- const llama_grammar_element ** rules,
911
- size_t n_rules,
912
- size_t start_rule_index) {
913
- const llama_grammar_element * pos;
914
-
915
- // copy rule definitions into vectors
916
- llama_grammar_rules vec_rules(n_rules);
917
- for (size_t i = 0; i < n_rules; i++) {
918
- for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
919
- vec_rules[i].push_back(*pos);
920
- }
921
- vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
922
- }
923
-
924
- // Check for left recursion
925
- std::vector<bool> rules_visited(n_rules);
926
- std::vector<bool> rules_in_progress(n_rules);
927
- std::vector<bool> rules_may_be_empty(n_rules);
928
- for (size_t i = 0; i < n_rules; i++) {
929
- if (rules_visited[i]) {
930
- continue;
931
- }
932
- if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
933
- LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
934
- return nullptr;
935
- }
936
- }
937
-
938
- // loop over alternates of start rule to build initial stacks
939
- llama_grammar_stacks stacks;
940
- pos = vec_rules[start_rule_index].data();
941
- do {
942
- llama_grammar_stack stack;
943
- if (!llama_grammar_is_end_of_sequence(pos)) {
944
- // if alternate is nonempty, add to stack
945
- stack.push_back(pos);
946
- }
947
- llama_grammar_advance_stack(vec_rules, stack, stacks);
948
- while (!llama_grammar_is_end_of_sequence(pos)) {
949
- // scan to end of alternate def
950
- pos++;
951
- }
952
- if (pos->type == LLAMA_GRETYPE_ALT) {
953
- // there's another alternate def of this rule to process
954
- pos++;
955
- } else {
956
- break;
957
- }
958
- } while (true);
959
-
960
- // Important: vec_rules has to be moved here, not copied, because stacks contains
961
- // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
962
- // then the pointers would be invalidated when the local vec_rules goes out of scope.
963
- return new llama_grammar {
964
- vocab,
965
- std::move(vec_rules),
966
- std::move(stacks),
967
- /* .partial_utf8 = */ {},
968
- /* .lazy =*/ false,
969
- /* .awaiting_trigger = */ false,
970
- /* .trigger_buffer = */ "",
971
- /* .trigger_tokens = */ {},
972
- /* .trigger_words = */ {},
973
- };
974
- }
975
-
976
- struct llama_grammar * llama_grammar_init_impl(
977
- const struct llama_vocab * vocab,
978
- const char * grammar_str,
979
- const char * grammar_root,
980
- bool lazy,
981
- const char ** trigger_words,
982
- size_t num_trigger_words,
983
- const llama_token * trigger_tokens,
984
- size_t num_trigger_tokens) {
985
- llama_grammar_parser parser;
986
-
987
- // if there is a grammar, parse it
988
- if (!parser.parse(grammar_str)) {
989
- return nullptr;
990
- }
991
-
992
- // will be empty (default) if there are parse errors
993
- if (parser.rules.empty()) {
994
- fprintf(stderr, "%s: failed to parse grammar\n", __func__);
995
- return nullptr;
996
- }
997
-
998
- // Ensure that there is a "root" node.
999
- if (parser.symbol_ids.find("root") == parser.symbol_ids.end()) {
1000
- fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
1001
- return nullptr;
1002
- }
1003
-
1004
- std::vector<const llama_grammar_element *> grammar_rules(parser.c_rules());
1005
-
1006
- const size_t n_rules = grammar_rules.size();
1007
- const size_t start_rule_index = parser.symbol_ids.at(grammar_root);
1008
-
1009
- const llama_grammar_element * pos;
1010
-
1011
- // copy rule definitions into vectors
1012
- llama_grammar_rules vec_rules(n_rules);
1013
- for (size_t i = 0; i < n_rules; i++) {
1014
- for (pos = grammar_rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
1015
- vec_rules[i].push_back(*pos);
1016
- }
1017
- vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
1018
- }
1019
-
1020
- // Check for left recursion
1021
- std::vector<bool> rules_visited(n_rules);
1022
- std::vector<bool> rules_in_progress(n_rules);
1023
- std::vector<bool> rules_may_be_empty(n_rules);
1024
- for (size_t i = 0; i < n_rules; i++) {
1025
- if (rules_visited[i]) {
1026
- continue;
1027
- }
1028
- if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
1029
- LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
1030
- return nullptr;
1031
- }
1032
- }
1033
-
1034
- // loop over alternates of start rule to build initial stacks
1035
- llama_grammar_stacks stacks;
1036
- pos = vec_rules[start_rule_index].data();
1037
- do {
1038
- llama_grammar_stack stack;
1039
- if (!llama_grammar_is_end_of_sequence(pos)) {
1040
- // if alternate is nonempty, add to stack
1041
- stack.push_back(pos);
1042
- }
1043
- llama_grammar_advance_stack(vec_rules, stack, stacks);
1044
- while (!llama_grammar_is_end_of_sequence(pos)) {
1045
- // scan to end of alternate def
1046
- pos++;
1047
- }
1048
- if (pos->type == LLAMA_GRETYPE_ALT) {
1049
- // there's another alternate def of this rule to process
1050
- pos++;
1051
- } else {
1052
- break;
1053
- }
1054
- } while (true);
1055
-
1056
- std::vector<llama_token> vec_trigger_tokens;
1057
- std::vector<std::string> vec_trigger_words;
1058
- for (size_t i = 0; i < num_trigger_tokens; i++) {
1059
- LM_GGML_ASSERT(trigger_tokens != nullptr);
1060
- vec_trigger_tokens.push_back(trigger_tokens[i]);
1061
- }
1062
- for (size_t i = 0; i < num_trigger_words; i++) {
1063
- LM_GGML_ASSERT(trigger_words != nullptr);
1064
- vec_trigger_words.push_back(trigger_words[i]);
1065
- }
1066
-
1067
- // Important: vec_rules has to be moved here, not copied, because stacks contains
1068
- // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
1069
- // then the pointers would be invalidated when the local vec_rules goes out of scope.
1070
- return new llama_grammar {
1071
- vocab,
1072
- std::move(vec_rules),
1073
- std::move(stacks),
1074
- /* .partial_utf8 = */ {},
1075
- /* .lazy = */ lazy,
1076
- /* .awaiting_trigger = */ lazy,
1077
- /* .trigger_buffer = */ "",
1078
- std::move(vec_trigger_tokens),
1079
- std::move(vec_trigger_words),
1080
- };
1081
- }
1082
-
1083
// Destroys a grammar created by llama_grammar_init_impl / llama_grammar_clone_impl.
// Passing nullptr is allowed and does nothing.
void llama_grammar_free_impl(struct llama_grammar * grammar) {
    // `delete` on a null pointer is a no-op, so no explicit check is needed
    delete grammar;
}
1090
-
1091
- struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
1092
- llama_grammar * result = new llama_grammar {
1093
- grammar.vocab,
1094
- grammar.rules,
1095
- grammar.stacks,
1096
- grammar.partial_utf8,
1097
- grammar.lazy,
1098
- grammar.awaiting_trigger,
1099
- grammar.trigger_buffer,
1100
- grammar.trigger_tokens,
1101
- grammar.trigger_words,
1102
- };
1103
-
1104
- // redirect elements in stacks to point to new rules
1105
- for (size_t is = 0; is < result->stacks.size(); is++) {
1106
- for (size_t ie = 0; ie < result->stacks[is].size(); ie++) {
1107
- for (size_t ir0 = 0; ir0 < grammar.rules.size(); ir0++) {
1108
- for (size_t ir1 = 0; ir1 < grammar.rules[ir0].size(); ir1++) {
1109
- if (grammar.stacks[is][ie] == &grammar.rules[ir0][ir1]) {
1110
- result->stacks[is][ie] = &result->rules[ir0][ir1];
1111
- }
1112
- }
1113
- }
1114
- }
1115
- }
1116
-
1117
- return result;
1118
- }
1119
-
1120
- void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
1121
- LM_GGML_ASSERT(grammar.vocab != nullptr);
1122
-
1123
- if (grammar.awaiting_trigger) {
1124
- return;
1125
- }
1126
-
1127
- bool allow_eog = false;
1128
- for (const auto & stack : grammar.stacks) {
1129
- if (stack.empty()) {
1130
- allow_eog = true;
1131
- break;
1132
- }
1133
- }
1134
-
1135
- std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
1136
- candidates_decoded.reserve(cur_p->size);
1137
-
1138
- llama_grammar_candidates candidates_grammar;
1139
- candidates_grammar.reserve(cur_p->size);
1140
-
1141
- for (size_t i = 0; i < cur_p->size; ++i) {
1142
- const llama_token id = cur_p->data[i].id;
1143
- const std::string & piece = grammar.vocab->token_to_piece(id);
1144
-
1145
- if (grammar.vocab->is_eog(id)) {
1146
- if (!allow_eog) {
1147
- cur_p->data[i].logit = -INFINITY;
1148
- }
1149
- } else if (piece.empty() || piece[0] == 0) {
1150
- cur_p->data[i].logit = -INFINITY;
1151
- } else {
1152
- candidates_decoded.push_back(decode_utf8(piece, grammar.partial_utf8));
1153
- candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
1154
- }
1155
- }
1156
-
1157
- const auto rejects = llama_grammar_reject_candidates(grammar.rules, grammar.stacks, candidates_grammar);
1158
- for (const auto & reject : rejects) {
1159
- cur_p->data[reject.index].logit = -INFINITY;
1160
- }
1161
- }
1162
-
1163
- void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
1164
- LM_GGML_ASSERT(grammar.vocab != nullptr);
1165
-
1166
- const auto & piece = grammar.vocab->token_to_piece(token);
1167
-
1168
- if (grammar.awaiting_trigger) {
1169
- if (std::find(grammar.trigger_tokens.begin(), grammar.trigger_tokens.end(), token) != grammar.trigger_tokens.end()) {
1170
- grammar.awaiting_trigger = false;
1171
- grammar.trigger_buffer.clear();
1172
- llama_grammar_accept_str(grammar, piece);
1173
- LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str());
1174
- return;
1175
- } else {
1176
- // TODO: consider a smarter incremental substring search algorithm (store last position to search from).
1177
- grammar.trigger_buffer += piece;
1178
- for (const auto & word : grammar.trigger_words) {
1179
- auto pos = grammar.trigger_buffer.find(word);
1180
- if (pos != std::string::npos) {
1181
- grammar.awaiting_trigger = false;
1182
- auto constrained_str = grammar.trigger_buffer.substr(pos);
1183
- grammar.trigger_buffer.clear();
1184
- llama_grammar_accept_str(grammar, constrained_str);
1185
- LLAMA_LOG_DEBUG("Grammar triggered on word `%s`", word.c_str());
1186
- return;
1187
- }
1188
- }
1189
- LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)\n", token, piece.c_str());
1190
- return;
1191
- }
1192
- }
1193
-
1194
- if (grammar.vocab->is_eog(token)) {
1195
- for (const auto & stack : grammar.stacks) {
1196
- if (stack.empty()) {
1197
- return;
1198
- }
1199
- }
1200
- LM_GGML_ABORT("fatal error");
1201
- }
1202
-
1203
- llama_grammar_accept_str(grammar, piece);
1204
- }
1205
-
1206
- void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) {
1207
- // Note terminating 0 in decoded string
1208
- const auto decoded = decode_utf8(piece, grammar.partial_utf8);
1209
- const auto & code_points = decoded.first;
1210
-
1211
- for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
1212
- llama_grammar_accept(&grammar, *it);
1213
- }
1214
-
1215
- grammar.partial_utf8 = decoded.second;
1216
- if (grammar.stacks.empty()) {
1217
- throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece);
1218
- }
1219
- }
1
+ #include "llama-grammar.h"
2
+
3
+ #include "llama-impl.h"
4
+ #include "llama-vocab.h"
5
+ #include "llama-sampling.h"
6
+
7
+ #include <cmath>
8
+ #include <algorithm>
9
+ #include <stdexcept>
10
+
11
+ //
12
+ // helpers
13
+ //
14
+
15
// Decodes the single UTF-8 sequence starting at src.
// Returns {code point, pointer just past the bytes consumed}.
// NOTE: the input is assumed to be valid UTF-8; the only safety check is that
// decoding stops early at a NUL byte so a truncated sequence cannot overrun.
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
    // sequence length indexed by the high nibble of the lead byte
    static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };

    const uint8_t lead    = static_cast<uint8_t>(*src);
    const int     n_bytes = lookup[lead >> 4];
    const uint8_t mask    = (1 << (8 - n_bytes)) - 1;

    uint32_t           code_point = lead & mask;
    const char *       cursor     = src + 1;
    const char * const limit      = src + n_bytes; // may lie past the terminating NUL

    while (cursor < limit && *cursor) {
        code_point = (code_point << 6) + (static_cast<uint8_t>(*cursor) & 0x3F);
        ++cursor;
    }
    return std::make_pair(code_point, cursor);
}
30
+
31
+ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
32
+ const std::string & src,
33
+ llama_partial_utf8 partial_start) {
34
+ static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
35
+ const char * pos = src.c_str();
36
+ std::vector<uint32_t> code_points;
37
+
38
+ // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
39
+ code_points.reserve(src.size() + 1);
40
+ uint32_t value = partial_start.value;
41
+ int n_remain = partial_start.n_remain;
42
+
43
+ // continue previous decode, if applicable
44
+ while (*pos != 0 && n_remain > 0) {
45
+ uint8_t next_byte = static_cast<uint8_t>(*pos);
46
+ if ((next_byte >> 6) != 2) {
47
+ // invalid sequence, abort
48
+ code_points.push_back(0);
49
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, -1 });
50
+ }
51
+ value = (value << 6) + (next_byte & 0x3F);
52
+ ++pos;
53
+ --n_remain;
54
+ }
55
+
56
+ if (partial_start.n_remain > 0 && n_remain == 0) {
57
+ code_points.push_back(value);
58
+ }
59
+
60
+ // decode any subsequent utf-8 sequences, which may end in an incomplete one
61
+ while (*pos != 0) {
62
+ uint8_t first_byte = static_cast<uint8_t>(*pos);
63
+ uint8_t highbits = first_byte >> 4;
64
+ n_remain = lookup[highbits] - 1;
65
+
66
+ if (n_remain < 0) {
67
+ // invalid sequence, abort
68
+ code_points.clear();
69
+ code_points.push_back(0);
70
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, n_remain });
71
+ }
72
+
73
+ uint8_t mask = (1 << (7 - n_remain)) - 1;
74
+ value = first_byte & mask;
75
+
76
+ ++pos;
77
+ while (*pos != 0 && n_remain > 0) {
78
+ value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
79
+ ++pos;
80
+ --n_remain;
81
+ }
82
+ if (n_remain == 0) {
83
+ code_points.push_back(value);
84
+ }
85
+ }
86
+ code_points.push_back(0);
87
+
88
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ value, n_remain });
89
+ }
90
+
91
// True iff c is an ASCII decimal digit.
static bool is_digit_char(char c) {
    return c >= '0' && c <= '9';
}
94
+
95
+ static bool is_word_char(char c) {
96
+ return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || is_digit_char(c);
97
+ }
98
+
99
// Parses exactly `size` hexadecimal digits starting at src.
// Returns {value, pointer just past the digits}.
// Throws std::runtime_error when fewer than `size` hex digits are available.
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
    const char * const limit  = src + size;
    const char *       cursor = src;
    uint32_t           acc    = 0;

    while (cursor < limit && *cursor) {
        const char c = *cursor;
        uint32_t digit;
        if ('0' <= c && c <= '9') {
            digit = c - '0';
        } else if ('a' <= c && c <= 'f') {
            digit = c - 'a' + 10;
        } else if ('A' <= c && c <= 'F') {
            digit = c - 'A' + 10;
        } else {
            break;
        }
        acc = (acc << 4) + digit;
        ++cursor;
    }

    // stopping short means a non-hex character or the end of the input was hit
    if (cursor != limit) {
        throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
    }
    return std::make_pair(acc, cursor);
}
121
+
122
// Skips spaces, tabs and `#` comments starting at src; line terminators are
// skipped too when newline_ok is set. A comment extends up to, but not
// including, the next CR/LF. Returns the first position that was not skipped.
static const char * parse_space(const char * src, bool newline_ok) {
    const char * cursor = src;
    for (;;) {
        const char c = *cursor;
        if (c == '#') {
            // comment: run to the end of the line (or of the input)
            while (*cursor && *cursor != '\r' && *cursor != '\n') {
                ++cursor;
            }
        } else if (c == ' ' || c == '\t' || (newline_ok && (c == '\r' || c == '\n'))) {
            ++cursor;
        } else {
            break;
        }
    }
    return cursor;
}
136
+
137
+ static const char * parse_name(const char * src) {
138
+ const char * pos = src;
139
+ while (is_word_char(*pos)) {
140
+ pos++;
141
+ }
142
+ if (pos == src) {
143
+ throw std::runtime_error(std::string("expecting name at ") + src);
144
+ }
145
+ return pos;
146
+ }
147
+
148
+ static const char * parse_int(const char * src) {
149
+ const char * pos = src;
150
+ while (is_digit_char(*pos)) {
151
+ pos++;
152
+ }
153
+ if (pos == src) {
154
+ throw std::runtime_error(std::string("expecting integer at ") + src);
155
+ }
156
+ return pos;
157
+ }
158
+
159
+ static std::pair<uint32_t, const char *> parse_char(const char * src) {
160
+ if (*src == '\\') {
161
+ switch (src[1]) {
162
+ case 'x': return parse_hex(src + 2, 2);
163
+ case 'u': return parse_hex(src + 2, 4);
164
+ case 'U': return parse_hex(src + 2, 8);
165
+ case 't': return std::make_pair('\t', src + 2);
166
+ case 'r': return std::make_pair('\r', src + 2);
167
+ case 'n': return std::make_pair('\n', src + 2);
168
+ case '\\':
169
+ case '"':
170
+ case '[':
171
+ case ']':
172
+ return std::make_pair(src[1], src + 2);
173
+ default:
174
+ throw std::runtime_error(std::string("unknown escape at ") + src);
175
+ }
176
+ } else if (*src) {
177
+ return decode_utf8(src);
178
+ }
179
+ throw std::runtime_error("unexpected end of input");
180
+ }
181
+
182
// Writes code point c to file: printable ASCII (0x20..0x7E) is written as the
// character itself, everything else as <U+XXXX>.
static void print_grammar_char(FILE * file, uint32_t c) {
    // 0x7F (DEL) is a control character, so it is excluded from the raw range
    if (0x20 <= c && c < 0x7f) {
        fprintf(file, "%c", static_cast<char>(c));
    } else {
        // cop out of encoding UTF-8
        fprintf(file, "<U+%04X>", c);
    }
}
190
+
191
+ static bool is_char_element(llama_grammar_element elem) {
192
+ switch (elem.type) {
193
+ case LLAMA_GRETYPE_CHAR: return true;
194
+ case LLAMA_GRETYPE_CHAR_NOT: return true;
195
+ case LLAMA_GRETYPE_CHAR_ALT: return true;
196
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
197
+ case LLAMA_GRETYPE_CHAR_ANY: return true;
198
+ default: return false;
199
+ }
200
+ }
201
+
202
+ static void print_rule_binary(FILE * file, const llama_grammar_rule & rule) {
203
+ for (auto elem : rule) {
204
+ switch (elem.type) {
205
+ case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
206
+ case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break;
207
+ case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break;
208
+ case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break;
209
+ case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
210
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
211
+ case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
212
+ case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
213
+ }
214
+ switch (elem.type) {
215
+ case LLAMA_GRETYPE_END:
216
+ case LLAMA_GRETYPE_ALT:
217
+ case LLAMA_GRETYPE_RULE_REF:
218
+ fprintf(file, "(%u) ", elem.value);
219
+ break;
220
+ case LLAMA_GRETYPE_CHAR:
221
+ case LLAMA_GRETYPE_CHAR_NOT:
222
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
223
+ case LLAMA_GRETYPE_CHAR_ALT:
224
+ case LLAMA_GRETYPE_CHAR_ANY:
225
+ fprintf(file, "(\"");
226
+ print_grammar_char(file, elem.value);
227
+ fprintf(file, "\") ");
228
+ break;
229
+ }
230
+ }
231
+ fprintf(file, "\n");
232
+ }
233
+
234
+ static void print_rule(
235
+ FILE * file,
236
+ uint32_t rule_id,
237
+ const llama_grammar_rule & rule,
238
+ const std::map<uint32_t, std::string> & symbol_id_names) {
239
+ if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
240
+ throw std::runtime_error(
241
+ "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
242
+ }
243
+ fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
244
+ for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
245
+ llama_grammar_element elem = rule[i];
246
+ switch (elem.type) {
247
+ case LLAMA_GRETYPE_END:
248
+ throw std::runtime_error(
249
+ "unexpected end of rule: " + std::to_string(rule_id) + "," +
250
+ std::to_string(i));
251
+ case LLAMA_GRETYPE_ALT:
252
+ fprintf(file, "| ");
253
+ break;
254
+ case LLAMA_GRETYPE_RULE_REF:
255
+ fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
256
+ break;
257
+ case LLAMA_GRETYPE_CHAR:
258
+ fprintf(file, "[");
259
+ print_grammar_char(file, elem.value);
260
+ break;
261
+ case LLAMA_GRETYPE_CHAR_NOT:
262
+ fprintf(file, "[^");
263
+ print_grammar_char(file, elem.value);
264
+ break;
265
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
266
+ if (i == 0 || !is_char_element(rule[i - 1])) {
267
+ throw std::runtime_error(
268
+ "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
269
+ std::to_string(rule_id) + "," + std::to_string(i));
270
+ }
271
+ fprintf(file, "-");
272
+ print_grammar_char(file, elem.value);
273
+ break;
274
+ case LLAMA_GRETYPE_CHAR_ALT:
275
+ if (i == 0 || !is_char_element(rule[i - 1])) {
276
+ throw std::runtime_error(
277
+ "LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
278
+ std::to_string(rule_id) + "," + std::to_string(i));
279
+ }
280
+ print_grammar_char(file, elem.value);
281
+ break;
282
+ case LLAMA_GRETYPE_CHAR_ANY:
283
+ fprintf(file, ".");
284
+ break;
285
+ }
286
+ if (is_char_element(elem)) {
287
+ switch (rule[i + 1].type) {
288
+ case LLAMA_GRETYPE_CHAR_ALT:
289
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
290
+ case LLAMA_GRETYPE_CHAR_ANY:
291
+ break;
292
+ default:
293
+ fprintf(file, "] ");
294
+ }
295
+ }
296
+ }
297
+ fprintf(file, "\n");
298
+ }
299
+
300
+ //
301
+ // implementation
302
+ //
303
+
304
+ uint32_t llama_grammar_parser::get_symbol_id(const char * src, size_t len) {
305
+ uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
306
+ auto result = symbol_ids.emplace(std::string(src, len), next_id);
307
+ return result.first->second;
308
+ }
309
+
310
+ uint32_t llama_grammar_parser::generate_symbol_id(const std::string & base_name) {
311
+ uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
312
+ symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
313
+ return next_id;
314
+ }
315
+
316
+ void llama_grammar_parser::add_rule(uint32_t rule_id, const llama_grammar_rule & rule) {
317
+ if (rules.size() <= rule_id) {
318
+ rules.resize(rule_id + 1);
319
+ }
320
+ rules[rule_id] = rule;
321
+ }
322
+
323
+ const char * llama_grammar_parser::parse_alternates(
324
+ const char * src,
325
+ const std::string & rule_name,
326
+ uint32_t rule_id,
327
+ bool is_nested) {
328
+ llama_grammar_rule rule;
329
+ const char * pos = parse_sequence(src, rule_name, rule, is_nested);
330
+ while (*pos == '|') {
331
+ rule.push_back({LLAMA_GRETYPE_ALT, 0});
332
+ pos = parse_space(pos + 1, true);
333
+ pos = parse_sequence(pos, rule_name, rule, is_nested);
334
+ }
335
+ rule.push_back({LLAMA_GRETYPE_END, 0});
336
+ add_rule(rule_id, rule);
337
+ return pos;
338
+ }
339
+
340
+ const char * llama_grammar_parser::parse_sequence(
341
+ const char * src,
342
+ const std::string & rule_name,
343
+ llama_grammar_rule & rule,
344
+ bool is_nested) {
345
+ size_t last_sym_start = rule.size();
346
+ const char * pos = src;
347
+
348
+ auto handle_repetitions = [&](int min_times, int max_times) {
349
+
350
+ if (last_sym_start == rule.size()) {
351
+ throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
352
+ }
353
+
354
+ // apply transformation to previous symbol (last_sym_start to end) according to
355
+ // the following rewrite rules:
356
+ // S{m,n} --> S S S (m times) S'(n-m)
357
+ // S'(x) ::= S S'(x-1) |
358
+ // (... n-m definitions of these S' rules ...)
359
+ // S'(1) ::= S |
360
+ // S{m,} --> S S S (m times) S'
361
+ // S' ::= S S' |
362
+ // S* --> S{0,}
363
+ // --> S' ::= S S' |
364
+ // S+ --> S{1,}
365
+ // --> S S'
366
+ // S' ::= S S' |
367
+ // S? --> S{0,1}
368
+ // --> S'
369
+ // S' ::= S |
370
+
371
+ llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
372
+ if (min_times == 0) {
373
+ rule.resize(last_sym_start);
374
+ } else {
375
+ // Repeat the previous elements (min_times - 1) times
376
+ for (int i = 1; i < min_times; i++) {
377
+ rule.insert(rule.end(), prev_rule.begin(), prev_rule.end());
378
+ }
379
+ }
380
+
381
+ uint32_t last_rec_rule_id = 0;
382
+ auto n_opt = max_times < 0 ? 1 : max_times - min_times;
383
+
384
+ llama_grammar_rule rec_rule(prev_rule);
385
+ for (int i = 0; i < n_opt; i++) {
386
+ rec_rule.resize(prev_rule.size());
387
+ uint32_t rec_rule_id = generate_symbol_id( rule_name);
388
+ if (i > 0 || max_times < 0) {
389
+ rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
390
+ }
391
+ rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
392
+ rec_rule.push_back({LLAMA_GRETYPE_END, 0});
393
+ add_rule( rec_rule_id, rec_rule);
394
+ last_rec_rule_id = rec_rule_id;
395
+ }
396
+ if (n_opt > 0) {
397
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
398
+ }
399
+ };
400
+
401
+ while (*pos) {
402
+ if (*pos == '"') { // literal string
403
+ pos++;
404
+ last_sym_start = rule.size();
405
+ while (*pos != '"') {
406
+ if (!*pos) {
407
+ throw std::runtime_error("unexpected end of input");
408
+ }
409
+ auto char_pair = parse_char(pos);
410
+ pos = char_pair.second;
411
+ rule.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
412
+ }
413
+ pos = parse_space(pos + 1, is_nested);
414
+ } else if (*pos == '[') { // char range(s)
415
+ pos++;
416
+ enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
417
+ if (*pos == '^') {
418
+ pos++;
419
+ start_type = LLAMA_GRETYPE_CHAR_NOT;
420
+ }
421
+ last_sym_start = rule.size();
422
+ while (*pos != ']') {
423
+ if (!*pos) {
424
+ throw std::runtime_error("unexpected end of input");
425
+ }
426
+ auto char_pair = parse_char(pos);
427
+ pos = char_pair.second;
428
+ enum llama_gretype type = last_sym_start < rule.size()
429
+ ? LLAMA_GRETYPE_CHAR_ALT
430
+ : start_type;
431
+
432
+ rule.push_back({type, char_pair.first});
433
+ if (pos[0] == '-' && pos[1] != ']') {
434
+ if (!pos[1]) {
435
+ throw std::runtime_error("unexpected end of input");
436
+ }
437
+ auto endchar_pair = parse_char(pos + 1);
438
+ pos = endchar_pair.second;
439
+ rule.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
440
+ }
441
+ }
442
+ pos = parse_space(pos + 1, is_nested);
443
+ } else if (is_word_char(*pos)) { // rule reference
444
+ const char * name_end = parse_name(pos);
445
+ uint32_t ref_rule_id = get_symbol_id(pos, name_end - pos);
446
+ pos = parse_space(name_end, is_nested);
447
+ last_sym_start = rule.size();
448
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
449
+ } else if (*pos == '(') { // grouping
450
+ // parse nested alternates into synthesized rule
451
+ pos = parse_space(pos + 1, true);
452
+ uint32_t sub_rule_id = generate_symbol_id(rule_name);
453
+ pos = parse_alternates(pos, rule_name, sub_rule_id, true);
454
+ last_sym_start = rule.size();
455
+ // output reference to synthesized rule
456
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
457
+ if (*pos != ')') {
458
+ throw std::runtime_error(std::string("expecting ')' at ") + pos);
459
+ }
460
+ pos = parse_space(pos + 1, is_nested);
461
+ } else if (*pos == '.') { // any char
462
+ last_sym_start = rule.size();
463
+ rule.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
464
+ pos = parse_space(pos + 1, is_nested);
465
+ } else if (*pos == '*') {
466
+ pos = parse_space(pos + 1, is_nested);
467
+ handle_repetitions(0, -1);
468
+ } else if (*pos == '+') {
469
+ pos = parse_space(pos + 1, is_nested);
470
+ handle_repetitions(1, -1);
471
+ } else if (*pos == '?') {
472
+ pos = parse_space(pos + 1, is_nested);
473
+ handle_repetitions(0, 1);
474
+ } else if (*pos == '{') {
475
+ pos = parse_space(pos + 1, is_nested);
476
+
477
+ if (!is_digit_char(*pos)) {
478
+ throw std::runtime_error(std::string("expecting an int at ") + pos);
479
+ }
480
+ const char * int_end = parse_int(pos);
481
+ int min_times = std::stoul(std::string(pos, int_end - pos));
482
+ pos = parse_space(int_end, is_nested);
483
+
484
+ int max_times = -1;
485
+
486
+ if (*pos == '}') {
487
+ max_times = min_times;
488
+ pos = parse_space(pos + 1, is_nested);
489
+ } else if (*pos == ',') {
490
+ pos = parse_space(pos + 1, is_nested);
491
+
492
+ if (is_digit_char(*pos)) {
493
+ const char * int_end = parse_int(pos);
494
+ max_times = std::stoul(std::string(pos, int_end - pos));
495
+ pos = parse_space(int_end, is_nested);
496
+ }
497
+
498
+ if (*pos != '}') {
499
+ throw std::runtime_error(std::string("expecting '}' at ") + pos);
500
+ }
501
+ pos = parse_space(pos + 1, is_nested);
502
+ } else {
503
+ throw std::runtime_error(std::string("expecting ',' at ") + pos);
504
+ }
505
+ handle_repetitions(min_times, max_times);
506
+ } else {
507
+ break;
508
+ }
509
+ }
510
+ return pos;
511
+ }
512
+
513
+ const char * llama_grammar_parser::parse_rule(const char * src) {
514
+ const char * name_end = parse_name(src);
515
+ const char * pos = parse_space(name_end, false);
516
+ size_t name_len = name_end - src;
517
+ uint32_t rule_id = get_symbol_id(src, name_len);
518
+ const std::string name(src, name_len);
519
+
520
+ if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
521
+ throw std::runtime_error(std::string("expecting ::= at ") + pos);
522
+ }
523
+ pos = parse_space(pos + 3, true);
524
+
525
+ pos = parse_alternates(pos, name, rule_id, false);
526
+
527
+ if (*pos == '\r') {
528
+ pos += pos[1] == '\n' ? 2 : 1;
529
+ } else if (*pos == '\n') {
530
+ pos++;
531
+ } else if (*pos) {
532
+ throw std::runtime_error(std::string("expecting newline or end at ") + pos);
533
+ }
534
+ return parse_space(pos, true);
535
+ }
536
+
537
+ bool llama_grammar_parser::parse(const char * src) {
538
+ try {
539
+ const char * pos = parse_space(src, true);
540
+ while (*pos) {
541
+ pos = parse_rule(pos);
542
+ }
543
+ // Validate the state to ensure that all rules are defined
544
+ for (const auto & rule : rules) {
545
+ if (rule.empty()) {
546
+ throw std::runtime_error("Undefined rule");
547
+ }
548
+ for (const auto & elem : rule) {
549
+ if (elem.type == LLAMA_GRETYPE_RULE_REF) {
550
+ // Ensure that the rule at that location exists
551
+ if (elem.value >= rules.size() || rules[elem.value].empty()) {
552
+ // Get the name of the rule that is missing
553
+ for (const auto & kv : symbol_ids) {
554
+ if (kv.second == elem.value) {
555
+ throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
556
+ }
557
+ }
558
+ }
559
+ }
560
+ }
561
+ }
562
+ } catch (const std::exception & err) {
563
+ fprintf(stderr, "%s: error parsing grammar: %s\n\n%s\n", __func__, err.what(), src);
564
+ rules.clear();
565
+ return false;
566
+ }
567
+
568
+ return true;
569
+ }
570
+
571
+ void llama_grammar_parser::print(FILE * file) {
572
+ try {
573
+ std::map<uint32_t, std::string> symbol_id_names;
574
+ for (const auto & kv : symbol_ids) {
575
+ symbol_id_names[kv.second] = kv.first;
576
+ }
577
+ for (size_t i = 0, end = rules.size(); i < end; i++) {
578
+ // fprintf(file, "%zu: ", i);
579
+ // print_rule_binary(file, rules[i]);
580
+ print_rule(file, uint32_t(i), rules[i], symbol_id_names);
581
+ // fprintf(file, "\n");
582
+ }
583
+ } catch (const std::exception & err) {
584
+ fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
585
+ }
586
+ }
587
+
588
+ llama_grammar_stack llama_grammar_parser::c_rules() const {
589
+ llama_grammar_stack ret;
590
+ ret.reserve(rules.size());
591
+ for (const auto & rule : rules) {
592
+ ret.push_back(rule.data());
593
+ }
594
+ return ret;
595
+ }
596
+
597
+ // returns true iff pos points to the end of one of the definitions of a rule
598
+ static bool llama_grammar_is_end_of_sequence(const llama_grammar_element * pos) {
599
+ switch (pos->type) {
600
+ case LLAMA_GRETYPE_END: return true; // NOLINT
601
+ case LLAMA_GRETYPE_ALT: return true; // NOLINT
602
+ default: return false;
603
+ }
604
+ }
605
+
606
+ // returns true iff chr satisfies the char range at pos (regular or inverse range)
607
+ // asserts that pos is pointing to a char range element
608
+ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
609
+ const llama_grammar_element * pos,
610
+ const uint32_t chr) {
611
+ bool found = false;
612
+ bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
613
+
614
+ LM_GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
615
+
616
+ do {
617
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
618
+ // inclusive range, e.g. [a-z]
619
+ found = found || (pos->value <= chr && chr <= pos[1].value);
620
+ pos += 2;
621
+ } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
622
+ // Any character matches "."
623
+ found = true;
624
+ pos += 1;
625
+ } else {
626
+ // exact char match, e.g. [a] or "a"
627
+ found = found || pos->value == chr;
628
+ pos += 1;
629
+ }
630
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
631
+
632
+ return std::make_pair(found == is_positive_char, pos);
633
+ }
634
+
635
+ // returns true iff some continuation of the given partial UTF-8 sequence could satisfy the char
636
+ // range at pos (regular or inverse range)
637
+ // asserts that pos is pointing to a char range element
638
+ static bool llama_grammar_match_partial_char(
639
+ const llama_grammar_element * pos,
640
+ const llama_partial_utf8 partial_utf8) {
641
+ bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
642
+ LM_GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
643
+
644
+ uint32_t partial_value = partial_utf8.value;
645
+ int n_remain = partial_utf8.n_remain;
646
+
647
+ // invalid sequence or 7-bit char split across 2 bytes (overlong)
648
+ if (n_remain < 0 || (n_remain == 1 && partial_value < 2)) {
649
+ return false;
650
+ }
651
+
652
+ // range of possible code points this partial UTF-8 sequence could complete to
653
+ uint32_t low = partial_value << (n_remain * 6);
654
+ uint32_t high = low | ((1 << (n_remain * 6)) - 1);
655
+
656
+ if (low == 0) {
657
+ if (n_remain == 2) {
658
+ low = 1 << 11;
659
+ } else if (n_remain == 3) {
660
+ low = 1 << 16;
661
+ }
662
+ }
663
+
664
+ do {
665
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
666
+ // inclusive range, e.g. [a-z]
667
+ if (pos->value <= high && low <= pos[1].value) {
668
+ return is_positive_char;
669
+ }
670
+ pos += 2;
671
+ } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
672
+ // Any character matches "."
673
+ return true;
674
+ } else {
675
+ // exact char match, e.g. [a] or "a"
676
+ if (low <= pos->value && pos->value <= high) {
677
+ return is_positive_char;
678
+ }
679
+ pos += 1;
680
+ }
681
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
682
+
683
+ return !is_positive_char;
684
+ }
685
+
686
+ // transforms a grammar pushdown stack into N possible stacks, all ending
687
+ // at a character range (terminal element)
688
+ static void llama_grammar_advance_stack(
689
+ const llama_grammar_rules & rules,
690
+ const llama_grammar_stack & stack,
691
+ llama_grammar_stacks & new_stacks) {
692
+ if (stack.empty()) {
693
+ if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
694
+ new_stacks.emplace_back(stack);
695
+ }
696
+ return;
697
+ }
698
+
699
+ const llama_grammar_element * pos = stack.back();
700
+
701
+ switch (pos->type) {
702
+ case LLAMA_GRETYPE_RULE_REF: {
703
+ const size_t rule_id = static_cast<size_t>(pos->value);
704
+ const llama_grammar_element * subpos = rules[rule_id].data();
705
+ do {
706
+ // init new stack without the top (pos)
707
+ llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
708
+ if (!llama_grammar_is_end_of_sequence(pos + 1)) {
709
+ // if this rule ref is followed by another element, add that to stack
710
+ new_stack.push_back(pos + 1);
711
+ }
712
+ if (!llama_grammar_is_end_of_sequence(subpos)) {
713
+ // if alternate is nonempty, add to stack
714
+ new_stack.push_back(subpos);
715
+ }
716
+ llama_grammar_advance_stack(rules, new_stack, new_stacks);
717
+ while (!llama_grammar_is_end_of_sequence(subpos)) {
718
+ // scan to end of alternate def
719
+ subpos++;
720
+ }
721
+ if (subpos->type == LLAMA_GRETYPE_ALT) {
722
+ // there's another alternate def of this rule to process
723
+ subpos++;
724
+ } else {
725
+ break;
726
+ }
727
+ } while (true);
728
+ break;
729
+ }
730
+ case LLAMA_GRETYPE_CHAR:
731
+ case LLAMA_GRETYPE_CHAR_NOT:
732
+ case LLAMA_GRETYPE_CHAR_ANY:
733
+ if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
734
+ // only add the stack if it's not a duplicate of one we already have
735
+ new_stacks.emplace_back(stack);
736
+ }
737
+ break;
738
+ default:
739
+ // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
740
+ // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
741
+ // those
742
+ LM_GGML_ABORT("fatal error");
743
+ }
744
+ }
745
+
746
+ static llama_grammar_candidates llama_grammar_reject_candidates(
747
+ const llama_grammar_rules & rules,
748
+ const llama_grammar_stacks & stacks,
749
+ const llama_grammar_candidates & candidates) {
750
+ LM_GGML_ASSERT(!stacks.empty()); // REVIEW
751
+
752
+ if (candidates.empty()) {
753
+ return {};
754
+ }
755
+
756
+ auto rejects = llama_grammar_reject_candidates_for_stack(rules, stacks.front(), candidates);
757
+
758
+ for (size_t i = 1, size = stacks.size(); i < size; ++i) {
759
+ rejects = llama_grammar_reject_candidates_for_stack(rules, stacks[i], rejects);
760
+ }
761
+
762
+ return rejects;
763
+ }
764
+
765
+ static bool llama_grammar_detect_left_recursion(
766
+ const llama_grammar_rules & rules,
767
+ size_t rule_index,
768
+ std::vector<bool> * rules_visited,
769
+ std::vector<bool> * rules_in_progress,
770
+ std::vector<bool> * rules_may_be_empty) {
771
+ if ((*rules_in_progress)[rule_index]) {
772
+ return true;
773
+ }
774
+
775
+ (*rules_in_progress)[rule_index] = true;
776
+
777
+ const llama_grammar_rule & rule = rules[rule_index];
778
+
779
+ // First check if the rule might produce the empty string. This could be done combined with the second
780
+ // step but it's more readable as two steps.
781
+ bool at_rule_start = true;
782
+ for (size_t i = 0; i < rule.size(); i++) {
783
+ if (llama_grammar_is_end_of_sequence(&rule[i])) {
784
+ if (at_rule_start) {
785
+ (*rules_may_be_empty)[rule_index] = true;
786
+ break;
787
+ }
788
+ at_rule_start = true;
789
+ } else {
790
+ at_rule_start = false;
791
+ }
792
+ }
793
+
794
+ // Second, recurse into leftmost nonterminals (or next-leftmost as long as the previous nonterminal may
795
+ // be empty)
796
+ bool recurse_into_nonterminal = true;
797
+ for (size_t i = 0; i < rule.size(); i++) {
798
+ if (rule[i].type == LLAMA_GRETYPE_RULE_REF && recurse_into_nonterminal) {
799
+ if (llama_grammar_detect_left_recursion(rules, (size_t)rule[i].value, rules_visited, rules_in_progress, rules_may_be_empty)) {
800
+ return true;
801
+ }
802
+ if (!((*rules_may_be_empty)[(size_t)rule[i].value])) {
803
+ recurse_into_nonterminal = false;
804
+ }
805
+ } else if (llama_grammar_is_end_of_sequence(&rule[i])) {
806
+ recurse_into_nonterminal = true;
807
+ } else {
808
+ recurse_into_nonterminal = false;
809
+ }
810
+ }
811
+
812
+ (*rules_in_progress)[rule_index] = false;
813
+ (*rules_visited)[rule_index] = true;
814
+
815
+ return false;
816
+ }
817
+
818
+ const llama_grammar_rules & llama_grammar_get_rules(const struct llama_grammar * grammar) {
819
+ return grammar->rules;
820
+ }
821
+
822
+ llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar) {
823
+ return grammar->stacks;
824
+ }
825
+
826
+ void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
827
+ llama_grammar_stacks stacks_new;
828
+ stacks_new.reserve(grammar->stacks.size());
829
+
830
+ for (const auto & stack : grammar->stacks) {
831
+ if (stack.empty()) {
832
+ continue;
833
+ }
834
+
835
+ auto match = llama_grammar_match_char(stack.back(), chr);
836
+ if (match.first) {
837
+ const llama_grammar_element * pos = match.second;
838
+
839
+ // update top of stack to next element, if any
840
+ llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
841
+ if (!llama_grammar_is_end_of_sequence(pos)) {
842
+ new_stack.push_back(pos);
843
+ }
844
+ llama_grammar_advance_stack(grammar->rules, new_stack, stacks_new);
845
+ }
846
+ }
847
+
848
+ grammar->stacks = std::move(stacks_new);
849
+ }
850
+
851
+ llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
852
+ const llama_grammar_rules & rules,
853
+ const llama_grammar_stack & stack,
854
+ const llama_grammar_candidates & candidates) {
855
+
856
+ llama_grammar_candidates rejects;
857
+ rejects.reserve(candidates.size());
858
+
859
+ if (stack.empty()) {
860
+ for (const auto & tok : candidates) {
861
+ if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
862
+ rejects.push_back(tok);
863
+ }
864
+ }
865
+ return rejects;
866
+ }
867
+
868
+ const llama_grammar_element * stack_pos = stack.back();
869
+
870
+ llama_grammar_candidates next_candidates;
871
+ next_candidates.reserve(candidates.size());
872
+
873
+ for (const auto & tok : candidates) {
874
+ if (*tok.code_points == 0) {
875
+ // reached end of full codepoints in token, reject iff it ended in a partial sequence
876
+ // that cannot satisfy this position in grammar
877
+ if (tok.partial_utf8.n_remain != 0 &&
878
+ !llama_grammar_match_partial_char(stack_pos, tok.partial_utf8)) {
879
+ rejects.push_back(tok);
880
+ }
881
+ } else if (llama_grammar_match_char(stack_pos, *tok.code_points).first) {
882
+ next_candidates.push_back({ tok.index, tok.code_points + 1, tok.partial_utf8 });
883
+ } else {
884
+ rejects.push_back(tok);
885
+ }
886
+ }
887
+
888
+ const auto * stack_pos_after = llama_grammar_match_char(stack_pos, 0).second;
889
+
890
+ // update top of stack to next element, if any
891
+ llama_grammar_stack stack_after(stack.begin(), stack.end() - 1);
892
+ if (!llama_grammar_is_end_of_sequence(stack_pos_after)) {
893
+ stack_after.push_back(stack_pos_after);
894
+ }
895
+ llama_grammar_stacks next_stacks;
896
+ llama_grammar_advance_stack(rules, stack_after, next_stacks);
897
+
898
+ auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
899
+ for (const auto & tok : next_rejects) {
900
+ rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
901
+ }
902
+
903
+ return rejects;
904
+ }
905
+
906
+ ////////////////////
907
+
908
+ struct llama_grammar * llama_grammar_init_impl(
909
+ const struct llama_vocab * vocab,
910
+ const llama_grammar_element ** rules,
911
+ size_t n_rules,
912
+ size_t start_rule_index) {
913
+ const llama_grammar_element * pos;
914
+
915
+ // copy rule definitions into vectors
916
+ llama_grammar_rules vec_rules(n_rules);
917
+ for (size_t i = 0; i < n_rules; i++) {
918
+ for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
919
+ vec_rules[i].push_back(*pos);
920
+ }
921
+ vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
922
+ }
923
+
924
+ // Check for left recursion
925
+ std::vector<bool> rules_visited(n_rules);
926
+ std::vector<bool> rules_in_progress(n_rules);
927
+ std::vector<bool> rules_may_be_empty(n_rules);
928
+ for (size_t i = 0; i < n_rules; i++) {
929
+ if (rules_visited[i]) {
930
+ continue;
931
+ }
932
+ if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
933
+ LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
934
+ return nullptr;
935
+ }
936
+ }
937
+
938
+ // loop over alternates of start rule to build initial stacks
939
+ llama_grammar_stacks stacks;
940
+ pos = vec_rules[start_rule_index].data();
941
+ do {
942
+ llama_grammar_stack stack;
943
+ if (!llama_grammar_is_end_of_sequence(pos)) {
944
+ // if alternate is nonempty, add to stack
945
+ stack.push_back(pos);
946
+ }
947
+ llama_grammar_advance_stack(vec_rules, stack, stacks);
948
+ while (!llama_grammar_is_end_of_sequence(pos)) {
949
+ // scan to end of alternate def
950
+ pos++;
951
+ }
952
+ if (pos->type == LLAMA_GRETYPE_ALT) {
953
+ // there's another alternate def of this rule to process
954
+ pos++;
955
+ } else {
956
+ break;
957
+ }
958
+ } while (true);
959
+
960
+ // Important: vec_rules has to be moved here, not copied, because stacks contains
961
+ // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
962
+ // then the pointers would be invalidated when the local vec_rules goes out of scope.
963
+ return new llama_grammar {
964
+ vocab,
965
+ std::move(vec_rules),
966
+ std::move(stacks),
967
+ /* .partial_utf8 = */ {},
968
+ /* .lazy =*/ false,
969
+ /* .awaiting_trigger = */ false,
970
+ /* .trigger_buffer = */ "",
971
+ /* .trigger_tokens = */ {},
972
+ /* .trigger_patterns = */ {},
973
+ };
974
+ }
975
+
976
+ struct llama_grammar * llama_grammar_init_impl(
977
+ const struct llama_vocab * vocab,
978
+ const char * grammar_str,
979
+ const char * grammar_root,
980
+ bool lazy,
981
+ const char ** trigger_patterns,
982
+ size_t num_trigger_patterns,
983
+ const llama_token * trigger_tokens,
984
+ size_t num_trigger_tokens) {
985
+ llama_grammar_parser parser;
986
+
987
+ // if there is a grammar, parse it
988
+ // rules will be empty (default) if there are parse errors
989
+ if (!parser.parse(grammar_str) || parser.rules.empty()) {
990
+ fprintf(stderr, "%s: failed to parse grammar\n", __func__);
991
+ return nullptr;
992
+ }
993
+
994
+ // Ensure that there is a "root" node.
995
+ if (parser.symbol_ids.find("root") == parser.symbol_ids.end()) {
996
+ fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
997
+ return nullptr;
998
+ }
999
+
1000
+ std::vector<const llama_grammar_element *> grammar_rules(parser.c_rules());
1001
+
1002
+ const size_t n_rules = grammar_rules.size();
1003
+ const size_t start_rule_index = parser.symbol_ids.at(grammar_root);
1004
+
1005
+ const llama_grammar_element * pos;
1006
+
1007
+ // copy rule definitions into vectors
1008
+ llama_grammar_rules vec_rules(n_rules);
1009
+ for (size_t i = 0; i < n_rules; i++) {
1010
+ for (pos = grammar_rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
1011
+ vec_rules[i].push_back(*pos);
1012
+ }
1013
+ vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
1014
+ }
1015
+
1016
+ // Check for left recursion
1017
+ std::vector<bool> rules_visited(n_rules);
1018
+ std::vector<bool> rules_in_progress(n_rules);
1019
+ std::vector<bool> rules_may_be_empty(n_rules);
1020
+ for (size_t i = 0; i < n_rules; i++) {
1021
+ if (rules_visited[i]) {
1022
+ continue;
1023
+ }
1024
+ if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
1025
+ LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
1026
+ return nullptr;
1027
+ }
1028
+ }
1029
+
1030
+ // loop over alternates of start rule to build initial stacks
1031
+ llama_grammar_stacks stacks;
1032
+ pos = vec_rules[start_rule_index].data();
1033
+ do {
1034
+ llama_grammar_stack stack;
1035
+ if (!llama_grammar_is_end_of_sequence(pos)) {
1036
+ // if alternate is nonempty, add to stack
1037
+ stack.push_back(pos);
1038
+ }
1039
+ llama_grammar_advance_stack(vec_rules, stack, stacks);
1040
+ while (!llama_grammar_is_end_of_sequence(pos)) {
1041
+ // scan to end of alternate def
1042
+ pos++;
1043
+ }
1044
+ if (pos->type == LLAMA_GRETYPE_ALT) {
1045
+ // there's another alternate def of this rule to process
1046
+ pos++;
1047
+ } else {
1048
+ break;
1049
+ }
1050
+ } while (true);
1051
+
1052
+ std::vector<llama_token> vec_trigger_tokens;
1053
+ std::vector<llama_grammar_trigger_pattern> vec_trigger_patterns;
1054
+ for (size_t i = 0; i < num_trigger_tokens; i++) {
1055
+ LM_GGML_ASSERT(trigger_tokens != nullptr);
1056
+ vec_trigger_tokens.push_back(trigger_tokens[i]);
1057
+ }
1058
+ for (size_t i = 0; i < num_trigger_patterns; i++) {
1059
+ LM_GGML_ASSERT(trigger_patterns != nullptr);
1060
+ auto & trigger = vec_trigger_patterns.emplace_back();
1061
+ trigger.pattern = trigger_patterns[i];
1062
+ trigger.regex = std::regex(trigger.pattern);
1063
+ }
1064
+
1065
+ // Important: vec_rules has to be moved here, not copied, because stacks contains
1066
+ // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
1067
+ // then the pointers would be invalidated when the local vec_rules goes out of scope.
1068
+ return new llama_grammar {
1069
+ vocab,
1070
+ std::move(vec_rules),
1071
+ std::move(stacks),
1072
+ /* .partial_utf8 = */ {},
1073
+ /* .lazy = */ lazy,
1074
+ /* .awaiting_trigger = */ lazy,
1075
+ /* .trigger_buffer = */ "",
1076
+ std::move(vec_trigger_tokens),
1077
+ std::move(vec_trigger_patterns),
1078
+ };
1079
+ }
1080
+
1081
// Destroys a grammar created by llama_grammar_init_impl / llama_grammar_clone_impl.
// Passing nullptr is allowed and does nothing.
void llama_grammar_free_impl(struct llama_grammar * grammar) {
    // deleting a null pointer is a no-op, so no explicit check is required
    delete grammar;
}
1088
+
1089
+ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
1090
+ auto * result = new llama_grammar {
1091
+ grammar.vocab,
1092
+ grammar.rules,
1093
+ grammar.stacks,
1094
+ grammar.partial_utf8,
1095
+ grammar.lazy,
1096
+ grammar.awaiting_trigger,
1097
+ grammar.trigger_buffer,
1098
+ grammar.trigger_tokens,
1099
+ grammar.trigger_patterns,
1100
+ };
1101
+
1102
+ // redirect elements in stacks to point to new rules
1103
+ for (size_t is = 0; is < result->stacks.size(); is++) {
1104
+ for (size_t ie = 0; ie < result->stacks[is].size(); ie++) {
1105
+ for (size_t ir0 = 0; ir0 < grammar.rules.size(); ir0++) {
1106
+ for (size_t ir1 = 0; ir1 < grammar.rules[ir0].size(); ir1++) {
1107
+ if (grammar.stacks[is][ie] == &grammar.rules[ir0][ir1]) {
1108
+ result->stacks[is][ie] = &result->rules[ir0][ir1];
1109
+ }
1110
+ }
1111
+ }
1112
+ }
1113
+ }
1114
+
1115
+ return result;
1116
+ }
1117
+
1118
+ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
1119
+ LM_GGML_ASSERT(grammar.vocab != nullptr);
1120
+
1121
+ if (grammar.awaiting_trigger) {
1122
+ return;
1123
+ }
1124
+
1125
+ bool allow_eog = false;
1126
+ for (const auto & stack : grammar.stacks) {
1127
+ if (stack.empty()) {
1128
+ allow_eog = true;
1129
+ break;
1130
+ }
1131
+ }
1132
+
1133
+ std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
1134
+ candidates_decoded.reserve(cur_p->size);
1135
+
1136
+ llama_grammar_candidates candidates_grammar;
1137
+ candidates_grammar.reserve(cur_p->size);
1138
+
1139
+ for (size_t i = 0; i < cur_p->size; ++i) {
1140
+ const llama_token id = cur_p->data[i].id;
1141
+ const std::string & piece = grammar.vocab->token_to_piece(id);
1142
+
1143
+ if (grammar.vocab->is_eog(id)) {
1144
+ if (!allow_eog) {
1145
+ cur_p->data[i].logit = -INFINITY;
1146
+ }
1147
+ } else if (piece.empty() || piece[0] == 0) {
1148
+ cur_p->data[i].logit = -INFINITY;
1149
+ } else {
1150
+ candidates_decoded.push_back(decode_utf8(piece, grammar.partial_utf8));
1151
+ candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
1152
+ }
1153
+ }
1154
+
1155
+ const auto rejects = llama_grammar_reject_candidates(grammar.rules, grammar.stacks, candidates_grammar);
1156
+ for (const auto & reject : rejects) {
1157
+ cur_p->data[reject.index].logit = -INFINITY;
1158
+ }
1159
+ }
1160
+
1161
+ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
1162
+ LM_GGML_ASSERT(grammar.vocab != nullptr);
1163
+
1164
+ const auto & piece = grammar.vocab->token_to_piece(token);
1165
+
1166
+ if (grammar.awaiting_trigger) {
1167
+ if (std::find(grammar.trigger_tokens.begin(), grammar.trigger_tokens.end(), token) != grammar.trigger_tokens.end()) {
1168
+ grammar.awaiting_trigger = false;
1169
+ grammar.trigger_buffer.clear();
1170
+ llama_grammar_accept_str(grammar, piece);
1171
+ LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str());
1172
+ return;
1173
+ } else {
1174
+ grammar.trigger_buffer += piece;
1175
+
1176
+ std::smatch match;
1177
+ for (const auto & trigger_pattern : grammar.trigger_patterns) {
1178
+ if (std::regex_match(grammar.trigger_buffer, match, trigger_pattern.regex)) {
1179
+ grammar.awaiting_trigger = false;
1180
+ // get from the first match to the end of the string
1181
+ auto constrained_str = grammar.trigger_buffer.substr(match.position(1));
1182
+ // std::string constrained_str(match[1].first, grammar.trigger_buffer.end());
1183
+ grammar.trigger_buffer.clear();
1184
+ llama_grammar_accept_str(grammar, constrained_str);
1185
+ LLAMA_LOG_DEBUG("Grammar triggered on regex: '%s'\n", constrained_str.c_str());
1186
+ return;
1187
+ }
1188
+ }
1189
+ LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)\n", token, piece.c_str());
1190
+ return;
1191
+ }
1192
+ }
1193
+
1194
+ if (grammar.vocab->is_eog(token)) {
1195
+ for (const auto & stack : grammar.stacks) {
1196
+ if (stack.empty()) {
1197
+ return;
1198
+ }
1199
+ }
1200
+ LM_GGML_ABORT("fatal error");
1201
+ }
1202
+
1203
+ llama_grammar_accept_str(grammar, piece);
1204
+ }
1205
+
1206
+ void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) {
1207
+ // Note terminating 0 in decoded string
1208
+ const auto decoded = decode_utf8(piece, grammar.partial_utf8);
1209
+ const auto & code_points = decoded.first;
1210
+
1211
+ for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
1212
+ llama_grammar_accept(&grammar, *it);
1213
+ }
1214
+
1215
+ grammar.partial_utf8 = decoded.second;
1216
+ if (grammar.stacks.empty()) {
1217
+ throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece);
1218
+ }
1219
+ }