@fugood/llama.node 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/CMakeLists.txt +85 -0
  2. package/README.md +56 -0
  3. package/bin/darwin/arm64/llama-node.node +0 -0
  4. package/bin/darwin/x64/llama-node.node +0 -0
  5. package/bin/linux/arm64/llama-node.node +0 -0
  6. package/bin/linux/x64/llama-node.node +0 -0
  7. package/bin/win32/arm64/llama-node.node +0 -0
  8. package/bin/win32/arm64/node.lib +0 -0
  9. package/bin/win32/x64/llama-node.node +0 -0
  10. package/bin/win32/x64/node.lib +0 -0
  11. package/lib/binding.js +13 -0
  12. package/lib/binding.ts +57 -0
  13. package/lib/index.js +24 -0
  14. package/lib/index.ts +13 -0
  15. package/package.json +65 -0
  16. package/src/addons.cpp +506 -0
  17. package/src/llama.cpp/CMakeLists.txt +1320 -0
  18. package/src/llama.cpp/build.zig +172 -0
  19. package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
  20. package/src/llama.cpp/common/CMakeLists.txt +87 -0
  21. package/src/llama.cpp/common/base64.hpp +392 -0
  22. package/src/llama.cpp/common/common.cpp +2949 -0
  23. package/src/llama.cpp/common/common.h +324 -0
  24. package/src/llama.cpp/common/console.cpp +501 -0
  25. package/src/llama.cpp/common/console.h +19 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +440 -0
  27. package/src/llama.cpp/common/grammar-parser.h +29 -0
  28. package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/json.hpp +24766 -0
  31. package/src/llama.cpp/common/log.h +724 -0
  32. package/src/llama.cpp/common/ngram-cache.cpp +282 -0
  33. package/src/llama.cpp/common/ngram-cache.h +94 -0
  34. package/src/llama.cpp/common/sampling.cpp +353 -0
  35. package/src/llama.cpp/common/sampling.h +147 -0
  36. package/src/llama.cpp/common/stb_image.h +8396 -0
  37. package/src/llama.cpp/common/train.cpp +1513 -0
  38. package/src/llama.cpp/common/train.h +233 -0
  39. package/src/llama.cpp/examples/CMakeLists.txt +52 -0
  40. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
  41. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
  42. package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
  43. package/src/llama.cpp/examples/batched/batched.cpp +262 -0
  44. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
  45. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
  46. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
  47. package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
  48. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
  49. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
  50. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
  52. package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
  54. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
  55. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
  56. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
  58. package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
  59. package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
  60. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
  61. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
  62. package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
  64. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
  65. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
  66. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
  67. package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
  68. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
  69. package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
  70. package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
  71. package/src/llama.cpp/examples/infill/infill.cpp +767 -0
  72. package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
  73. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
  74. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
  75. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
  76. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
  77. package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
  78. package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
  79. package/src/llama.cpp/examples/llava/clip.h +85 -0
  80. package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
  81. package/src/llama.cpp/examples/llava/llava.cpp +426 -0
  82. package/src/llama.cpp/examples/llava/llava.h +50 -0
  83. package/src/llama.cpp/examples/llava/requirements.txt +3 -0
  84. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
  85. package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
  86. package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
  87. package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
  88. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
  89. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
  90. package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
  91. package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
  92. package/src/llama.cpp/examples/main/main.cpp +957 -0
  93. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
  94. package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
  95. package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
  96. package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
  97. package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
  98. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
  99. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
  100. package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
  101. package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
  102. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
  103. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
  104. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
  106. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
  107. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
  108. package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
  109. package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
  110. package/src/llama.cpp/examples/server/httplib.h +9465 -0
  111. package/src/llama.cpp/examples/server/server.cpp +3826 -0
  112. package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
  113. package/src/llama.cpp/examples/server/utils.hpp +653 -0
  114. package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
  115. package/src/llama.cpp/examples/simple/simple.cpp +183 -0
  116. package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
  117. package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
  118. package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
  119. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
  120. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
  121. package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
  122. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
  123. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
  124. package/src/llama.cpp/ggml-alloc.c +985 -0
  125. package/src/llama.cpp/ggml-alloc.h +76 -0
  126. package/src/llama.cpp/ggml-backend-impl.h +141 -0
  127. package/src/llama.cpp/ggml-backend.c +2099 -0
  128. package/src/llama.cpp/ggml-backend.h +233 -0
  129. package/src/llama.cpp/ggml-common.h +1853 -0
  130. package/src/llama.cpp/ggml-cuda.h +43 -0
  131. package/src/llama.cpp/ggml-impl.h +265 -0
  132. package/src/llama.cpp/ggml-kompute.cpp +2006 -0
  133. package/src/llama.cpp/ggml-kompute.h +46 -0
  134. package/src/llama.cpp/ggml-metal.h +66 -0
  135. package/src/llama.cpp/ggml-mpi.c +216 -0
  136. package/src/llama.cpp/ggml-mpi.h +39 -0
  137. package/src/llama.cpp/ggml-opencl.cpp +2301 -0
  138. package/src/llama.cpp/ggml-opencl.h +36 -0
  139. package/src/llama.cpp/ggml-quants.c +12678 -0
  140. package/src/llama.cpp/ggml-quants.h +133 -0
  141. package/src/llama.cpp/ggml-sycl.cpp +17882 -0
  142. package/src/llama.cpp/ggml-sycl.h +49 -0
  143. package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
  144. package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
  145. package/src/llama.cpp/ggml-vulkan.h +29 -0
  146. package/src/llama.cpp/ggml.c +21819 -0
  147. package/src/llama.cpp/ggml.h +2403 -0
  148. package/src/llama.cpp/llama.cpp +17468 -0
  149. package/src/llama.cpp/llama.h +1117 -0
  150. package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
  151. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  152. package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
  153. package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
  154. package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
  155. package/src/llama.cpp/prompts/alpaca.txt +1 -0
  156. package/src/llama.cpp/prompts/assistant.txt +31 -0
  157. package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  158. package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
  159. package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  160. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  161. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  162. package/src/llama.cpp/prompts/chat.txt +28 -0
  163. package/src/llama.cpp/prompts/dan-modified.txt +1 -0
  164. package/src/llama.cpp/prompts/dan.txt +1 -0
  165. package/src/llama.cpp/prompts/mnemonics.txt +93 -0
  166. package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
  167. package/src/llama.cpp/prompts/reason-act.txt +18 -0
  168. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
  169. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
  170. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
  171. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
  172. package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
  173. package/src/llama.cpp/requirements.txt +12 -0
  174. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
  175. package/src/llama.cpp/scripts/xxd.cmake +16 -0
  176. package/src/llama.cpp/sgemm.cpp +999 -0
  177. package/src/llama.cpp/sgemm.h +12 -0
  178. package/src/llama.cpp/tests/CMakeLists.txt +78 -0
  179. package/src/llama.cpp/tests/get-model.cpp +21 -0
  180. package/src/llama.cpp/tests/get-model.h +2 -0
  181. package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
  182. package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
  183. package/src/llama.cpp/tests/test-c.c +7 -0
  184. package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
  185. package/src/llama.cpp/tests/test-double-float.cpp +57 -0
  186. package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
  187. package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
  188. package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
  189. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
  190. package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
  191. package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
  192. package/src/llama.cpp/tests/test-opt.cpp +181 -0
  193. package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
  194. package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
  195. package/src/llama.cpp/tests/test-rope.cpp +221 -0
  196. package/src/llama.cpp/tests/test-sampling.cpp +301 -0
  197. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
  198. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
  199. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
  200. package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
  201. package/src/llama.cpp/unicode-data.cpp +1651 -0
  202. package/src/llama.cpp/unicode-data.h +16 -0
  203. package/src/llama.cpp/unicode.cpp +277 -0
  204. package/src/llama.cpp/unicode.h +28 -0
@@ -0,0 +1,243 @@
1
+ #ifdef NDEBUG
2
+ #undef NDEBUG
3
+ #endif
4
+
5
+ #define LLAMA_API_INTERNAL
6
+
7
+ #include "ggml.h"
8
+ #include "llama.h"
9
+ #include "grammar-parser.h"
10
+ #include "unicode.h"
11
+ #include <cassert>
12
+ #include <string>
13
+
14
+ static void test_simple_grammar() {
15
+ // Test case for a simple grammar
16
+ const std::string grammar_str = R"""(root ::= expr
17
+ expr ::= term ("+" term)*
18
+ term ::= number
19
+ number ::= [0-9]+)""";
20
+
21
+ grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
22
+
23
+ // Ensure we parsed correctly
24
+ assert(!parsed_grammar.rules.empty());
25
+
26
+ // Ensure we have a root node
27
+ assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
28
+
29
+ std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
30
+ llama_grammar* grammar = llama_grammar_init(
31
+ grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
32
+
33
+ std::string input = "123+456";
34
+
35
+ auto decoded = decode_utf8(input, {});
36
+
37
+ const auto & code_points = decoded.first;
38
+
39
+ for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
40
+ auto prev_stacks = grammar->stacks;
41
+ llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
42
+ assert(!grammar->stacks.empty());
43
+ }
44
+
45
+ bool completed_grammar = false;
46
+
47
+ for (const auto & stack : grammar->stacks) {
48
+ if (stack.empty()) {
49
+ completed_grammar = true;
50
+ break;
51
+ }
52
+ }
53
+
54
+ assert(completed_grammar);
55
+
56
+ // Clean up allocated memory
57
+ llama_grammar_free(grammar);
58
+ }
59
+
60
+ static void test_complex_grammar() {
61
+ // Test case for a more complex grammar, with both failure strings and success strings
62
+ const std::string grammar_str = R"""(root ::= expression
63
+ expression ::= term ws (("+"|"-") ws term)*
64
+ term ::= factor ws (("*"|"/") ws factor)*
65
+ factor ::= number | variable | "(" expression ")" | function-call
66
+ number ::= [0-9]+
67
+ variable ::= [a-zA-Z_][a-zA-Z0-9_]*
68
+ function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
69
+ ws ::= [ \t\n\r]?)""";
70
+
71
+ grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
72
+
73
+ // Ensure we parsed correctly
74
+ assert(!parsed_grammar.rules.empty());
75
+
76
+ // Ensure we have a root node
77
+ assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
78
+
79
+ std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
80
+ llama_grammar* grammar = llama_grammar_init(
81
+ grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
82
+
83
+ // Save the original grammar stacks so that we can reset after every new string we want to test
84
+ auto original_stacks = grammar->stacks;
85
+
86
+ // Test a few strings
87
+ std::vector<std::string> test_strings_pass = {
88
+ "42",
89
+ "1*2*3*4*5",
90
+ "x",
91
+ "x+10",
92
+ "x1+y2",
93
+ "(a+b)*(c-d)",
94
+ "func()",
95
+ "func(x,y+2)",
96
+ "a*(b+c)-d/e",
97
+ "f(g(x),h(y,z))",
98
+ "x + 10",
99
+ "x1 + y2",
100
+ "(a + b) * (c - d)",
101
+ "func()",
102
+ "func(x, y + 2)",
103
+ "a * (b + c) - d / e",
104
+ "f(g(x), h(y, z))",
105
+ "123+456",
106
+ "123*456*789-123/456+789*123",
107
+ "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
108
+ };
109
+
110
+ std::vector<std::string> test_strings_fail = {
111
+ "+",
112
+ "/ 3x",
113
+ "x + + y",
114
+ "a * / b",
115
+ "func(,)",
116
+ "func(x y)",
117
+ "(a + b",
118
+ "x + y)",
119
+ "a + b * (c - d",
120
+ "42 +",
121
+ "x +",
122
+ "x + 10 +",
123
+ "(a + b) * (c - d",
124
+ "func(",
125
+ "func(x, y + 2",
126
+ "a * (b + c) - d /",
127
+ "f(g(x), h(y, z)",
128
+ "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
129
+ };
130
+
131
+ // Passing strings
132
+ for (const auto & test_string : test_strings_pass) {
133
+ auto decoded = decode_utf8(test_string, {});
134
+
135
+ const auto & code_points = decoded.first;
136
+
137
+ int pos = 0;
138
+ for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
139
+ ++pos;
140
+ auto prev_stacks = grammar->stacks;
141
+ llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
142
+
143
+ // Expect that each code point will not cause the grammar to fail
144
+ if (grammar->stacks.empty()) {
145
+ fprintf(stdout, "Error at position %d\n", pos);
146
+ fprintf(stderr, "Unexpected character '%s'\n", unicode_cpt_to_utf8(*it).c_str());
147
+ fprintf(stderr, "Input string is %s:\n", test_string.c_str());
148
+ }
149
+ assert(!grammar->stacks.empty());
150
+ }
151
+
152
+ bool completed_grammar = false;
153
+
154
+ for (const auto & stack : grammar->stacks) {
155
+ if (stack.empty()) {
156
+ completed_grammar = true;
157
+ break;
158
+ }
159
+ }
160
+
161
+ assert(completed_grammar);
162
+
163
+ // Reset the grammar stacks
164
+ grammar->stacks = original_stacks;
165
+ }
166
+
167
+ // Failing strings
168
+ for (const auto & test_string : test_strings_fail) {
169
+ auto decoded = decode_utf8(test_string, {});
170
+
171
+ const auto & code_points = decoded.first;
172
+ bool parse_failed = false;
173
+
174
+ for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
175
+ auto prev_stacks = grammar->stacks;
176
+ llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
177
+ if (grammar->stacks.empty()) {
178
+ parse_failed = true;
179
+ break;
180
+ }
181
+ assert(!grammar->stacks.empty());
182
+ }
183
+
184
+ bool completed_grammar = false;
185
+
186
+ for (const auto & stack : grammar->stacks) {
187
+ if (stack.empty()) {
188
+ completed_grammar = true;
189
+ break;
190
+ }
191
+ }
192
+
193
+ // Ensure that the grammar is not completed, or that each string failed to match as-expected
194
+ assert((!completed_grammar) || parse_failed);
195
+
196
+ // Reset the grammar stacks
197
+ grammar->stacks = original_stacks;
198
+ }
199
+
200
+ // Clean up allocated memory
201
+ llama_grammar_free(grammar);
202
+ }
203
+
204
+ static void test_failure_missing_root() {
205
+ // Test case for a grammar that is missing a root rule
206
+ const std::string grammar_str = R"""(rot ::= expr
207
+ expr ::= term ("+" term)*
208
+ term ::= number
209
+ number ::= [0-9]+)""";
210
+
211
+ grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
212
+
213
+ // Ensure we parsed correctly
214
+ assert(!parsed_grammar.rules.empty());
215
+
216
+ // Ensure we do NOT have a root node
217
+ assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
218
+ }
219
+
220
+ static void test_failure_missing_reference() {
221
+ // Test case for a grammar that is missing a referenced rule
222
+ const std::string grammar_str = R"""(root ::= expr
223
+ expr ::= term ("+" term)*
224
+ term ::= numero
225
+ number ::= [0-9]+)""";
226
+
227
+ fprintf(stderr, "Expected error: ");
228
+
229
+ grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
230
+
231
+ // Ensure we did NOT parsed correctly
232
+ assert(parsed_grammar.rules.empty());
233
+
234
+ fprintf(stderr, "End of expected error. Test successful.\n");
235
+ }
236
+
237
+ int main() {
238
+ test_simple_grammar();
239
+ test_complex_grammar();
240
+ test_failure_missing_root();
241
+ test_failure_missing_reference();
242
+ return 0;
243
+ }
@@ -0,0 +1,250 @@
1
+ #ifdef NDEBUG
2
+ #undef NDEBUG
3
+ #endif
4
+
5
+ #include "llama.h"
6
+ #include "grammar-parser.h"
7
+
8
+ #include <cassert>
9
+
10
+ int main()
11
+ {
12
+ grammar_parser::parse_state parsed_grammar;
13
+
14
+ const char *grammar_bytes = R"""(root ::= (expr "=" term "\n")+
15
+ expr ::= term ([-+*/] term)*
16
+ term ::= [0-9]+)""";
17
+
18
+ parsed_grammar = grammar_parser::parse(grammar_bytes);
19
+
20
+ std::vector<std::pair<std::string, uint32_t>> expected = {
21
+ {"expr", 2},
22
+ {"expr_5", 5},
23
+ {"expr_6", 6},
24
+ {"root", 0},
25
+ {"root_1", 1},
26
+ {"root_4", 4},
27
+ {"term", 3},
28
+ {"term_7", 7},
29
+ };
30
+
31
+ uint32_t index = 0;
32
+ for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
33
+ {
34
+ std::string key = it->first;
35
+ uint32_t value = it->second;
36
+ std::pair<std::string, uint32_t> expected_pair = expected[index];
37
+
38
+ // pretty print error message before asserting
39
+ if (expected_pair.first != key || expected_pair.second != value)
40
+ {
41
+ fprintf(stderr, "expected_pair: %s, %u\n", expected_pair.first.c_str(), expected_pair.second);
42
+ fprintf(stderr, "actual_pair: %s, %u\n", key.c_str(), value);
43
+ fprintf(stderr, "expected_pair != actual_pair\n");
44
+ }
45
+
46
+ assert(expected_pair.first == key && expected_pair.second == value);
47
+
48
+ index++;
49
+ }
50
+ std::vector<llama_grammar_element> expected_rules = {
51
+ {LLAMA_GRETYPE_RULE_REF, 4},
52
+ {LLAMA_GRETYPE_END, 0},
53
+ {LLAMA_GRETYPE_RULE_REF, 2},
54
+ {LLAMA_GRETYPE_CHAR, 61},
55
+ {LLAMA_GRETYPE_RULE_REF, 3},
56
+ {LLAMA_GRETYPE_CHAR, 10},
57
+ {LLAMA_GRETYPE_END, 0},
58
+ {LLAMA_GRETYPE_RULE_REF, 3},
59
+ {LLAMA_GRETYPE_RULE_REF, 6},
60
+ {LLAMA_GRETYPE_END, 0},
61
+ {LLAMA_GRETYPE_RULE_REF, 7},
62
+ {LLAMA_GRETYPE_END, 0},
63
+ {LLAMA_GRETYPE_RULE_REF, 1},
64
+ {LLAMA_GRETYPE_RULE_REF, 4},
65
+ {LLAMA_GRETYPE_ALT, 0},
66
+ {LLAMA_GRETYPE_RULE_REF, 1},
67
+ {LLAMA_GRETYPE_END, 0},
68
+ {LLAMA_GRETYPE_CHAR, 45},
69
+ {LLAMA_GRETYPE_CHAR_ALT, 43},
70
+ {LLAMA_GRETYPE_CHAR_ALT, 42},
71
+ {LLAMA_GRETYPE_CHAR_ALT, 47},
72
+ {LLAMA_GRETYPE_RULE_REF, 3},
73
+ {LLAMA_GRETYPE_END, 0},
74
+ {LLAMA_GRETYPE_RULE_REF, 5},
75
+ {LLAMA_GRETYPE_RULE_REF, 6},
76
+ {LLAMA_GRETYPE_ALT, 0},
77
+ {LLAMA_GRETYPE_END, 0},
78
+ {LLAMA_GRETYPE_CHAR, 48},
79
+ {LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
80
+ {LLAMA_GRETYPE_RULE_REF, 7},
81
+ {LLAMA_GRETYPE_ALT, 0},
82
+ {LLAMA_GRETYPE_CHAR, 48},
83
+ {LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
84
+ {LLAMA_GRETYPE_END, 0},
85
+ };
86
+
87
+ index = 0;
88
+ for (auto rule : parsed_grammar.rules)
89
+ {
90
+ // compare rule to expected rule
91
+ for (uint32_t i = 0; i < rule.size(); i++)
92
+ {
93
+ llama_grammar_element element = rule[i];
94
+ llama_grammar_element expected_element = expected_rules[index];
95
+
96
+ // pretty print error message before asserting
97
+ if (expected_element.type != element.type || expected_element.value != element.value)
98
+ {
99
+ fprintf(stderr, "index: %u\n", index);
100
+ fprintf(stderr, "expected_element: %d, %u\n", expected_element.type, expected_element.value);
101
+ fprintf(stderr, "actual_element: %d, %u\n", element.type, element.value);
102
+ fprintf(stderr, "expected_element != actual_element\n");
103
+ }
104
+
105
+ assert(expected_element.type == element.type && expected_element.value == element.value);
106
+ index++;
107
+ }
108
+ }
109
+
110
+ const char *longer_grammar_bytes = R"""(
111
+ root ::= (expr "=" ws term "\n")+
112
+ expr ::= term ([-+*/] term)*
113
+ term ::= ident | num | "(" ws expr ")" ws
114
+ ident ::= [a-z] [a-z0-9_]* ws
115
+ num ::= [0-9]+ ws
116
+ ws ::= [ \t\n]*
117
+ )""";
118
+
119
+ parsed_grammar = grammar_parser::parse(longer_grammar_bytes);
120
+
121
+ expected = {
122
+ {"expr", 2},
123
+ {"expr_6", 6},
124
+ {"expr_7", 7},
125
+ {"ident", 8},
126
+ {"ident_10", 10},
127
+ {"num", 9},
128
+ {"num_11", 11},
129
+ {"root", 0},
130
+ {"root_1", 1},
131
+ {"root_5", 5},
132
+ {"term", 4},
133
+ {"ws", 3},
134
+ {"ws_12", 12},
135
+ };
136
+
137
+ index = 0;
138
+ for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
139
+ {
140
+ std::string key = it->first;
141
+ uint32_t value = it->second;
142
+ std::pair<std::string, uint32_t> expected_pair = expected[index];
143
+
144
+ // pretty print error message before asserting
145
+ if (expected_pair.first != key || expected_pair.second != value)
146
+ {
147
+ fprintf(stderr, "expected_pair: %s, %u\n", expected_pair.first.c_str(), expected_pair.second);
148
+ fprintf(stderr, "actual_pair: %s, %u\n", key.c_str(), value);
149
+ fprintf(stderr, "expected_pair != actual_pair\n");
150
+ }
151
+
152
+ assert(expected_pair.first == key && expected_pair.second == value);
153
+
154
+ index++;
155
+ }
156
+ expected_rules = {
157
+ {LLAMA_GRETYPE_RULE_REF, 5},
158
+ {LLAMA_GRETYPE_END, 0},
159
+ {LLAMA_GRETYPE_RULE_REF, 2},
160
+ {LLAMA_GRETYPE_CHAR, 61},
161
+ {LLAMA_GRETYPE_RULE_REF, 3},
162
+ {LLAMA_GRETYPE_RULE_REF, 4},
163
+ {LLAMA_GRETYPE_CHAR, 10},
164
+ {LLAMA_GRETYPE_END, 0},
165
+ {LLAMA_GRETYPE_RULE_REF, 4},
166
+ {LLAMA_GRETYPE_RULE_REF, 7},
167
+ {LLAMA_GRETYPE_END, 0},
168
+ {LLAMA_GRETYPE_RULE_REF, 12},
169
+ {LLAMA_GRETYPE_END, 0},
170
+ {LLAMA_GRETYPE_RULE_REF, 8},
171
+ {LLAMA_GRETYPE_ALT, 0},
172
+ {LLAMA_GRETYPE_RULE_REF, 9},
173
+ {LLAMA_GRETYPE_ALT, 0},
174
+ {LLAMA_GRETYPE_CHAR, 40},
175
+ {LLAMA_GRETYPE_RULE_REF, 3},
176
+ {LLAMA_GRETYPE_RULE_REF, 2},
177
+ {LLAMA_GRETYPE_CHAR, 41},
178
+ {LLAMA_GRETYPE_RULE_REF, 3},
179
+ {LLAMA_GRETYPE_END, 0},
180
+ {LLAMA_GRETYPE_RULE_REF, 1},
181
+ {LLAMA_GRETYPE_RULE_REF, 5},
182
+ {LLAMA_GRETYPE_ALT, 0},
183
+ {LLAMA_GRETYPE_RULE_REF, 1},
184
+ {LLAMA_GRETYPE_END, 0},
185
+ {LLAMA_GRETYPE_CHAR, 45},
186
+ {LLAMA_GRETYPE_CHAR_ALT, 43},
187
+ {LLAMA_GRETYPE_CHAR_ALT, 42},
188
+ {LLAMA_GRETYPE_CHAR_ALT, 47},
189
+ {LLAMA_GRETYPE_RULE_REF, 4},
190
+ {LLAMA_GRETYPE_END, 0},
191
+ {LLAMA_GRETYPE_RULE_REF, 6},
192
+ {LLAMA_GRETYPE_RULE_REF, 7},
193
+ {LLAMA_GRETYPE_ALT, 0},
194
+ {LLAMA_GRETYPE_END, 0},
195
+ {LLAMA_GRETYPE_CHAR, 97},
196
+ {LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
197
+ {LLAMA_GRETYPE_RULE_REF, 10},
198
+ {LLAMA_GRETYPE_RULE_REF, 3},
199
+ {LLAMA_GRETYPE_END, 0},
200
+ {LLAMA_GRETYPE_RULE_REF, 11},
201
+ {LLAMA_GRETYPE_RULE_REF, 3},
202
+ {LLAMA_GRETYPE_END, 0},
203
+ {LLAMA_GRETYPE_CHAR, 97},
204
+ {LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
205
+ {LLAMA_GRETYPE_CHAR_ALT, 48},
206
+ {LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
207
+ {LLAMA_GRETYPE_CHAR_ALT, 95},
208
+ {LLAMA_GRETYPE_RULE_REF, 10},
209
+ {LLAMA_GRETYPE_ALT, 0},
210
+ {LLAMA_GRETYPE_END, 0},
211
+ {LLAMA_GRETYPE_CHAR, 48},
212
+ {LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
213
+ {LLAMA_GRETYPE_RULE_REF, 11},
214
+ {LLAMA_GRETYPE_ALT, 0},
215
+ {LLAMA_GRETYPE_CHAR, 48},
216
+ {LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
217
+ {LLAMA_GRETYPE_END, 0},
218
+ {LLAMA_GRETYPE_CHAR, 32},
219
+ {LLAMA_GRETYPE_CHAR_ALT, 9},
220
+ {LLAMA_GRETYPE_CHAR_ALT, 10},
221
+ {LLAMA_GRETYPE_RULE_REF, 12},
222
+ {LLAMA_GRETYPE_ALT, 0},
223
+ {LLAMA_GRETYPE_END, 0},
224
+ };
225
+
226
+ index = 0;
227
+ for (auto rule : parsed_grammar.rules)
228
+ {
229
+ // compare rule to expected rule
230
+ for (uint32_t i = 0; i < rule.size(); i++)
231
+ {
232
+ llama_grammar_element element = rule[i];
233
+ llama_grammar_element expected_element = expected_rules[index];
234
+
235
+ // pretty print error message before asserting
236
+ if (expected_element.type != element.type || expected_element.value != element.value)
237
+ {
238
+ fprintf(stderr, "index: %u\n", index);
239
+ fprintf(stderr, "expected_element: %d, %u\n", expected_element.type, expected_element.value);
240
+ fprintf(stderr, "actual_element: %d, %u\n", element.type, element.value);
241
+ fprintf(stderr, "expected_element != actual_element\n");
242
+ }
243
+
244
+ assert(expected_element.type == element.type && expected_element.value == element.value);
245
+ index++;
246
+ }
247
+ }
248
+
249
+ return 0;
250
+ }