@fugood/llama.node 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +85 -0
- package/README.md +56 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/lib/binding.js +13 -0
- package/lib/binding.ts +57 -0
- package/lib/index.js +24 -0
- package/lib/index.ts +13 -0
- package/package.json +65 -0
- package/src/addons.cpp +506 -0
- package/src/llama.cpp/CMakeLists.txt +1320 -0
- package/src/llama.cpp/build.zig +172 -0
- package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
- package/src/llama.cpp/common/CMakeLists.txt +87 -0
- package/src/llama.cpp/common/base64.hpp +392 -0
- package/src/llama.cpp/common/common.cpp +2949 -0
- package/src/llama.cpp/common/common.h +324 -0
- package/src/llama.cpp/common/console.cpp +501 -0
- package/src/llama.cpp/common/console.h +19 -0
- package/src/llama.cpp/common/grammar-parser.cpp +440 -0
- package/src/llama.cpp/common/grammar-parser.h +29 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
- package/src/llama.cpp/common/json.hpp +24766 -0
- package/src/llama.cpp/common/log.h +724 -0
- package/src/llama.cpp/common/ngram-cache.cpp +282 -0
- package/src/llama.cpp/common/ngram-cache.h +94 -0
- package/src/llama.cpp/common/sampling.cpp +353 -0
- package/src/llama.cpp/common/sampling.h +147 -0
- package/src/llama.cpp/common/stb_image.h +8396 -0
- package/src/llama.cpp/common/train.cpp +1513 -0
- package/src/llama.cpp/common/train.h +233 -0
- package/src/llama.cpp/examples/CMakeLists.txt +52 -0
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
- package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/batched/batched.cpp +262 -0
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
- package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
- package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/infill/infill.cpp +767 -0
- package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
- package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
- package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
- package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
- package/src/llama.cpp/examples/llava/clip.h +85 -0
- package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
- package/src/llama.cpp/examples/llava/llava.cpp +426 -0
- package/src/llama.cpp/examples/llava/llava.h +50 -0
- package/src/llama.cpp/examples/llava/requirements.txt +3 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
- package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
- package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/main/main.cpp +957 -0
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
- package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
- package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
- package/src/llama.cpp/examples/server/httplib.h +9465 -0
- package/src/llama.cpp/examples/server/server.cpp +3826 -0
- package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
- package/src/llama.cpp/examples/server/utils.hpp +653 -0
- package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple/simple.cpp +183 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
- package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
- package/src/llama.cpp/ggml-alloc.c +985 -0
- package/src/llama.cpp/ggml-alloc.h +76 -0
- package/src/llama.cpp/ggml-backend-impl.h +141 -0
- package/src/llama.cpp/ggml-backend.c +2099 -0
- package/src/llama.cpp/ggml-backend.h +233 -0
- package/src/llama.cpp/ggml-common.h +1853 -0
- package/src/llama.cpp/ggml-cuda.h +43 -0
- package/src/llama.cpp/ggml-impl.h +265 -0
- package/src/llama.cpp/ggml-kompute.cpp +2006 -0
- package/src/llama.cpp/ggml-kompute.h +46 -0
- package/src/llama.cpp/ggml-metal.h +66 -0
- package/src/llama.cpp/ggml-mpi.c +216 -0
- package/src/llama.cpp/ggml-mpi.h +39 -0
- package/src/llama.cpp/ggml-opencl.cpp +2301 -0
- package/src/llama.cpp/ggml-opencl.h +36 -0
- package/src/llama.cpp/ggml-quants.c +12678 -0
- package/src/llama.cpp/ggml-quants.h +133 -0
- package/src/llama.cpp/ggml-sycl.cpp +17882 -0
- package/src/llama.cpp/ggml-sycl.h +49 -0
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
- package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
- package/src/llama.cpp/ggml-vulkan.h +29 -0
- package/src/llama.cpp/ggml.c +21819 -0
- package/src/llama.cpp/ggml.h +2403 -0
- package/src/llama.cpp/llama.cpp +17468 -0
- package/src/llama.cpp/llama.h +1117 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
- package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
- package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
- package/src/llama.cpp/prompts/alpaca.txt +1 -0
- package/src/llama.cpp/prompts/assistant.txt +31 -0
- package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
- package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
- package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
- package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
- package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
- package/src/llama.cpp/prompts/chat.txt +28 -0
- package/src/llama.cpp/prompts/dan-modified.txt +1 -0
- package/src/llama.cpp/prompts/dan.txt +1 -0
- package/src/llama.cpp/prompts/mnemonics.txt +93 -0
- package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
- package/src/llama.cpp/prompts/reason-act.txt +18 -0
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
- package/src/llama.cpp/requirements.txt +12 -0
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
- package/src/llama.cpp/scripts/xxd.cmake +16 -0
- package/src/llama.cpp/sgemm.cpp +999 -0
- package/src/llama.cpp/sgemm.h +12 -0
- package/src/llama.cpp/tests/CMakeLists.txt +78 -0
- package/src/llama.cpp/tests/get-model.cpp +21 -0
- package/src/llama.cpp/tests/get-model.h +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
- package/src/llama.cpp/tests/test-c.c +7 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
- package/src/llama.cpp/tests/test-double-float.cpp +57 -0
- package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
- package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
- package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
- package/src/llama.cpp/tests/test-opt.cpp +181 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
- package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
- package/src/llama.cpp/tests/test-rope.cpp +221 -0
- package/src/llama.cpp/tests/test-sampling.cpp +301 -0
- package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
- package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
- package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
- package/src/llama.cpp/unicode-data.cpp +1651 -0
- package/src/llama.cpp/unicode-data.h +16 -0
- package/src/llama.cpp/unicode.cpp +277 -0
- package/src/llama.cpp/unicode.h +28 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
#ifdef NDEBUG
|
|
2
|
+
#undef NDEBUG
|
|
3
|
+
#endif
|
|
4
|
+
|
|
5
|
+
#define LLAMA_API_INTERNAL
|
|
6
|
+
|
|
7
|
+
#include "ggml.h"
|
|
8
|
+
#include "llama.h"
|
|
9
|
+
#include "grammar-parser.h"
|
|
10
|
+
#include "unicode.h"
|
|
11
|
+
#include <cassert>
|
|
12
|
+
#include <string>
|
|
13
|
+
|
|
14
|
+
static void test_simple_grammar() {
|
|
15
|
+
// Test case for a simple grammar
|
|
16
|
+
const std::string grammar_str = R"""(root ::= expr
|
|
17
|
+
expr ::= term ("+" term)*
|
|
18
|
+
term ::= number
|
|
19
|
+
number ::= [0-9]+)""";
|
|
20
|
+
|
|
21
|
+
grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
|
|
22
|
+
|
|
23
|
+
// Ensure we parsed correctly
|
|
24
|
+
assert(!parsed_grammar.rules.empty());
|
|
25
|
+
|
|
26
|
+
// Ensure we have a root node
|
|
27
|
+
assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
|
|
28
|
+
|
|
29
|
+
std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
|
|
30
|
+
llama_grammar* grammar = llama_grammar_init(
|
|
31
|
+
grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
|
|
32
|
+
|
|
33
|
+
std::string input = "123+456";
|
|
34
|
+
|
|
35
|
+
auto decoded = decode_utf8(input, {});
|
|
36
|
+
|
|
37
|
+
const auto & code_points = decoded.first;
|
|
38
|
+
|
|
39
|
+
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
|
|
40
|
+
auto prev_stacks = grammar->stacks;
|
|
41
|
+
llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
|
|
42
|
+
assert(!grammar->stacks.empty());
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
bool completed_grammar = false;
|
|
46
|
+
|
|
47
|
+
for (const auto & stack : grammar->stacks) {
|
|
48
|
+
if (stack.empty()) {
|
|
49
|
+
completed_grammar = true;
|
|
50
|
+
break;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
assert(completed_grammar);
|
|
55
|
+
|
|
56
|
+
// Clean up allocated memory
|
|
57
|
+
llama_grammar_free(grammar);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
static void test_complex_grammar() {
|
|
61
|
+
// Test case for a more complex grammar, with both failure strings and success strings
|
|
62
|
+
const std::string grammar_str = R"""(root ::= expression
|
|
63
|
+
expression ::= term ws (("+"|"-") ws term)*
|
|
64
|
+
term ::= factor ws (("*"|"/") ws factor)*
|
|
65
|
+
factor ::= number | variable | "(" expression ")" | function-call
|
|
66
|
+
number ::= [0-9]+
|
|
67
|
+
variable ::= [a-zA-Z_][a-zA-Z0-9_]*
|
|
68
|
+
function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
|
|
69
|
+
ws ::= [ \t\n\r]?)""";
|
|
70
|
+
|
|
71
|
+
grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
|
|
72
|
+
|
|
73
|
+
// Ensure we parsed correctly
|
|
74
|
+
assert(!parsed_grammar.rules.empty());
|
|
75
|
+
|
|
76
|
+
// Ensure we have a root node
|
|
77
|
+
assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
|
|
78
|
+
|
|
79
|
+
std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
|
|
80
|
+
llama_grammar* grammar = llama_grammar_init(
|
|
81
|
+
grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
|
|
82
|
+
|
|
83
|
+
// Save the original grammar stacks so that we can reset after every new string we want to test
|
|
84
|
+
auto original_stacks = grammar->stacks;
|
|
85
|
+
|
|
86
|
+
// Test a few strings
|
|
87
|
+
std::vector<std::string> test_strings_pass = {
|
|
88
|
+
"42",
|
|
89
|
+
"1*2*3*4*5",
|
|
90
|
+
"x",
|
|
91
|
+
"x+10",
|
|
92
|
+
"x1+y2",
|
|
93
|
+
"(a+b)*(c-d)",
|
|
94
|
+
"func()",
|
|
95
|
+
"func(x,y+2)",
|
|
96
|
+
"a*(b+c)-d/e",
|
|
97
|
+
"f(g(x),h(y,z))",
|
|
98
|
+
"x + 10",
|
|
99
|
+
"x1 + y2",
|
|
100
|
+
"(a + b) * (c - d)",
|
|
101
|
+
"func()",
|
|
102
|
+
"func(x, y + 2)",
|
|
103
|
+
"a * (b + c) - d / e",
|
|
104
|
+
"f(g(x), h(y, z))",
|
|
105
|
+
"123+456",
|
|
106
|
+
"123*456*789-123/456+789*123",
|
|
107
|
+
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
std::vector<std::string> test_strings_fail = {
|
|
111
|
+
"+",
|
|
112
|
+
"/ 3x",
|
|
113
|
+
"x + + y",
|
|
114
|
+
"a * / b",
|
|
115
|
+
"func(,)",
|
|
116
|
+
"func(x y)",
|
|
117
|
+
"(a + b",
|
|
118
|
+
"x + y)",
|
|
119
|
+
"a + b * (c - d",
|
|
120
|
+
"42 +",
|
|
121
|
+
"x +",
|
|
122
|
+
"x + 10 +",
|
|
123
|
+
"(a + b) * (c - d",
|
|
124
|
+
"func(",
|
|
125
|
+
"func(x, y + 2",
|
|
126
|
+
"a * (b + c) - d /",
|
|
127
|
+
"f(g(x), h(y, z)",
|
|
128
|
+
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
// Passing strings
|
|
132
|
+
for (const auto & test_string : test_strings_pass) {
|
|
133
|
+
auto decoded = decode_utf8(test_string, {});
|
|
134
|
+
|
|
135
|
+
const auto & code_points = decoded.first;
|
|
136
|
+
|
|
137
|
+
int pos = 0;
|
|
138
|
+
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
|
|
139
|
+
++pos;
|
|
140
|
+
auto prev_stacks = grammar->stacks;
|
|
141
|
+
llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
|
|
142
|
+
|
|
143
|
+
// Expect that each code point will not cause the grammar to fail
|
|
144
|
+
if (grammar->stacks.empty()) {
|
|
145
|
+
fprintf(stdout, "Error at position %d\n", pos);
|
|
146
|
+
fprintf(stderr, "Unexpected character '%s'\n", unicode_cpt_to_utf8(*it).c_str());
|
|
147
|
+
fprintf(stderr, "Input string is %s:\n", test_string.c_str());
|
|
148
|
+
}
|
|
149
|
+
assert(!grammar->stacks.empty());
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
bool completed_grammar = false;
|
|
153
|
+
|
|
154
|
+
for (const auto & stack : grammar->stacks) {
|
|
155
|
+
if (stack.empty()) {
|
|
156
|
+
completed_grammar = true;
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
assert(completed_grammar);
|
|
162
|
+
|
|
163
|
+
// Reset the grammar stacks
|
|
164
|
+
grammar->stacks = original_stacks;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// Failing strings
|
|
168
|
+
for (const auto & test_string : test_strings_fail) {
|
|
169
|
+
auto decoded = decode_utf8(test_string, {});
|
|
170
|
+
|
|
171
|
+
const auto & code_points = decoded.first;
|
|
172
|
+
bool parse_failed = false;
|
|
173
|
+
|
|
174
|
+
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
|
|
175
|
+
auto prev_stacks = grammar->stacks;
|
|
176
|
+
llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
|
|
177
|
+
if (grammar->stacks.empty()) {
|
|
178
|
+
parse_failed = true;
|
|
179
|
+
break;
|
|
180
|
+
}
|
|
181
|
+
assert(!grammar->stacks.empty());
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
bool completed_grammar = false;
|
|
185
|
+
|
|
186
|
+
for (const auto & stack : grammar->stacks) {
|
|
187
|
+
if (stack.empty()) {
|
|
188
|
+
completed_grammar = true;
|
|
189
|
+
break;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Ensure that the grammar is not completed, or that each string failed to match as-expected
|
|
194
|
+
assert((!completed_grammar) || parse_failed);
|
|
195
|
+
|
|
196
|
+
// Reset the grammar stacks
|
|
197
|
+
grammar->stacks = original_stacks;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// Clean up allocated memory
|
|
201
|
+
llama_grammar_free(grammar);
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// Parses a grammar whose first rule is misspelled "rot" and checks that the
// parser still produces rules but registers no symbol named "root".
static void test_failure_missing_root() {
    const std::string grammar_str = R"""(rot ::= expr
expr ::= term ("+" term)*
term ::= number
number ::= [0-9]+)""";

    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // The grammar text itself is well-formed, so rules are expected.
    const bool has_rules = !parsed_grammar.rules.empty();
    assert(has_rules);

    // No rule was named "root", so the symbol table must not contain it.
    const auto & symbols = parsed_grammar.symbol_ids;
    const bool has_root = symbols.find("root") != symbols.end();
    assert(!has_root);
}
|
|
219
|
+
|
|
220
|
+
// Parses a grammar in which "term" refers to "numero", a rule that is never
// defined, and checks that the parser returns an empty rule set. The stderr
// lines around the parse call frame whatever the parser itself reports there,
// so the output reads as an expected error.
static void test_failure_missing_reference() {
    const std::string grammar_str = R"""(root ::= expr
expr ::= term ("+" term)*
term ::= numero
number ::= [0-9]+)""";

    fprintf(stderr, "Expected error: ");

    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // Ensure the grammar did NOT parse correctly: no rules may be produced.
    const bool produced_rules = !parsed_grammar.rules.empty();
    assert(!produced_rules);

    fprintf(stderr, "End of expected error. Test successful.\n");
}
|
|
236
|
+
|
|
237
|
+
// Runs every grammar integration test in a fixed order. Each test reports
// failure through assert, so reaching the end means all of them passed.
int main() {
    using test_fn = void (*)();

    const test_fn tests[] = {
        test_simple_grammar,
        test_complex_grammar,
        test_failure_missing_root,
        test_failure_missing_reference,
    };

    for (const test_fn run_test : tests) {
        run_test();
    }

    return 0;
}
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
#ifdef NDEBUG
|
|
2
|
+
#undef NDEBUG
|
|
3
|
+
#endif
|
|
4
|
+
|
|
5
|
+
#include "llama.h"
|
|
6
|
+
#include "grammar-parser.h"
|
|
7
|
+
|
|
8
|
+
#include <cassert>
|
|
9
|
+
|
|
10
|
+
int main()
|
|
11
|
+
{
|
|
12
|
+
grammar_parser::parse_state parsed_grammar;
|
|
13
|
+
|
|
14
|
+
const char *grammar_bytes = R"""(root ::= (expr "=" term "\n")+
|
|
15
|
+
expr ::= term ([-+*/] term)*
|
|
16
|
+
term ::= [0-9]+)""";
|
|
17
|
+
|
|
18
|
+
parsed_grammar = grammar_parser::parse(grammar_bytes);
|
|
19
|
+
|
|
20
|
+
std::vector<std::pair<std::string, uint32_t>> expected = {
|
|
21
|
+
{"expr", 2},
|
|
22
|
+
{"expr_5", 5},
|
|
23
|
+
{"expr_6", 6},
|
|
24
|
+
{"root", 0},
|
|
25
|
+
{"root_1", 1},
|
|
26
|
+
{"root_4", 4},
|
|
27
|
+
{"term", 3},
|
|
28
|
+
{"term_7", 7},
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
uint32_t index = 0;
|
|
32
|
+
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
|
|
33
|
+
{
|
|
34
|
+
std::string key = it->first;
|
|
35
|
+
uint32_t value = it->second;
|
|
36
|
+
std::pair<std::string, uint32_t> expected_pair = expected[index];
|
|
37
|
+
|
|
38
|
+
// pretty print error message before asserting
|
|
39
|
+
if (expected_pair.first != key || expected_pair.second != value)
|
|
40
|
+
{
|
|
41
|
+
fprintf(stderr, "expected_pair: %s, %u\n", expected_pair.first.c_str(), expected_pair.second);
|
|
42
|
+
fprintf(stderr, "actual_pair: %s, %u\n", key.c_str(), value);
|
|
43
|
+
fprintf(stderr, "expected_pair != actual_pair\n");
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
assert(expected_pair.first == key && expected_pair.second == value);
|
|
47
|
+
|
|
48
|
+
index++;
|
|
49
|
+
}
|
|
50
|
+
std::vector<llama_grammar_element> expected_rules = {
|
|
51
|
+
{LLAMA_GRETYPE_RULE_REF, 4},
|
|
52
|
+
{LLAMA_GRETYPE_END, 0},
|
|
53
|
+
{LLAMA_GRETYPE_RULE_REF, 2},
|
|
54
|
+
{LLAMA_GRETYPE_CHAR, 61},
|
|
55
|
+
{LLAMA_GRETYPE_RULE_REF, 3},
|
|
56
|
+
{LLAMA_GRETYPE_CHAR, 10},
|
|
57
|
+
{LLAMA_GRETYPE_END, 0},
|
|
58
|
+
{LLAMA_GRETYPE_RULE_REF, 3},
|
|
59
|
+
{LLAMA_GRETYPE_RULE_REF, 6},
|
|
60
|
+
{LLAMA_GRETYPE_END, 0},
|
|
61
|
+
{LLAMA_GRETYPE_RULE_REF, 7},
|
|
62
|
+
{LLAMA_GRETYPE_END, 0},
|
|
63
|
+
{LLAMA_GRETYPE_RULE_REF, 1},
|
|
64
|
+
{LLAMA_GRETYPE_RULE_REF, 4},
|
|
65
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
66
|
+
{LLAMA_GRETYPE_RULE_REF, 1},
|
|
67
|
+
{LLAMA_GRETYPE_END, 0},
|
|
68
|
+
{LLAMA_GRETYPE_CHAR, 45},
|
|
69
|
+
{LLAMA_GRETYPE_CHAR_ALT, 43},
|
|
70
|
+
{LLAMA_GRETYPE_CHAR_ALT, 42},
|
|
71
|
+
{LLAMA_GRETYPE_CHAR_ALT, 47},
|
|
72
|
+
{LLAMA_GRETYPE_RULE_REF, 3},
|
|
73
|
+
{LLAMA_GRETYPE_END, 0},
|
|
74
|
+
{LLAMA_GRETYPE_RULE_REF, 5},
|
|
75
|
+
{LLAMA_GRETYPE_RULE_REF, 6},
|
|
76
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
77
|
+
{LLAMA_GRETYPE_END, 0},
|
|
78
|
+
{LLAMA_GRETYPE_CHAR, 48},
|
|
79
|
+
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
|
80
|
+
{LLAMA_GRETYPE_RULE_REF, 7},
|
|
81
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
82
|
+
{LLAMA_GRETYPE_CHAR, 48},
|
|
83
|
+
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
|
84
|
+
{LLAMA_GRETYPE_END, 0},
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
index = 0;
|
|
88
|
+
for (auto rule : parsed_grammar.rules)
|
|
89
|
+
{
|
|
90
|
+
// compare rule to expected rule
|
|
91
|
+
for (uint32_t i = 0; i < rule.size(); i++)
|
|
92
|
+
{
|
|
93
|
+
llama_grammar_element element = rule[i];
|
|
94
|
+
llama_grammar_element expected_element = expected_rules[index];
|
|
95
|
+
|
|
96
|
+
// pretty print error message before asserting
|
|
97
|
+
if (expected_element.type != element.type || expected_element.value != element.value)
|
|
98
|
+
{
|
|
99
|
+
fprintf(stderr, "index: %u\n", index);
|
|
100
|
+
fprintf(stderr, "expected_element: %d, %u\n", expected_element.type, expected_element.value);
|
|
101
|
+
fprintf(stderr, "actual_element: %d, %u\n", element.type, element.value);
|
|
102
|
+
fprintf(stderr, "expected_element != actual_element\n");
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
assert(expected_element.type == element.type && expected_element.value == element.value);
|
|
106
|
+
index++;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
const char *longer_grammar_bytes = R"""(
|
|
111
|
+
root ::= (expr "=" ws term "\n")+
|
|
112
|
+
expr ::= term ([-+*/] term)*
|
|
113
|
+
term ::= ident | num | "(" ws expr ")" ws
|
|
114
|
+
ident ::= [a-z] [a-z0-9_]* ws
|
|
115
|
+
num ::= [0-9]+ ws
|
|
116
|
+
ws ::= [ \t\n]*
|
|
117
|
+
)""";
|
|
118
|
+
|
|
119
|
+
parsed_grammar = grammar_parser::parse(longer_grammar_bytes);
|
|
120
|
+
|
|
121
|
+
expected = {
|
|
122
|
+
{"expr", 2},
|
|
123
|
+
{"expr_6", 6},
|
|
124
|
+
{"expr_7", 7},
|
|
125
|
+
{"ident", 8},
|
|
126
|
+
{"ident_10", 10},
|
|
127
|
+
{"num", 9},
|
|
128
|
+
{"num_11", 11},
|
|
129
|
+
{"root", 0},
|
|
130
|
+
{"root_1", 1},
|
|
131
|
+
{"root_5", 5},
|
|
132
|
+
{"term", 4},
|
|
133
|
+
{"ws", 3},
|
|
134
|
+
{"ws_12", 12},
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
index = 0;
|
|
138
|
+
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
|
|
139
|
+
{
|
|
140
|
+
std::string key = it->first;
|
|
141
|
+
uint32_t value = it->second;
|
|
142
|
+
std::pair<std::string, uint32_t> expected_pair = expected[index];
|
|
143
|
+
|
|
144
|
+
// pretty print error message before asserting
|
|
145
|
+
if (expected_pair.first != key || expected_pair.second != value)
|
|
146
|
+
{
|
|
147
|
+
fprintf(stderr, "expected_pair: %s, %u\n", expected_pair.first.c_str(), expected_pair.second);
|
|
148
|
+
fprintf(stderr, "actual_pair: %s, %u\n", key.c_str(), value);
|
|
149
|
+
fprintf(stderr, "expected_pair != actual_pair\n");
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
assert(expected_pair.first == key && expected_pair.second == value);
|
|
153
|
+
|
|
154
|
+
index++;
|
|
155
|
+
}
|
|
156
|
+
expected_rules = {
|
|
157
|
+
{LLAMA_GRETYPE_RULE_REF, 5},
|
|
158
|
+
{LLAMA_GRETYPE_END, 0},
|
|
159
|
+
{LLAMA_GRETYPE_RULE_REF, 2},
|
|
160
|
+
{LLAMA_GRETYPE_CHAR, 61},
|
|
161
|
+
{LLAMA_GRETYPE_RULE_REF, 3},
|
|
162
|
+
{LLAMA_GRETYPE_RULE_REF, 4},
|
|
163
|
+
{LLAMA_GRETYPE_CHAR, 10},
|
|
164
|
+
{LLAMA_GRETYPE_END, 0},
|
|
165
|
+
{LLAMA_GRETYPE_RULE_REF, 4},
|
|
166
|
+
{LLAMA_GRETYPE_RULE_REF, 7},
|
|
167
|
+
{LLAMA_GRETYPE_END, 0},
|
|
168
|
+
{LLAMA_GRETYPE_RULE_REF, 12},
|
|
169
|
+
{LLAMA_GRETYPE_END, 0},
|
|
170
|
+
{LLAMA_GRETYPE_RULE_REF, 8},
|
|
171
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
172
|
+
{LLAMA_GRETYPE_RULE_REF, 9},
|
|
173
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
174
|
+
{LLAMA_GRETYPE_CHAR, 40},
|
|
175
|
+
{LLAMA_GRETYPE_RULE_REF, 3},
|
|
176
|
+
{LLAMA_GRETYPE_RULE_REF, 2},
|
|
177
|
+
{LLAMA_GRETYPE_CHAR, 41},
|
|
178
|
+
{LLAMA_GRETYPE_RULE_REF, 3},
|
|
179
|
+
{LLAMA_GRETYPE_END, 0},
|
|
180
|
+
{LLAMA_GRETYPE_RULE_REF, 1},
|
|
181
|
+
{LLAMA_GRETYPE_RULE_REF, 5},
|
|
182
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
183
|
+
{LLAMA_GRETYPE_RULE_REF, 1},
|
|
184
|
+
{LLAMA_GRETYPE_END, 0},
|
|
185
|
+
{LLAMA_GRETYPE_CHAR, 45},
|
|
186
|
+
{LLAMA_GRETYPE_CHAR_ALT, 43},
|
|
187
|
+
{LLAMA_GRETYPE_CHAR_ALT, 42},
|
|
188
|
+
{LLAMA_GRETYPE_CHAR_ALT, 47},
|
|
189
|
+
{LLAMA_GRETYPE_RULE_REF, 4},
|
|
190
|
+
{LLAMA_GRETYPE_END, 0},
|
|
191
|
+
{LLAMA_GRETYPE_RULE_REF, 6},
|
|
192
|
+
{LLAMA_GRETYPE_RULE_REF, 7},
|
|
193
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
194
|
+
{LLAMA_GRETYPE_END, 0},
|
|
195
|
+
{LLAMA_GRETYPE_CHAR, 97},
|
|
196
|
+
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
|
|
197
|
+
{LLAMA_GRETYPE_RULE_REF, 10},
|
|
198
|
+
{LLAMA_GRETYPE_RULE_REF, 3},
|
|
199
|
+
{LLAMA_GRETYPE_END, 0},
|
|
200
|
+
{LLAMA_GRETYPE_RULE_REF, 11},
|
|
201
|
+
{LLAMA_GRETYPE_RULE_REF, 3},
|
|
202
|
+
{LLAMA_GRETYPE_END, 0},
|
|
203
|
+
{LLAMA_GRETYPE_CHAR, 97},
|
|
204
|
+
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
|
|
205
|
+
{LLAMA_GRETYPE_CHAR_ALT, 48},
|
|
206
|
+
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
|
207
|
+
{LLAMA_GRETYPE_CHAR_ALT, 95},
|
|
208
|
+
{LLAMA_GRETYPE_RULE_REF, 10},
|
|
209
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
210
|
+
{LLAMA_GRETYPE_END, 0},
|
|
211
|
+
{LLAMA_GRETYPE_CHAR, 48},
|
|
212
|
+
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
|
213
|
+
{LLAMA_GRETYPE_RULE_REF, 11},
|
|
214
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
215
|
+
{LLAMA_GRETYPE_CHAR, 48},
|
|
216
|
+
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
|
217
|
+
{LLAMA_GRETYPE_END, 0},
|
|
218
|
+
{LLAMA_GRETYPE_CHAR, 32},
|
|
219
|
+
{LLAMA_GRETYPE_CHAR_ALT, 9},
|
|
220
|
+
{LLAMA_GRETYPE_CHAR_ALT, 10},
|
|
221
|
+
{LLAMA_GRETYPE_RULE_REF, 12},
|
|
222
|
+
{LLAMA_GRETYPE_ALT, 0},
|
|
223
|
+
{LLAMA_GRETYPE_END, 0},
|
|
224
|
+
};
|
|
225
|
+
|
|
226
|
+
index = 0;
|
|
227
|
+
for (auto rule : parsed_grammar.rules)
|
|
228
|
+
{
|
|
229
|
+
// compare rule to expected rule
|
|
230
|
+
for (uint32_t i = 0; i < rule.size(); i++)
|
|
231
|
+
{
|
|
232
|
+
llama_grammar_element element = rule[i];
|
|
233
|
+
llama_grammar_element expected_element = expected_rules[index];
|
|
234
|
+
|
|
235
|
+
// pretty print error message before asserting
|
|
236
|
+
if (expected_element.type != element.type || expected_element.value != element.value)
|
|
237
|
+
{
|
|
238
|
+
fprintf(stderr, "index: %u\n", index);
|
|
239
|
+
fprintf(stderr, "expected_element: %d, %u\n", expected_element.type, expected_element.value);
|
|
240
|
+
fprintf(stderr, "actual_element: %d, %u\n", element.type, element.value);
|
|
241
|
+
fprintf(stderr, "expected_element != actual_element\n");
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
assert(expected_element.type == element.type && expected_element.value == element.value);
|
|
245
|
+
index++;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
return 0;
|
|
250
|
+
}
|