@fugood/llama.node 0.0.1-alpha.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/CMakeLists.txt +42 -7
  2. package/README.md +10 -0
  3. package/bin/darwin/arm64/default.metallib +0 -0
  4. package/bin/darwin/arm64/llama-node.node +0 -0
  5. package/bin/darwin/x64/default.metallib +0 -0
  6. package/bin/darwin/x64/llama-node.node +0 -0
  7. package/bin/linux/arm64/llama-node.node +0 -0
  8. package/bin/linux/x64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  10. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  11. package/lib/binding.js +1 -1
  12. package/lib/binding.ts +16 -2
  13. package/lib/index.ts +2 -2
  14. package/package.json +15 -3
  15. package/src/DetokenizeWorker.cpp +22 -0
  16. package/src/DetokenizeWorker.h +19 -0
  17. package/src/EmbeddingWorker.cpp +46 -0
  18. package/src/EmbeddingWorker.h +23 -0
  19. package/src/LlamaCompletionWorker.cpp +5 -1
  20. package/src/LlamaCompletionWorker.h +4 -0
  21. package/src/LlamaContext.cpp +80 -1
  22. package/src/LlamaContext.h +3 -0
  23. package/src/TokenizeWorker.cpp +26 -0
  24. package/src/TokenizeWorker.h +23 -0
  25. package/src/common.hpp +12 -7
  26. package/src/llama.cpp/CMakeLists.txt +13 -7
  27. package/src/llama.cpp/common/common.cpp +221 -173
  28. package/src/llama.cpp/common/common.h +19 -8
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/log.h +2 -2
  31. package/src/llama.cpp/common/sampling.cpp +17 -1
  32. package/src/llama.cpp/common/sampling.h +28 -20
  33. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +17 -11
  34. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +5 -5
  35. package/src/llama.cpp/examples/finetune/finetune.cpp +1 -1
  36. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -4
  37. package/src/llama.cpp/examples/imatrix/imatrix.cpp +72 -39
  38. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -3
  39. package/src/llama.cpp/examples/llava/clip.cpp +74 -23
  40. package/src/llama.cpp/examples/llava/llava-cli.cpp +37 -28
  41. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -1
  42. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -1
  43. package/src/llama.cpp/examples/main/main.cpp +10 -8
  44. package/src/llama.cpp/examples/perplexity/perplexity.cpp +175 -55
  45. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/quantize/quantize.cpp +74 -47
  47. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  48. package/src/llama.cpp/examples/server/server.cpp +97 -86
  49. package/src/llama.cpp/examples/server/utils.hpp +17 -15
  50. package/src/llama.cpp/ggml-backend.c +7 -5
  51. package/src/llama.cpp/ggml-impl.h +339 -4
  52. package/src/llama.cpp/ggml-kompute.cpp +7 -0
  53. package/src/llama.cpp/ggml-opencl.cpp +1 -0
  54. package/src/llama.cpp/ggml-quants.c +302 -293
  55. package/src/llama.cpp/ggml-sycl.cpp +28 -16
  56. package/src/llama.cpp/ggml-vulkan-shaders.hpp +46843 -39205
  57. package/src/llama.cpp/ggml-vulkan.cpp +951 -263
  58. package/src/llama.cpp/ggml.c +1469 -116
  59. package/src/llama.cpp/ggml.h +37 -7
  60. package/src/llama.cpp/llama.cpp +969 -432
  61. package/src/llama.cpp/llama.h +46 -14
  62. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +2 -0
  63. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -1
  64. package/src/llama.cpp/requirements/requirements-convert.txt +2 -2
  65. package/src/llama.cpp/requirements.txt +1 -0
  66. package/src/llama.cpp/sgemm.cpp +134 -103
  67. package/src/llama.cpp/sgemm.h +4 -2
  68. package/src/llama.cpp/tests/CMakeLists.txt +96 -36
  69. package/src/llama.cpp/tests/test-backend-ops.cpp +56 -6
  70. package/src/llama.cpp/tests/test-chat-template.cpp +4 -0
  71. package/src/llama.cpp/tests/test-grammar-integration.cpp +225 -136
  72. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -0
  73. package/src/llama.cpp/tests/test-tokenizer-0.cpp +292 -0
  74. package/src/llama.cpp/tests/{test-tokenizer-1-llama.cpp → test-tokenizer-1-spm.cpp} +1 -1
  75. package/src/llama.cpp/unicode-data.cpp +1188 -656
  76. package/src/llama.cpp/unicode-data.h +4 -3
  77. package/src/llama.cpp/unicode.cpp +590 -49
  78. package/src/llama.cpp/unicode.h +6 -3
  79. package/bin/win32/arm64/llama-node.node +0 -0
  80. package/bin/win32/arm64/node.lib +0 -0
  81. package/bin/win32/x64/llama-node.node +0 -0
  82. package/bin/win32/x64/node.lib +0 -0
  83. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -187
  84. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -190
@@ -10,15 +10,10 @@
10
10
  #include "unicode.h"
11
11
  #include <cassert>
12
12
  #include <string>
13
+ #include <vector>
13
14
 
14
- static void test_simple_grammar() {
15
- // Test case for a simple grammar
16
- const std::string grammar_str = R"""(root ::= expr
17
- expr ::= term ("+" term)*
18
- term ::= number
19
- number ::= [0-9]+)""";
20
-
21
- grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
15
+ static llama_grammar* build_grammar(const std::string & grammar_str) {
16
+ auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
22
17
 
23
18
  // Ensure we parsed correctly
24
19
  assert(!parsed_grammar.rules.empty());
@@ -30,8 +25,10 @@ number ::= [0-9]+)""";
30
25
  llama_grammar* grammar = llama_grammar_init(
31
26
  grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
32
27
 
33
- std::string input = "123+456";
28
+ return grammar;
29
+ }
34
30
 
31
+ static bool match_string(const std::string & input, llama_grammar* grammar) {
35
32
  auto decoded = decode_utf8(input, {});
36
33
 
37
34
  const auto & code_points = decoded.first;
@@ -39,159 +36,67 @@ number ::= [0-9]+)""";
39
36
  for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
40
37
  auto prev_stacks = grammar->stacks;
41
38
  llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
42
- assert(!grammar->stacks.empty());
39
+ if (grammar->stacks.empty()) {
40
+ // no stacks means that the grammar failed to match at this point
41
+ return false;
42
+ }
43
43
  }
44
44
 
45
- bool completed_grammar = false;
46
-
47
45
  for (const auto & stack : grammar->stacks) {
48
46
  if (stack.empty()) {
49
- completed_grammar = true;
50
- break;
47
+ // An empty stack means that the grammar has been completed
48
+ return true;
51
49
  }
52
50
  }
53
51
 
54
- assert(completed_grammar);
55
-
56
- // Clean up allocated memory
57
- llama_grammar_free(grammar);
52
+ return false;
58
53
  }
59
54
 
60
- static void test_complex_grammar() {
61
- // Test case for a more complex grammar, with both failure strings and success strings
62
- const std::string grammar_str = R"""(root ::= expression
63
- expression ::= term ws (("+"|"-") ws term)*
64
- term ::= factor ws (("*"|"/") ws factor)*
65
- factor ::= number | variable | "(" expression ")" | function-call
66
- number ::= [0-9]+
67
- variable ::= [a-zA-Z_][a-zA-Z0-9_]*
68
- function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
69
- ws ::= [ \t\n\r]?)""";
70
-
71
- grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
72
-
73
- // Ensure we parsed correctly
74
- assert(!parsed_grammar.rules.empty());
55
+ static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
56
+ fprintf(stderr, "⚫ Testing %s. Grammar: %s\n", test_desc.c_str(), grammar_str.c_str());
57
+ fflush(stderr);
75
58
 
76
- // Ensure we have a root node
77
- assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
78
-
79
- std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
80
- llama_grammar* grammar = llama_grammar_init(
81
- grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
59
+ auto grammar = build_grammar(grammar_str);
82
60
 
83
61
  // Save the original grammar stacks so that we can reset after every new string we want to test
84
62
  auto original_stacks = grammar->stacks;
85
63
 
86
- // Test a few strings
87
- std::vector<std::string> test_strings_pass = {
88
- "42",
89
- "1*2*3*4*5",
90
- "x",
91
- "x+10",
92
- "x1+y2",
93
- "(a+b)*(c-d)",
94
- "func()",
95
- "func(x,y+2)",
96
- "a*(b+c)-d/e",
97
- "f(g(x),h(y,z))",
98
- "x + 10",
99
- "x1 + y2",
100
- "(a + b) * (c - d)",
101
- "func()",
102
- "func(x, y + 2)",
103
- "a * (b + c) - d / e",
104
- "f(g(x), h(y, z))",
105
- "123+456",
106
- "123*456*789-123/456+789*123",
107
- "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
108
- };
109
-
110
- std::vector<std::string> test_strings_fail = {
111
- "+",
112
- "/ 3x",
113
- "x + + y",
114
- "a * / b",
115
- "func(,)",
116
- "func(x y)",
117
- "(a + b",
118
- "x + y)",
119
- "a + b * (c - d",
120
- "42 +",
121
- "x +",
122
- "x + 10 +",
123
- "(a + b) * (c - d",
124
- "func(",
125
- "func(x, y + 2",
126
- "a * (b + c) - d /",
127
- "f(g(x), h(y, z)",
128
- "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
129
- };
64
+ fprintf(stderr, " 🔵 Valid strings:\n");
130
65
 
131
66
  // Passing strings
132
- for (const auto & test_string : test_strings_pass) {
133
- auto decoded = decode_utf8(test_string, {});
134
-
135
- const auto & code_points = decoded.first;
136
-
137
- int pos = 0;
138
- for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
139
- ++pos;
140
- auto prev_stacks = grammar->stacks;
141
- llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
142
-
143
- // Expect that each code point will not cause the grammar to fail
144
- if (grammar->stacks.empty()) {
145
- fprintf(stdout, "Error at position %d\n", pos);
146
- fprintf(stderr, "Unexpected character '%s'\n", unicode_cpt_to_utf8(*it).c_str());
147
- fprintf(stderr, "Input string is %s:\n", test_string.c_str());
148
- }
149
- assert(!grammar->stacks.empty());
150
- }
67
+ for (const auto & test_string : passing_strings) {
68
+ fprintf(stderr, " \"%s\" ", test_string.c_str());
69
+ fflush(stderr);
151
70
 
152
- bool completed_grammar = false;
71
+ bool matched = match_string(test_string, grammar);
153
72
 
154
- for (const auto & stack : grammar->stacks) {
155
- if (stack.empty()) {
156
- completed_grammar = true;
157
- break;
158
- }
73
+ if (!matched) {
74
+ fprintf(stderr, "❌ (failed to match)\n");
75
+ } else {
76
+ fprintf(stdout, "✅︎\n");
159
77
  }
160
78
 
161
- assert(completed_grammar);
79
+ assert(matched);
162
80
 
163
81
  // Reset the grammar stacks
164
82
  grammar->stacks = original_stacks;
165
83
  }
166
84
 
85
+ fprintf(stderr, " 🟠 Invalid strings:\n");
86
+
167
87
  // Failing strings
168
- for (const auto & test_string : test_strings_fail) {
169
- auto decoded = decode_utf8(test_string, {});
170
-
171
- const auto & code_points = decoded.first;
172
- bool parse_failed = false;
173
-
174
- for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
175
- auto prev_stacks = grammar->stacks;
176
- llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
177
- if (grammar->stacks.empty()) {
178
- parse_failed = true;
179
- break;
180
- }
181
- assert(!grammar->stacks.empty());
182
- }
88
+ for (const auto & test_string : failing_strings) {
89
+ fprintf(stderr, " \"%s\" ", test_string.c_str());
90
+ fflush(stderr);
183
91
 
184
- bool completed_grammar = false;
92
+ bool matched = match_string(test_string, grammar);
185
93
 
186
- for (const auto & stack : grammar->stacks) {
187
- if (stack.empty()) {
188
- completed_grammar = true;
189
- break;
190
- }
94
+ if (matched) {
95
+ fprintf(stderr, "❌ (incorrectly matched)\n");
96
+ } else {
97
+ fprintf(stdout, "✅︎\n");
191
98
  }
192
-
193
- // Ensure that the grammar is not completed, or that each string failed to match as-expected
194
- assert((!completed_grammar) || parse_failed);
99
+ assert(!matched);
195
100
 
196
101
  // Reset the grammar stacks
197
102
  grammar->stacks = original_stacks;
@@ -201,7 +106,183 @@ ws ::= [ \t\n\r]?)""";
201
106
  llama_grammar_free(grammar);
202
107
  }
203
108
 
109
+ static void test_simple_grammar() {
110
+ // Test case for a simple grammar
111
+ test_grammar(
112
+ "simple grammar",
113
+ R"""(
114
+ root ::= expr
115
+ expr ::= term ("+" term)*
116
+ term ::= number
117
+ number ::= [0-9]+)""",
118
+ // Passing strings
119
+ {
120
+ "42",
121
+ "1+2+3+4+5",
122
+ "123+456",
123
+ },
124
+ // Failing strings
125
+ {
126
+ "+",
127
+ "/ 3",
128
+ "1+2+3+4+5+",
129
+ "12a45",
130
+ }
131
+ );
132
+ }
133
+
134
+ static void test_complex_grammar() {
135
+ // Test case for a more complex grammar, with both failure strings and success strings
136
+ test_grammar(
137
+ "medium complexity grammar",
138
+ // Grammar
139
+ R"""(
140
+ root ::= expression
141
+ expression ::= term ws (("+"|"-") ws term)*
142
+ term ::= factor ws (("*"|"/") ws factor)*
143
+ factor ::= number | variable | "(" expression ")" | function-call
144
+ number ::= [0-9]+
145
+ variable ::= [a-zA-Z_][a-zA-Z0-9_]*
146
+ function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
147
+ ws ::= [ \t\n\r]?)""",
148
+ // Passing strings
149
+ {
150
+ "42",
151
+ "1*2*3*4*5",
152
+ "x",
153
+ "x+10",
154
+ "x1+y2",
155
+ "(a+b)*(c-d)",
156
+ "func()",
157
+ "func(x,y+2)",
158
+ "a*(b+c)-d/e",
159
+ "f(g(x),h(y,z))",
160
+ "x + 10",
161
+ "x1 + y2",
162
+ "(a + b) * (c - d)",
163
+ "func()",
164
+ "func(x, y + 2)",
165
+ "a * (b + c) - d / e",
166
+ "f(g(x), h(y, z))",
167
+ "123+456",
168
+ "123*456*789-123/456+789*123",
169
+ "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
170
+ },
171
+ // Failing strings
172
+ {
173
+ "+",
174
+ "/ 3x",
175
+ "x + + y",
176
+ "a * / b",
177
+ "func(,)",
178
+ "func(x y)",
179
+ "(a + b",
180
+ "x + y)",
181
+ "a + b * (c - d",
182
+ "42 +",
183
+ "x +",
184
+ "x + 10 +",
185
+ "(a + b) * (c - d",
186
+ "func(",
187
+ "func(x, y + 2",
188
+ "a * (b + c) - d /",
189
+ "f(g(x), h(y, z)",
190
+ "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
191
+ }
192
+ );
193
+ }
194
+
195
+ static void test_quantifiers() {
196
+ // A collection of tests to exercise * + and ? quantifiers
197
+
198
+ test_grammar(
199
+ "* quantifier",
200
+ // Grammar
201
+ R"""(root ::= "a"*)""",
202
+ // Passing strings
203
+ {
204
+ "",
205
+ "a",
206
+ "aaaaa",
207
+ "aaaaaaaaaaaaaaaaaa",
208
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
209
+ },
210
+ // Failing strings
211
+ {
212
+ "b",
213
+ "ab",
214
+ "aab",
215
+ "ba",
216
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
217
+ }
218
+ );
219
+ test_grammar(
220
+ "+ quantifier",
221
+ // Grammar
222
+ R"""(root ::= "a"+)""",
223
+ // Passing strings
224
+ {
225
+ "a",
226
+ "aaaaa",
227
+ "aaaaaaaaaaaaaaaaaa",
228
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
229
+ },
230
+ // Failing strings
231
+ {
232
+ "",
233
+ "b",
234
+ "ab",
235
+ "aab",
236
+ "ba",
237
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
238
+ }
239
+ );
240
+ test_grammar(
241
+ "? quantifier",
242
+ // Grammar
243
+ R"""(root ::= "a"?)""",
244
+ // Passing strings
245
+ {
246
+ "",
247
+ "a"
248
+ },
249
+ // Failing strings
250
+ {
251
+ "b",
252
+ "ab",
253
+ "aa",
254
+ "ba",
255
+ }
256
+ );
257
+ test_grammar(
258
+ "mixed quantifiers",
259
+ // Grammar
260
+ R"""(
261
+ root ::= cons+ vowel* cons? (vowel cons)*
262
+ vowel ::= [aeiouy]
263
+ cons ::= [bcdfghjklmnpqrstvwxyz]
264
+ )""",
265
+ // Passing strings
266
+ {
267
+ "yes",
268
+ "no",
269
+ "noyes",
270
+ "crwth",
271
+ "four",
272
+ "bryyyy",
273
+ },
274
+ // Failing strings
275
+ {
276
+ "yess",
277
+ "yesno",
278
+ "forty",
279
+ "catyyy",
280
+ }
281
+ );
282
+ }
283
+
204
284
  static void test_failure_missing_root() {
285
+ fprintf(stderr, "⚫ Testing missing root node:\n");
205
286
  // Test case for a grammar that is missing a root rule
206
287
  const std::string grammar_str = R"""(rot ::= expr
207
288
  expr ::= term ("+" term)*
@@ -215,29 +296,37 @@ number ::= [0-9]+)""";
215
296
 
216
297
  // Ensure we do NOT have a root node
217
298
  assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
299
+ fprintf(stderr, " ✅︎ Passed\n");
218
300
  }
219
301
 
220
302
  static void test_failure_missing_reference() {
303
+ fprintf(stderr, "⚫ Testing missing reference node:\n");
304
+
221
305
  // Test case for a grammar that is missing a referenced rule
222
- const std::string grammar_str = R"""(root ::= expr
306
+ const std::string grammar_str =
307
+ R"""(root ::= expr
223
308
  expr ::= term ("+" term)*
224
309
  term ::= numero
225
310
  number ::= [0-9]+)""";
226
311
 
227
- fprintf(stderr, "Expected error: ");
312
+ fprintf(stderr, " Expected error: ");
228
313
 
229
314
  grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
230
315
 
231
316
  // Ensure we did NOT parsed correctly
232
317
  assert(parsed_grammar.rules.empty());
233
318
 
234
- fprintf(stderr, "End of expected error. Test successful.\n");
319
+ fprintf(stderr, " End of expected error.\n");
320
+ fprintf(stderr, " ✅︎ Passed\n");
235
321
  }
236
322
 
237
323
  int main() {
324
+ fprintf(stdout, "Running grammar integration tests...\n");
238
325
  test_simple_grammar();
239
326
  test_complex_grammar();
327
+ test_quantifiers();
240
328
  test_failure_missing_root();
241
329
  test_failure_missing_reference();
330
+ fprintf(stdout, "All tests passed.\n");
242
331
  return 0;
243
332
  }
@@ -2,6 +2,7 @@
2
2
  #undef NDEBUG
3
3
  #endif
4
4
 
5
+ #include <cassert>
5
6
  #include <fstream>
6
7
  #include <sstream>
7
8
  #include <regex>