@fugood/llama.node 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/CMakeLists.txt +9 -0
  2. package/README.md +1 -1
  3. package/bin/darwin/arm64/default.metallib +0 -0
  4. package/bin/darwin/arm64/llama-node.node +0 -0
  5. package/bin/darwin/x64/default.metallib +0 -0
  6. package/bin/darwin/x64/llama-node.node +0 -0
  7. package/bin/linux/arm64/llama-node.node +0 -0
  8. package/bin/linux/x64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  10. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  11. package/bin/win32/arm64/llama-node.node +0 -0
  12. package/bin/win32/arm64/node.lib +0 -0
  13. package/bin/win32/x64/llama-node.node +0 -0
  14. package/bin/win32/x64/node.lib +0 -0
  15. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/arm64/node.lib +0 -0
  17. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  18. package/bin/win32-vulkan/x64/node.lib +0 -0
  19. package/lib/binding.ts +1 -1
  20. package/package.json +2 -1
  21. package/patches/llama.patch +22 -0
  22. package/src/LlamaContext.cpp +2 -2
  23. package/src/TokenizeWorker.cpp +1 -1
  24. package/src/llama.cpp/CMakeLists.txt +82 -54
  25. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
  26. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +6 -0
  27. package/src/llama.cpp/common/common.cpp +748 -754
  28. package/src/llama.cpp/common/common.h +49 -41
  29. package/src/llama.cpp/common/grammar-parser.cpp +10 -1
  30. package/src/llama.cpp/common/json-schema-to-grammar.cpp +6 -6
  31. package/src/llama.cpp/common/log.h +5 -5
  32. package/src/llama.cpp/common/sampling.cpp +92 -10
  33. package/src/llama.cpp/common/sampling.h +6 -1
  34. package/src/llama.cpp/common/train.cpp +2 -2
  35. package/src/llama.cpp/examples/CMakeLists.txt +3 -0
  36. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  37. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  38. package/src/llama.cpp/examples/embedding/embedding.cpp +13 -4
  39. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +2 -2
  40. package/src/llama.cpp/examples/finetune/finetune.cpp +4 -3
  41. package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -2
  42. package/src/llama.cpp/examples/infill/infill.cpp +8 -8
  43. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +57 -8
  44. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +55 -0
  45. package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/CMakeLists.txt +7 -8
  46. package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/llama-android.cpp +14 -14
  47. package/src/llama.cpp/examples/llava/clip.h +1 -1
  48. package/src/llama.cpp/examples/llava/llava-cli.cpp +27 -7
  49. package/src/llama.cpp/examples/llava/llava.cpp +0 -15
  50. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  51. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  52. package/src/llama.cpp/examples/main/main.cpp +29 -17
  53. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  54. package/src/llama.cpp/examples/perplexity/perplexity.cpp +9 -9
  55. package/src/llama.cpp/examples/quantize/quantize.cpp +2 -2
  56. package/src/llama.cpp/examples/retrieval/retrieval.cpp +2 -2
  57. package/src/llama.cpp/examples/rpc/CMakeLists.txt +2 -0
  58. package/src/llama.cpp/examples/rpc/rpc-server.cpp +134 -0
  59. package/src/llama.cpp/examples/server/server.cpp +33 -25
  60. package/src/llama.cpp/examples/server/utils.hpp +1 -1
  61. package/src/llama.cpp/examples/tokenize/tokenize.cpp +359 -9
  62. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +4 -3
  63. package/src/llama.cpp/ggml-backend.c +2 -3
  64. package/src/llama.cpp/ggml-common.h +0 -54
  65. package/src/llama.cpp/ggml-cuda.h +1 -0
  66. package/src/llama.cpp/ggml-impl.h +51 -0
  67. package/src/llama.cpp/ggml-kompute.cpp +13 -3
  68. package/src/llama.cpp/ggml-opencl.cpp +4 -1
  69. package/src/llama.cpp/ggml-quants.c +3715 -2050
  70. package/src/llama.cpp/ggml-rpc.cpp +1155 -0
  71. package/src/llama.cpp/ggml-rpc.h +24 -0
  72. package/src/llama.cpp/ggml-sycl.cpp +119 -673
  73. package/src/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
  74. package/src/llama.cpp/ggml-vulkan.cpp +203 -224
  75. package/src/llama.cpp/ggml.c +1208 -1483
  76. package/src/llama.cpp/ggml.h +71 -46
  77. package/src/llama.cpp/llama.cpp +1374 -938
  78. package/src/llama.cpp/llama.h +22 -6
  79. package/src/llama.cpp/requirements.txt +0 -2
  80. package/src/llama.cpp/tests/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/tests/test-backend-ops.cpp +120 -57
  82. package/src/llama.cpp/tests/test-chat-template.cpp +16 -4
  83. package/src/llama.cpp/tests/test-grad0.cpp +43 -83
  84. package/src/llama.cpp/tests/test-grammar-integration.cpp +46 -0
  85. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +27 -3
  86. package/src/llama.cpp/unicode-data.cpp +6969 -2169
  87. package/src/llama.cpp/unicode-data.h +15 -12
  88. package/src/llama.cpp/unicode.cpp +89 -111
  89. package/src/llama.cpp/unicode.h +44 -12
  90. package/src/llama.cpp/build.zig +0 -172
  91. package/src/llama.cpp/ggml-mpi.c +0 -216
  92. package/src/llama.cpp/ggml-mpi.h +0 -39
  93. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -2
  94. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +0 -2
@@ -28,6 +28,19 @@ static llama_grammar* build_grammar(const std::string & grammar_str) {
28
28
  return grammar;
29
29
  }
30
30
 
31
+ static bool test_build_grammar_fails(const std::string & grammar_str) {
32
+ fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
33
+ bool grammar_fails = false;
34
+ try {
35
+ build_grammar(grammar_str);
36
+ fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
37
+ } catch (const std::exception & err) {
38
+ grammar_fails = true;
39
+ fprintf(stdout, " ✅︎\n");
40
+ }
41
+ return grammar_fails;
42
+ }
43
+
31
44
  static bool match_string(const std::string & input, llama_grammar* grammar) {
32
45
  auto decoded = decode_utf8(input, {});
33
46
 
@@ -320,6 +333,38 @@ number ::= [0-9]+)""";
320
333
  fprintf(stderr, " ✅︎ Passed\n");
321
334
  }
322
335
 
336
+ static void test_failure_left_recursion() {
337
+ fprintf(stderr, "⚫ Testing left recursion detection:\n");
338
+
339
+ // Test simple left recursion detection
340
+ const std::string simple_str = R"""(root ::= "a" | root "a")""";
341
+ assert(test_build_grammar_fails(simple_str));
342
+
343
+ // Test more complicated left recursion detection
344
+ const std::string medium_str = R"""(
345
+ root ::= asdf
346
+ asdf ::= "a" | asdf "a"
347
+ )""";
348
+ assert(test_build_grammar_fails(medium_str));
349
+
350
+ // Test even more complicated left recursion detection
351
+ const std::string hard_str = R"""(
352
+ root ::= asdf
353
+ asdf ::= "a" | foo "b"
354
+ foo ::= "c" | asdf "d" | "e")""";
355
+ assert(test_build_grammar_fails(hard_str));
356
+
357
+ // Test yet even more complicated left recursion detection
358
+ const std::string hardest_str = R"""(
359
+ root ::= asdf
360
+ asdf ::= "a" | foo "b"
361
+ foo ::= "c" | empty asdf "d" | "e"
362
+ empty ::= "blah" | )""";
363
+ assert(test_build_grammar_fails(hardest_str));
364
+
365
+ fprintf(stderr, " ✅︎ Passed\n");
366
+ }
367
+
323
368
  int main() {
324
369
  fprintf(stdout, "Running grammar integration tests...\n");
325
370
  test_simple_grammar();
@@ -327,6 +372,7 @@ int main() {
327
372
  test_quantifiers();
328
373
  test_failure_missing_root();
329
374
  test_failure_missing_reference();
375
+ test_failure_left_recursion();
330
376
  fprintf(stdout, "All tests passed.\n");
331
377
  return 0;
332
378
  }
@@ -13,15 +13,27 @@
13
13
  #include <vector>
14
14
 
15
15
  int main(int argc, char **argv) {
16
- if (argc < 2) {
17
- fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
16
+ if (argc < 2 || argc > 3) {
17
+ fprintf(stderr, "Usage: %s <vocab-file> [--ignore-merges]\n", argv[0]);
18
18
  return 1;
19
19
  }
20
20
 
21
21
  const std::string fname = argv[1];
22
+ bool ignore_merges = false;
23
+ if (argc == 3) {
24
+ if (std::strcmp(argv[2], "--ignore-merges") != 0) {
25
+ fprintf(stderr, "Usage: %s <vocab-file> [--ignore-merges]\n", argv[0]);
26
+ return 1;
27
+ }
28
+ ignore_merges = true;
29
+ }
22
30
 
23
31
  fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
24
32
 
33
+ if (ignore_merges) {
34
+ fprintf(stderr, "%s : ignoring merges for tokens inside vocab\n", __func__);
35
+ }
36
+
25
37
  llama_model * model;
26
38
  llama_context * ctx;
27
39
 
@@ -65,7 +77,19 @@ int main(int argc, char **argv) {
65
77
  std::string str = llama_detokenize_bpe(ctx, std::vector<int>(1, i));
66
78
  try {
67
79
  auto cps = unicode_cpts_from_utf8(str);
68
- std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
80
+ std::vector<llama_token> tokens = llama_tokenize(ctx, str, false, true);
81
+ if (ignore_merges && tokens.size() > 1) {
82
+ fprintf(stderr,
83
+ "%s : error: token %d detokenizes to '%s'(%zu) but "
84
+ "tokenization of this to multiple tokens: [",
85
+ __func__, i, str.c_str(), str.length());
86
+ fprintf(stderr, "%d", tokens[0]);
87
+ for (size_t i = 1; i < tokens.size(); i++) {
88
+ fprintf(stderr, ", %d", tokens[i]);
89
+ }
90
+ fprintf(stderr, "]\n");
91
+ return 2;
92
+ }
69
93
  std::string check = llama_detokenize_bpe(ctx, tokens);
70
94
  if (check != str) {
71
95
  fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",