@fugood/llama.node 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +9 -0
- package/README.md +1 -1
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +2 -1
- package/patches/llama.patch +22 -0
- package/src/LlamaContext.cpp +2 -2
- package/src/TokenizeWorker.cpp +1 -1
- package/src/llama.cpp/CMakeLists.txt +82 -54
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +6 -0
- package/src/llama.cpp/common/common.cpp +748 -754
- package/src/llama.cpp/common/common.h +49 -41
- package/src/llama.cpp/common/grammar-parser.cpp +10 -1
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +6 -6
- package/src/llama.cpp/common/log.h +5 -5
- package/src/llama.cpp/common/sampling.cpp +92 -10
- package/src/llama.cpp/common/sampling.h +6 -1
- package/src/llama.cpp/common/train.cpp +2 -2
- package/src/llama.cpp/examples/CMakeLists.txt +3 -0
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +13 -4
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +2 -2
- package/src/llama.cpp/examples/finetune/finetune.cpp +4 -3
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -2
- package/src/llama.cpp/examples/infill/infill.cpp +8 -8
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +57 -8
- package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +55 -0
- package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/CMakeLists.txt +7 -8
- package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/llama-android.cpp +14 -14
- package/src/llama.cpp/examples/llava/clip.h +1 -1
- package/src/llama.cpp/examples/llava/llava-cli.cpp +27 -7
- package/src/llama.cpp/examples/llava/llava.cpp +0 -15
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +29 -17
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +9 -9
- package/src/llama.cpp/examples/quantize/quantize.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +2 -2
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +2 -0
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +134 -0
- package/src/llama.cpp/examples/server/server.cpp +33 -25
- package/src/llama.cpp/examples/server/utils.hpp +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +359 -9
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +4 -3
- package/src/llama.cpp/ggml-backend.c +2 -3
- package/src/llama.cpp/ggml-common.h +0 -54
- package/src/llama.cpp/ggml-cuda.h +1 -0
- package/src/llama.cpp/ggml-impl.h +51 -0
- package/src/llama.cpp/ggml-kompute.cpp +13 -3
- package/src/llama.cpp/ggml-opencl.cpp +4 -1
- package/src/llama.cpp/ggml-quants.c +3715 -2050
- package/src/llama.cpp/ggml-rpc.cpp +1155 -0
- package/src/llama.cpp/ggml-rpc.h +24 -0
- package/src/llama.cpp/ggml-sycl.cpp +119 -673
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
- package/src/llama.cpp/ggml-vulkan.cpp +203 -224
- package/src/llama.cpp/ggml.c +1208 -1483
- package/src/llama.cpp/ggml.h +71 -46
- package/src/llama.cpp/llama.cpp +1374 -938
- package/src/llama.cpp/llama.h +22 -6
- package/src/llama.cpp/requirements.txt +0 -2
- package/src/llama.cpp/tests/CMakeLists.txt +1 -1
- package/src/llama.cpp/tests/test-backend-ops.cpp +120 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +16 -4
- package/src/llama.cpp/tests/test-grad0.cpp +43 -83
- package/src/llama.cpp/tests/test-grammar-integration.cpp +46 -0
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +27 -3
- package/src/llama.cpp/unicode-data.cpp +6969 -2169
- package/src/llama.cpp/unicode-data.h +15 -12
- package/src/llama.cpp/unicode.cpp +89 -111
- package/src/llama.cpp/unicode.h +44 -12
- package/src/llama.cpp/build.zig +0 -172
- package/src/llama.cpp/ggml-mpi.c +0 -216
- package/src/llama.cpp/ggml-mpi.h +0 -39
- package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -2
- package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +0 -2
|
@@ -28,6 +28,19 @@ static llama_grammar* build_grammar(const std::string & grammar_str) {
|
|
|
28
28
|
return grammar;
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
static bool test_build_grammar_fails(const std::string & grammar_str) {
|
|
32
|
+
fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
|
|
33
|
+
bool grammar_fails = false;
|
|
34
|
+
try {
|
|
35
|
+
build_grammar(grammar_str);
|
|
36
|
+
fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
|
|
37
|
+
} catch (const std::exception & err) {
|
|
38
|
+
grammar_fails = true;
|
|
39
|
+
fprintf(stdout, " ✅︎\n");
|
|
40
|
+
}
|
|
41
|
+
return grammar_fails;
|
|
42
|
+
}
|
|
43
|
+
|
|
31
44
|
static bool match_string(const std::string & input, llama_grammar* grammar) {
|
|
32
45
|
auto decoded = decode_utf8(input, {});
|
|
33
46
|
|
|
@@ -320,6 +333,38 @@ number ::= [0-9]+)""";
|
|
|
320
333
|
fprintf(stderr, " ✅︎ Passed\n");
|
|
321
334
|
}
|
|
322
335
|
|
|
336
|
+
static void test_failure_left_recursion() {
|
|
337
|
+
fprintf(stderr, "⚫ Testing left recursion detection:\n");
|
|
338
|
+
|
|
339
|
+
// Test simple left recursion detection
|
|
340
|
+
const std::string simple_str = R"""(root ::= "a" | root "a")""";
|
|
341
|
+
assert(test_build_grammar_fails(simple_str));
|
|
342
|
+
|
|
343
|
+
// Test more complicated left recursion detection
|
|
344
|
+
const std::string medium_str = R"""(
|
|
345
|
+
root ::= asdf
|
|
346
|
+
asdf ::= "a" | asdf "a"
|
|
347
|
+
)""";
|
|
348
|
+
assert(test_build_grammar_fails(medium_str));
|
|
349
|
+
|
|
350
|
+
// Test even more complicated left recursion detection
|
|
351
|
+
const std::string hard_str = R"""(
|
|
352
|
+
root ::= asdf
|
|
353
|
+
asdf ::= "a" | foo "b"
|
|
354
|
+
foo ::= "c" | asdf "d" | "e")""";
|
|
355
|
+
assert(test_build_grammar_fails(hard_str));
|
|
356
|
+
|
|
357
|
+
// Test yet even more complicated left recursion detection
|
|
358
|
+
const std::string hardest_str = R"""(
|
|
359
|
+
root ::= asdf
|
|
360
|
+
asdf ::= "a" | foo "b"
|
|
361
|
+
foo ::= "c" | empty asdf "d" | "e"
|
|
362
|
+
empty ::= "blah" | )""";
|
|
363
|
+
assert(test_build_grammar_fails(hardest_str));
|
|
364
|
+
|
|
365
|
+
fprintf(stderr, " ✅︎ Passed\n");
|
|
366
|
+
}
|
|
367
|
+
|
|
323
368
|
int main() {
|
|
324
369
|
fprintf(stdout, "Running grammar integration tests...\n");
|
|
325
370
|
test_simple_grammar();
|
|
@@ -327,6 +372,7 @@ int main() {
|
|
|
327
372
|
test_quantifiers();
|
|
328
373
|
test_failure_missing_root();
|
|
329
374
|
test_failure_missing_reference();
|
|
375
|
+
test_failure_left_recursion();
|
|
330
376
|
fprintf(stdout, "All tests passed.\n");
|
|
331
377
|
return 0;
|
|
332
378
|
}
|
|
@@ -13,15 +13,27 @@
|
|
|
13
13
|
#include <vector>
|
|
14
14
|
|
|
15
15
|
int main(int argc, char **argv) {
|
|
16
|
-
if (argc < 2) {
|
|
17
|
-
fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
|
|
16
|
+
if (argc < 2 || argc > 3) {
|
|
17
|
+
fprintf(stderr, "Usage: %s <vocab-file> [--ignore-merges]\n", argv[0]);
|
|
18
18
|
return 1;
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
const std::string fname = argv[1];
|
|
22
|
+
bool ignore_merges = false;
|
|
23
|
+
if (argc == 3) {
|
|
24
|
+
if (std::strcmp(argv[2], "--ignore-merges") != 0) {
|
|
25
|
+
fprintf(stderr, "Usage: %s <vocab-file> [--ignore-merges]\n", argv[0]);
|
|
26
|
+
return 1;
|
|
27
|
+
}
|
|
28
|
+
ignore_merges = true;
|
|
29
|
+
}
|
|
22
30
|
|
|
23
31
|
fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
|
|
24
32
|
|
|
33
|
+
if (ignore_merges) {
|
|
34
|
+
fprintf(stderr, "%s : ignoring merges for tokens inside vocab\n", __func__);
|
|
35
|
+
}
|
|
36
|
+
|
|
25
37
|
llama_model * model;
|
|
26
38
|
llama_context * ctx;
|
|
27
39
|
|
|
@@ -65,7 +77,19 @@ int main(int argc, char **argv) {
|
|
|
65
77
|
std::string str = llama_detokenize_bpe(ctx, std::vector<int>(1, i));
|
|
66
78
|
try {
|
|
67
79
|
auto cps = unicode_cpts_from_utf8(str);
|
|
68
|
-
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
|
80
|
+
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false, true);
|
|
81
|
+
if (ignore_merges && tokens.size() > 1) {
|
|
82
|
+
fprintf(stderr,
|
|
83
|
+
"%s : error: token %d detokenizes to '%s'(%zu) but "
|
|
84
|
+
"tokenization of this to multiple tokens: [",
|
|
85
|
+
__func__, i, str.c_str(), str.length());
|
|
86
|
+
fprintf(stderr, "%d", tokens[0]);
|
|
87
|
+
for (size_t i = 1; i < tokens.size(); i++) {
|
|
88
|
+
fprintf(stderr, ", %d", tokens[i]);
|
|
89
|
+
}
|
|
90
|
+
fprintf(stderr, "]\n");
|
|
91
|
+
return 2;
|
|
92
|
+
}
|
|
69
93
|
std::string check = llama_detokenize_bpe(ctx, tokens);
|
|
70
94
|
if (check != str) {
|
|
71
95
|
fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
|