@fugood/llama.node 0.0.1-alpha.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CMakeLists.txt +36 -7
  2. package/README.md +9 -0
  3. package/bin/darwin/arm64/default.metallib +0 -0
  4. package/bin/darwin/arm64/llama-node.node +0 -0
  5. package/bin/darwin/x64/default.metallib +0 -0
  6. package/bin/darwin/x64/llama-node.node +0 -0
  7. package/bin/linux/arm64/llama-node.node +0 -0
  8. package/bin/linux/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/lib/binding.js +1 -1
  14. package/lib/binding.ts +5 -2
  15. package/lib/index.ts +2 -2
  16. package/package.json +15 -3
  17. package/src/LlamaCompletionWorker.cpp +5 -1
  18. package/src/LlamaCompletionWorker.h +4 -0
  19. package/src/LlamaContext.cpp +18 -1
  20. package/src/common.hpp +11 -7
  21. package/src/llama.cpp/CMakeLists.txt +13 -7
  22. package/src/llama.cpp/common/common.cpp +221 -173
  23. package/src/llama.cpp/common/common.h +19 -8
  24. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  25. package/src/llama.cpp/common/log.h +2 -2
  26. package/src/llama.cpp/common/sampling.cpp +17 -1
  27. package/src/llama.cpp/common/sampling.h +28 -20
  28. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +17 -11
  29. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +5 -5
  30. package/src/llama.cpp/examples/finetune/finetune.cpp +1 -1
  31. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -4
  32. package/src/llama.cpp/examples/imatrix/imatrix.cpp +72 -39
  33. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -3
  34. package/src/llama.cpp/examples/llava/clip.cpp +74 -23
  35. package/src/llama.cpp/examples/llava/llava-cli.cpp +37 -28
  36. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -1
  37. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -1
  38. package/src/llama.cpp/examples/main/main.cpp +10 -8
  39. package/src/llama.cpp/examples/perplexity/perplexity.cpp +175 -55
  40. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  41. package/src/llama.cpp/examples/quantize/quantize.cpp +74 -47
  42. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  43. package/src/llama.cpp/examples/server/server.cpp +97 -86
  44. package/src/llama.cpp/examples/server/utils.hpp +17 -15
  45. package/src/llama.cpp/ggml-backend.c +7 -5
  46. package/src/llama.cpp/ggml-impl.h +339 -4
  47. package/src/llama.cpp/ggml-kompute.cpp +7 -0
  48. package/src/llama.cpp/ggml-opencl.cpp +1 -0
  49. package/src/llama.cpp/ggml-quants.c +302 -293
  50. package/src/llama.cpp/ggml-sycl.cpp +28 -16
  51. package/src/llama.cpp/ggml-vulkan-shaders.hpp +46843 -39205
  52. package/src/llama.cpp/ggml-vulkan.cpp +951 -263
  53. package/src/llama.cpp/ggml.c +1469 -116
  54. package/src/llama.cpp/ggml.h +37 -7
  55. package/src/llama.cpp/llama.cpp +969 -432
  56. package/src/llama.cpp/llama.h +46 -14
  57. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +2 -0
  58. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -1
  59. package/src/llama.cpp/requirements/requirements-convert.txt +2 -2
  60. package/src/llama.cpp/requirements.txt +1 -0
  61. package/src/llama.cpp/sgemm.cpp +134 -103
  62. package/src/llama.cpp/sgemm.h +4 -2
  63. package/src/llama.cpp/tests/CMakeLists.txt +96 -36
  64. package/src/llama.cpp/tests/test-backend-ops.cpp +56 -6
  65. package/src/llama.cpp/tests/test-chat-template.cpp +4 -0
  66. package/src/llama.cpp/tests/test-grammar-integration.cpp +225 -136
  67. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -0
  68. package/src/llama.cpp/tests/test-tokenizer-0.cpp +292 -0
  69. package/src/llama.cpp/tests/{test-tokenizer-1-llama.cpp → test-tokenizer-1-spm.cpp} +1 -1
  70. package/src/llama.cpp/unicode-data.cpp +1188 -656
  71. package/src/llama.cpp/unicode-data.h +4 -3
  72. package/src/llama.cpp/unicode.cpp +590 -49
  73. package/src/llama.cpp/unicode.h +6 -3
  74. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -187
  75. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -190
package/src/llama.cpp/unicode.h
@@ -5,9 +5,9 @@
 #include <vector>
 
 #define CODEPOINT_TYPE_UNIDENTIFIED 0
-#define CODEPOINT_TYPE_DIGIT 1
+#define CODEPOINT_TYPE_NUMBER 1
 #define CODEPOINT_TYPE_LETTER 2
-#define CODEPOINT_TYPE_WHITESPACE 3
+#define CODEPOINT_TYPE_SEPARATOR 3
 #define CODEPOINT_TYPE_ACCENT_MARK 4
 #define CODEPOINT_TYPE_PUNCTUATION 5
 #define CODEPOINT_TYPE_SYMBOL 6
@@ -21,8 +21,11 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
 int unicode_cpt_type(uint32_t cp);
 int unicode_cpt_type(const std::string & utf8);
 
+bool unicode_cpt_is_whitespace(uint32_t cp);
+
 std::string unicode_byte_to_utf8(uint8_t byte);
 uint8_t unicode_utf8_to_byte(const std::string & utf8);
 
-// simple tolower that only implements one-to-one mapping, not one-to-many
 char32_t unicode_tolower(char32_t cp);
+
+std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
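The new unicode_regex_split declaration above backs the rewritten pretokenizer (see unicode.cpp +590 -49 in the file list). A minimal usage sketch, assuming only the signature shown in this hunk; the regex is the commonly used GPT-2 pretokenizer pattern and the expected pieces are illustrative, neither is pinned down by this diff:

// sketch: split text into pretokenizer pieces before BPE merging
#include "unicode.h"
#include <cstdio>
#include <string>
#include <vector>

int main() {
    // GPT-2 style pretokenizer pattern (assumption, not taken from this diff)
    const std::vector<std::string> regex_exprs = {
        "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+",
    };
    for (const std::string & piece : unicode_regex_split("Hello, world!", regex_exprs)) {
        printf("'%s'\n", piece.c_str()); // expected pieces: 'Hello' ',' ' world' '!'
    }
    return 0;
}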
package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp
@@ -1,187 +0,0 @@
-#include "llama.h"
-#include "common.h"
-#include "console.h"
-
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-#include <fstream>
-
-// generate using test-tokenizer-0-falcon.py
-static const std::map<std::string, std::vector<llama_token>> & k_tests() {
-    static std::map<std::string, std::vector<llama_token>> _k_tests = {
-        { "" , { }, },
-        { " " , { 204, }, },
-        { "  " , { 258, }, },
-        { "   " , { 466, }, },
-        { "\t" , { 192, }, },
-        { "\n" , { 193, }, },
-        { "\t\n" , { 19125, }, },
-        { "Hello world" , { 9856, 1079, }, },
-        { " Hello world" , { 23090, 1079, }, },
-        { "Hello World" , { 9856, 2889, }, },
-        { " Hello World" , { 23090, 2889, }, },
-        { " Hello World!" , { 23090, 2889, 12, }, },
-        { "Hello, world!" , { 9856, 23, 1079, 12, }, },
-        { " Hello, world!" , { 23090, 23, 1079, 12, }, },
-        { " this is 🦙.cpp" , { 414, 304, 3346, 111, 231, 25, 29247, }, },
-        { "w048 7tuijk dsdfhu" , { 98, 55866, 204, 34, 16682, 7149, 36190, 6869, 11481, }, },
-        { "нещо на Български" , { 150, 133, 6207, 151, 215, 150, 134, 5052, 133, 6279, 5052, 223, 151, 216, 49679, 123, 53110, 47043, 7795, }, },
-        { "កាន់តែពិសេសអាចខលចេញ" , { 38154, 206, 38154, 126, 38154, 225, 167, 237, 217, 38154, 221, 167, 237, 208, 38154, 228, 38154, 127, 38154, 237, 167, 237, 207, 38154, 237, 38154, 107, 38154, 126, 38154, 211, 38154, 207, 38154, 233, 38154, 211, 167, 237, 207, 38154, 215, }, },
-        { "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 2571, 232, 206, 204, 19, 11003, 20, 8196, 126, 283, 219, 48778, 116, 13392, 204, 19, 51831, 732, 63209, 1741, 7955, 522, 20, 22438, 211, 204, 19, 7927, 53360, 325, 504, 701, 946, 10930, 20, }, },
-        { "Hello" , { 9856, }, },
-        { " Hello" , { 23090, }, },
-        { "  Hello" , { 204, 23090, }, },
-        { "   Hello" , { 258, 23090, }, },
-        { "    Hello" , { 466, 23090, }, },
-        { "    Hello\n    Hello" , { 466, 23090, 742, 23090, }, },
-        { "\n =" , { 1212, 40, }, },
-        { "' era" , { 18, 4932, }, },
-    };
-
-    return _k_tests;
-}
-
-int main(int argc, char **argv) {
-    if (argc < 2) {
-        fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
-        return 1;
-    }
-
-    const std::string fname = argv[1];
-
-    std::string fname_text;
-    if (argc > 2) {
-        fname_text = argv[2];
-    }
-
-    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
-
-    llama_model * model;
-    llama_context * ctx;
-
-    llama_backend_init();
-
-    // load the vocab
-    {
-        auto mparams = llama_model_default_params();
-
-        mparams.vocab_only = true;
-
-        model = llama_load_model_from_file(fname.c_str(), mparams);
-
-        if (model == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            return 1;
-        }
-
-        auto cparams = llama_context_default_params();
-
-        ctx = llama_new_context_with_model(model, cparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
-            return 1;
-        }
-    }
-
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
-        fprintf(stderr, "%s : error: vocab type is not BPE\n", __func__);
-        llama_free_model(model);
-        llama_free(ctx);
-        return 2;
-    }
-
-#ifdef _WIN32
-    // We need this for unicode console support
-    console::init(false, false);
-    atexit([]() { console::cleanup(); });
-#endif
-
-    bool success = true;
-
-    for (const auto & test_kv : k_tests()) {
-        const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, false);
-
-        printf("\n");
-        printf("src: '%s'\n", test_kv.first.c_str());
-        printf("res: '%s'\n", llama_detokenize_bpe(ctx, res).c_str());
-        printf("tok: ");
-        for (const auto & tok : res) {
-            printf("%d ", tok);
-        }
-        printf("\n");
-
-        bool correct = res.size() == test_kv.second.size();
-
-        for (int i = 0; i < (int) res.size() && correct; ++i) {
-            if (test_kv.second[i] != res[i]) {
-                correct = false;
-            }
-        }
-
-        if (!correct) {
-            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
-            fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                llama_detokenize_bpe(ctx, res).c_str(),
-                llama_detokenize_bpe(ctx, test_kv.second).c_str());
-            fprintf(stderr, "%s : expected tokens: ", __func__);
-            for (const auto & t : test_kv.second) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-            fprintf(stderr, "%s : got tokens: ", __func__);
-            for (const auto & t : res) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-
-            success = false;
-        }
-    }
-
-    if (!fname_text.empty()) {
-        fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
-
-        std::string text;
-        {
-            std::ifstream ifs(fname_text);
-            if (!ifs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
-                return 1;
-            }
-            text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
-        }
-
-        fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
-
-        const std::vector<llama_token> res = llama_tokenize(ctx, text, false);
-
-        fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
-
-        {
-            const std::string fname_out = fname_text + ".tokcpp";
-
-            std::ofstream ofs(fname_out);
-            if (!ofs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
-                return 1;
-            }
-
-            for (const auto & tok : res) {
-                ofs << tok << " '" << llama_detokenize_bpe(ctx, std::vector<int>{tok}) << "'" << std::endl;
-            }
-        }
-
-        fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
-    }
-
-    llama_free_model(model);
-    llama_free(ctx);
-
-    llama_backend_free();
-
-    return success ? 0 : 3;
-}
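The hard-coded expectations above move into the consolidated test-tokenizer-0.cpp (+292 in the file list). For reference, a minimal sketch of the core check this driver implemented, under the same llama.h/common.h assumptions as the deleted code (llama_tokenize is the common.h helper used above):

// sketch: the token-equality check the deleted driver performed per test case
static bool check_tokens(llama_context * ctx,
                         const std::string & src,
                         const std::vector<llama_token> & expected) {
    // tokenize without BOS, then require an exact id-for-id match
    const std::vector<llama_token> res = llama_tokenize(ctx, src, false);
    if (res.size() != expected.size()) {
        return false;
    }
    for (size_t i = 0; i < res.size(); ++i) {
        if (res[i] != expected[i]) {
            return false;
        }
    }
    return true;
}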
package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp
@@ -1,190 +0,0 @@
-#include "llama.h"
-#include "common.h"
-#include "console.h"
-
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-#include <fstream>
-
-// generate using test-tokenizer-0-llama.py
-static const std::map<std::string, std::vector<llama_token>> & k_tests() {
-    static std::map<std::string, std::vector<llama_token>> _k_tests = {
-        { "" , { }, },
-        { " " , { 259, }, },
-        { "  " , { 1678, }, },
-        { "   " , { 268, }, },
-        { "\t" , { 29871, 12, }, },
-        { "\n" , { 29871, 13, }, },
-        { "\t\n" , { 29871, 12, 13, }, },
-        { "Hello world" , { 15043, 3186, }, },
-        { " Hello world" , { 29871, 15043, 3186, }, },
-        { "Hello World" , { 15043, 2787, }, },
-        { " Hello World" , { 29871, 15043, 2787, }, },
-        { " Hello World!" , { 29871, 15043, 2787, 29991, }, },
-        { "Hello, world!" , { 15043, 29892, 3186, 29991, }, },
-        { " Hello, world!" , { 29871, 15043, 29892, 3186, 29991, }, },
-        { " this is 🦙.cpp" , { 29871, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
-        { "w048 7tuijk dsdfhu" , { 281, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
-        { "нещо на Български" , { 1538, 4851, 665, 1386, 29713, 1305, }, },
-        { "កាន់តែពិសេសអាចខលចេញ" , { 29871, 31849, 31324, 31934, 228, 162, 142, 228, 161, 146, 228, 162, 133, 228, 161, 153, 228, 161, 186, 31708, 228, 162, 132, 31708, 228, 161, 165, 31324, 228, 161, 136, 228, 161, 132, 228, 161, 158, 228, 161, 136, 228, 162, 132, 228, 161, 140, }, },
-        { "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 29871, 243, 162, 157, 131, 313, 8945, 29897, 29871, 243, 162, 155, 185, 30722, 243, 162, 143, 174, 30598, 313, 20787, 953, 3848, 275, 16125, 630, 29897, 29871, 31681, 313, 6194, 953, 29877, 2397, 393, 756, 967, 1914, 5993, 29897, }, },
-        { "Hello" , { 15043, }, },
-        { " Hello" , { 29871, 15043, }, },
-        { "  Hello" , { 259, 15043, }, },
-        { "   Hello" , { 1678, 15043, }, },
-        { "    Hello" , { 268, 15043, }, },
-        { "    Hello\n    Hello" , { 268, 15043, 13, 1678, 15043, }, },
-        { " (" , { 29871, 313, }, },
-    };
-
-    return _k_tests;
-}
-
-int main(int argc, char **argv) {
-    if (argc < 2) {
-        fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
-        return 1;
-    }
-
-    const std::string fname = argv[1];
-
-    std::string fname_text;
-    if (argc > 2) {
-        fname_text = argv[2];
-    }
-
-    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
-
-    llama_model * model;
-    llama_context * ctx;
-
-    llama_backend_init();
-
-    // load the vocab
-    {
-        auto mparams = llama_model_default_params();
-
-        mparams.vocab_only = true;
-
-        model = llama_load_model_from_file(fname.c_str(), mparams);
-
-        if (model == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            return 1;
-        }
-
-        auto cparams = llama_context_default_params();
-
-        ctx = llama_new_context_with_model(model, cparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
-            return 1;
-        }
-    }
-
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
-        fprintf(stderr, "%s : error: vocab type is not SPM\n", __func__);
-        llama_free_model(model);
-        llama_free(ctx);
-        return 2;
-    }
-
-#ifdef _WIN32
-    // We need this for unicode console support
-    console::init(false, false);
-    atexit([]() { console::cleanup(); });
-#endif
-
-    bool success = true;
-
-    for (const auto & test_kv : k_tests()) {
-        const std::vector<llama_token> res_bos   = llama_tokenize(ctx, test_kv.first, true);
-        const std::vector<llama_token> res_nobos = llama_tokenize(ctx, test_kv.first, false);
-
-        printf("\n");
-        printf("src: '%s'\n", test_kv.first.c_str());
-        printf("res: '%s'\n", llama_detokenize_spm(ctx, res_bos).c_str());
-        printf("tok: ");
-        for (const auto & tok : res_bos) {
-            printf("%d ", tok);
-        }
-        printf("\n");
-
-        bool correct = res_nobos.size() == test_kv.second.size() && res_bos.size() == res_nobos.size() + 1 && res_bos[0] == 1;
-
-        for (int i = 0; i < (int) res_nobos.size() && correct; ++i) {
-            if (test_kv.second[i] != res_bos[i + 1]) {
-                correct = false;
-            }
-            if (test_kv.second[i] != res_nobos[i]) {
-                correct = false;
-            }
-        }
-
-        if (!correct) {
-            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
-            fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                llama_detokenize_spm(ctx, res_nobos).c_str(),
-                llama_detokenize_spm(ctx, test_kv.second).c_str());
-            fprintf(stderr, "%s : expected tokens: ", __func__);
-            for (const auto & t : test_kv.second) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-            fprintf(stderr, "%s : got tokens: ", __func__);
-            for (const auto & t : res_nobos) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-
-            success = false;
-        }
-    }
-
-    if (!fname_text.empty()) {
-        fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
-
-        std::string text;
-        {
-            std::ifstream ifs(fname_text);
-            if (!ifs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
-                return 1;
-            }
-            text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
-        }
-
-        fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
-
-        const std::vector<llama_token> res = llama_tokenize(ctx, text, true);
-
-        fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
-
-        {
-            const std::string fname_out = fname_text + ".tokcpp";
-
-            std::ofstream ofs(fname_out);
-            if (!ofs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
-                return 1;
-            }
-
-            for (const auto & tok : res) {
-                ofs << tok << " '" << llama_detokenize_spm(ctx, std::vector<int>{tok}) << "'" << std::endl;
-            }
-        }
-
-        fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
-    }
-
-    llama_free_model(model);
-    llama_free(ctx);
-
-    llama_backend_free();
-
-    return success ? 0 : 3;
-}
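Unlike the BPE driver, this SPM driver also pinned down BOS handling: tokenizing with add_bos must yield the no-BOS sequence with the BOS id (1 for LLaMA) prepended. A sketch of that invariant in isolation, under the same llama.h/common.h assumptions as the deleted code:

// sketch: the BOS-consistency invariant from the deleted SPM driver
static bool check_bos(llama_context * ctx, const std::string & src) {
    const std::vector<llama_token> res_bos   = llama_tokenize(ctx, src, true);
    const std::vector<llama_token> res_nobos = llama_tokenize(ctx, src, false);
    // the with-BOS result must be the no-BOS result plus a leading BOS id (1)
    if (res_bos.size() != res_nobos.size() + 1 || res_bos.empty() || res_bos[0] != 1) {
        return false;
    }
    for (size_t i = 0; i < res_nobos.size(); ++i) {
        if (res_bos[i + 1] != res_nobos[i]) {
            return false;
        }
    }
    return true;
}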