@fugood/llama.node 0.0.1-alpha.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/CMakeLists.txt +42 -7
  2. package/README.md +10 -0
  3. package/bin/darwin/arm64/default.metallib +0 -0
  4. package/bin/darwin/arm64/llama-node.node +0 -0
  5. package/bin/darwin/x64/default.metallib +0 -0
  6. package/bin/darwin/x64/llama-node.node +0 -0
  7. package/bin/linux/arm64/llama-node.node +0 -0
  8. package/bin/linux/x64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  10. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  11. package/lib/binding.js +1 -1
  12. package/lib/binding.ts +16 -2
  13. package/lib/index.ts +2 -2
  14. package/package.json +15 -3
  15. package/src/DetokenizeWorker.cpp +22 -0
  16. package/src/DetokenizeWorker.h +19 -0
  17. package/src/EmbeddingWorker.cpp +46 -0
  18. package/src/EmbeddingWorker.h +23 -0
  19. package/src/LlamaCompletionWorker.cpp +5 -1
  20. package/src/LlamaCompletionWorker.h +4 -0
  21. package/src/LlamaContext.cpp +80 -1
  22. package/src/LlamaContext.h +3 -0
  23. package/src/TokenizeWorker.cpp +26 -0
  24. package/src/TokenizeWorker.h +23 -0
  25. package/src/common.hpp +12 -7
  26. package/src/llama.cpp/CMakeLists.txt +13 -7
  27. package/src/llama.cpp/common/common.cpp +221 -173
  28. package/src/llama.cpp/common/common.h +19 -8
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/log.h +2 -2
  31. package/src/llama.cpp/common/sampling.cpp +17 -1
  32. package/src/llama.cpp/common/sampling.h +28 -20
  33. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +17 -11
  34. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +5 -5
  35. package/src/llama.cpp/examples/finetune/finetune.cpp +1 -1
  36. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -4
  37. package/src/llama.cpp/examples/imatrix/imatrix.cpp +72 -39
  38. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -3
  39. package/src/llama.cpp/examples/llava/clip.cpp +74 -23
  40. package/src/llama.cpp/examples/llava/llava-cli.cpp +37 -28
  41. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -1
  42. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -1
  43. package/src/llama.cpp/examples/main/main.cpp +10 -8
  44. package/src/llama.cpp/examples/perplexity/perplexity.cpp +175 -55
  45. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/quantize/quantize.cpp +74 -47
  47. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  48. package/src/llama.cpp/examples/server/server.cpp +97 -86
  49. package/src/llama.cpp/examples/server/utils.hpp +17 -15
  50. package/src/llama.cpp/ggml-backend.c +7 -5
  51. package/src/llama.cpp/ggml-impl.h +339 -4
  52. package/src/llama.cpp/ggml-kompute.cpp +7 -0
  53. package/src/llama.cpp/ggml-opencl.cpp +1 -0
  54. package/src/llama.cpp/ggml-quants.c +302 -293
  55. package/src/llama.cpp/ggml-sycl.cpp +28 -16
  56. package/src/llama.cpp/ggml-vulkan-shaders.hpp +46843 -39205
  57. package/src/llama.cpp/ggml-vulkan.cpp +951 -263
  58. package/src/llama.cpp/ggml.c +1469 -116
  59. package/src/llama.cpp/ggml.h +37 -7
  60. package/src/llama.cpp/llama.cpp +969 -432
  61. package/src/llama.cpp/llama.h +46 -14
  62. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +2 -0
  63. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -1
  64. package/src/llama.cpp/requirements/requirements-convert.txt +2 -2
  65. package/src/llama.cpp/requirements.txt +1 -0
  66. package/src/llama.cpp/sgemm.cpp +134 -103
  67. package/src/llama.cpp/sgemm.h +4 -2
  68. package/src/llama.cpp/tests/CMakeLists.txt +96 -36
  69. package/src/llama.cpp/tests/test-backend-ops.cpp +56 -6
  70. package/src/llama.cpp/tests/test-chat-template.cpp +4 -0
  71. package/src/llama.cpp/tests/test-grammar-integration.cpp +225 -136
  72. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -0
  73. package/src/llama.cpp/tests/test-tokenizer-0.cpp +292 -0
  74. package/src/llama.cpp/tests/{test-tokenizer-1-llama.cpp → test-tokenizer-1-spm.cpp} +1 -1
  75. package/src/llama.cpp/unicode-data.cpp +1188 -656
  76. package/src/llama.cpp/unicode-data.h +4 -3
  77. package/src/llama.cpp/unicode.cpp +590 -49
  78. package/src/llama.cpp/unicode.h +6 -3
  79. package/bin/win32/arm64/llama-node.node +0 -0
  80. package/bin/win32/arm64/node.lib +0 -0
  81. package/bin/win32/x64/llama-node.node +0 -0
  82. package/bin/win32/x64/node.lib +0 -0
  83. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -187
  84. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -190
package/src/llama.cpp/unicode.h

@@ -5,9 +5,9 @@
  #include <vector>

  #define CODEPOINT_TYPE_UNIDENTIFIED 0
- #define CODEPOINT_TYPE_DIGIT 1
+ #define CODEPOINT_TYPE_NUMBER 1
  #define CODEPOINT_TYPE_LETTER 2
- #define CODEPOINT_TYPE_WHITESPACE 3
+ #define CODEPOINT_TYPE_SEPARATOR 3
  #define CODEPOINT_TYPE_ACCENT_MARK 4
  #define CODEPOINT_TYPE_PUNCTUATION 5
  #define CODEPOINT_TYPE_SYMBOL 6
@@ -21,8 +21,11 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
  int unicode_cpt_type(uint32_t cp);
  int unicode_cpt_type(const std::string & utf8);

+ bool unicode_cpt_is_whitespace(uint32_t cp);
+
  std::string unicode_byte_to_utf8(uint8_t byte);
  uint8_t unicode_utf8_to_byte(const std::string & utf8);

- // simple tolower that only implements one-to-one mapping, not one-to-many
  char32_t unicode_tolower(char32_t cp);
+
+ std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
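The two declarations added at the end of this hunk are new public entry points of the unicode module. Below is a minimal usage sketch, not part of the package: it assumes unicode.h is on the include path, that unicode.cpp and unicode-data.cpp are compiled in, and it uses an illustrative regex rather than a real pretokenizer pattern.

// Illustrative sketch only (assumptions noted above); the regex is a crude
// word/whitespace splitter, not the pretokenizer pattern llama.cpp ships.
#include "unicode.h"

#include <cstdio>
#include <string>
#include <vector>

int main() {
    // unicode_regex_split applies each expression in turn and returns the
    // resulting text fragments.
    const std::vector<std::string> exprs = { "\\s+|[^\\s]+" };
    for (const std::string & piece : unicode_regex_split("Hello  world", exprs)) {
        printf("piece: '%s'\n", piece.c_str());
    }

    // unicode_cpt_is_whitespace supersedes the removed
    // CODEPOINT_TYPE_WHITESPACE category check.
    printf("U+0020 whitespace: %s\n", unicode_cpt_is_whitespace(0x20) ? "yes" : "no");
    return 0;
}

Inside llama.cpp itself, the main caller appears to be the BPE tokenizer, which passes the model's pretokenizer regular expressions to unicode_regex_split.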
package/bin/win32/arm64/llama-node.node: Binary file
package/bin/win32/arm64/node.lib: Binary file
package/bin/win32/x64/llama-node.node: Binary file
package/bin/win32/x64/node.lib: Binary file
package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp

@@ -1,187 +0,0 @@
- #include "llama.h"
- #include "common.h"
- #include "console.h"
-
- #include <cstdio>
- #include <string>
- #include <map>
- #include <vector>
- #include <fstream>
-
- // generate using test-tokenizer-0-falcon.py
- static const std::map<std::string, std::vector<llama_token>> & k_tests() {
-     static std::map<std::string, std::vector<llama_token>> _k_tests = {
-         { "" , { }, },
-         { " " , { 204, }, },
-         { "  " , { 258, }, },
-         { "   " , { 466, }, },
-         { "\t" , { 192, }, },
-         { "\n" , { 193, }, },
-         { "\t\n" , { 19125, }, },
-         { "Hello world" , { 9856, 1079, }, },
-         { " Hello world" , { 23090, 1079, }, },
-         { "Hello World" , { 9856, 2889, }, },
-         { " Hello World" , { 23090, 2889, }, },
-         { " Hello World!" , { 23090, 2889, 12, }, },
-         { "Hello, world!" , { 9856, 23, 1079, 12, }, },
-         { " Hello, world!" , { 23090, 23, 1079, 12, }, },
-         { " this is 🦙.cpp" , { 414, 304, 3346, 111, 231, 25, 29247, }, },
-         { "w048 7tuijk dsdfhu" , { 98, 55866, 204, 34, 16682, 7149, 36190, 6869, 11481, }, },
-         { "нещо на Български" , { 150, 133, 6207, 151, 215, 150, 134, 5052, 133, 6279, 5052, 223, 151, 216, 49679, 123, 53110, 47043, 7795, }, },
-         { "កាន់តែពិសេសអាចខលចេញ" , { 38154, 206, 38154, 126, 38154, 225, 167, 237, 217, 38154, 221, 167, 237, 208, 38154, 228, 38154, 127, 38154, 237, 167, 237, 207, 38154, 237, 38154, 107, 38154, 126, 38154, 211, 38154, 207, 38154, 233, 38154, 211, 167, 237, 207, 38154, 215, }, },
-         { "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 2571, 232, 206, 204, 19, 11003, 20, 8196, 126, 283, 219, 48778, 116, 13392, 204, 19, 51831, 732, 63209, 1741, 7955, 522, 20, 22438, 211, 204, 19, 7927, 53360, 325, 504, 701, 946, 10930, 20, }, },
-         { "Hello" , { 9856, }, },
-         { " Hello" , { 23090, }, },
-         { "  Hello" , { 204, 23090, }, },
-         { "   Hello" , { 258, 23090, }, },
-         { "    Hello" , { 466, 23090, }, },
-         { "    Hello\n    Hello" , { 466, 23090, 742, 23090, }, },
-         { "\n =" , { 1212, 40, }, },
-         { "' era" , { 18, 4932, }, },
-     };
-
-     return _k_tests;
- }
-
- int main(int argc, char **argv) {
-     if (argc < 2) {
-         fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
-         return 1;
-     }
-
-     const std::string fname = argv[1];
-
-     std::string fname_text;
-     if (argc > 2) {
-         fname_text = argv[2];
-     }
-
-     fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
-
-     llama_model * model;
-     llama_context * ctx;
-
-     llama_backend_init();
-
-     // load the vocab
-     {
-         auto mparams = llama_model_default_params();
-
-         mparams.vocab_only = true;
-
-         model = llama_load_model_from_file(fname.c_str(), mparams);
-
-         if (model == NULL) {
-             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-             return 1;
-         }
-
-         auto cparams = llama_context_default_params();
-
-         ctx = llama_new_context_with_model(model, cparams);
-
-         if (ctx == NULL) {
-             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-             llama_free_model(model);
-             return 1;
-         }
-     }
-
-     if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
-         fprintf(stderr, "%s : error: vocab type is not BPE\n", __func__);
-         llama_free_model(model);
-         llama_free(ctx);
-         return 2;
-     }
-
- #ifdef _WIN32
-     // We need this for unicode console support
-     console::init(false, false);
-     atexit([]() { console::cleanup(); });
- #endif
-
-     bool success = true;
-
-     for (const auto & test_kv : k_tests()) {
-         const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, false);
-
-         printf("\n");
-         printf("src: '%s'\n", test_kv.first.c_str());
-         printf("res: '%s'\n", llama_detokenize_bpe(ctx, res).c_str());
-         printf("tok: ");
-         for (const auto & tok : res) {
-             printf("%d ", tok);
-         }
-         printf("\n");
-
-         bool correct = res.size() == test_kv.second.size();
-
-         for (int i = 0; i < (int) res.size() && correct; ++i) {
-             if (test_kv.second[i] != res[i]) {
-                 correct = false;
-             }
-         }
-
-         if (!correct) {
-             fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
-             fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                 llama_detokenize_bpe(ctx, res).c_str(),
-                 llama_detokenize_bpe(ctx, test_kv.second).c_str());
-             fprintf(stderr, "%s : expected tokens: ", __func__);
-             for (const auto & t : test_kv.second) {
-                 fprintf(stderr, "%6d, ", t);
-             }
-             fprintf(stderr, "\n");
-             fprintf(stderr, "%s : got tokens: ", __func__);
-             for (const auto & t : res) {
-                 fprintf(stderr, "%6d, ", t);
-             }
-             fprintf(stderr, "\n");
-
-             success = false;
-         }
-     }
-
-     if (!fname_text.empty()) {
-         fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
-
-         std::string text;
-         {
-             std::ifstream ifs(fname_text);
-             if (!ifs) {
-                 fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
-                 return 1;
-             }
-             text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
-         }
-
-         fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
-
-         const std::vector<llama_token> res = llama_tokenize(ctx, text, false);
-
-         fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
-
-         {
-             const std::string fname_out = fname_text + ".tokcpp";
-
-             std::ofstream ofs(fname_out);
-             if (!ofs) {
-                 fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
-                 return 1;
-             }
-
-             for (const auto & tok : res) {
-                 ofs << tok << " '" << llama_detokenize_bpe(ctx, std::vector<int>{tok}) << "'" << std::endl;
-             }
-         }
-
-         fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
-     }
-
-     llama_free_model(model);
-     llama_free(ctx);
-
-     llama_backend_free();
-
-     return success ? 0 : 3;
- }
package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp

@@ -1,190 +0,0 @@
- #include "llama.h"
- #include "common.h"
- #include "console.h"
-
- #include <cstdio>
- #include <string>
- #include <map>
- #include <vector>
- #include <fstream>
-
- // generate using test-tokenizer-0-llama.py
- static const std::map<std::string, std::vector<llama_token>> & k_tests() {
-     static std::map<std::string, std::vector<llama_token>> _k_tests = {
-         { "" , { }, },
-         { " " , { 259, }, },
-         { "  " , { 1678, }, },
-         { "   " , { 268, }, },
-         { "\t" , { 29871, 12, }, },
-         { "\n" , { 29871, 13, }, },
-         { "\t\n" , { 29871, 12, 13, }, },
-         { "Hello world" , { 15043, 3186, }, },
-         { " Hello world" , { 29871, 15043, 3186, }, },
-         { "Hello World" , { 15043, 2787, }, },
-         { " Hello World" , { 29871, 15043, 2787, }, },
-         { " Hello World!" , { 29871, 15043, 2787, 29991, }, },
-         { "Hello, world!" , { 15043, 29892, 3186, 29991, }, },
-         { " Hello, world!" , { 29871, 15043, 29892, 3186, 29991, }, },
-         { " this is 🦙.cpp" , { 29871, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
-         { "w048 7tuijk dsdfhu" , { 281, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
-         { "нещо на Български" , { 1538, 4851, 665, 1386, 29713, 1305, }, },
-         { "កាន់តែពិសេសអាចខលចេញ" , { 29871, 31849, 31324, 31934, 228, 162, 142, 228, 161, 146, 228, 162, 133, 228, 161, 153, 228, 161, 186, 31708, 228, 162, 132, 31708, 228, 161, 165, 31324, 228, 161, 136, 228, 161, 132, 228, 161, 158, 228, 161, 136, 228, 162, 132, 228, 161, 140, }, },
-         { "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 29871, 243, 162, 157, 131, 313, 8945, 29897, 29871, 243, 162, 155, 185, 30722, 243, 162, 143, 174, 30598, 313, 20787, 953, 3848, 275, 16125, 630, 29897, 29871, 31681, 313, 6194, 953, 29877, 2397, 393, 756, 967, 1914, 5993, 29897, }, },
-         { "Hello" , { 15043, }, },
-         { " Hello" , { 29871, 15043, }, },
-         { "  Hello" , { 259, 15043, }, },
-         { "   Hello" , { 1678, 15043, }, },
-         { "    Hello" , { 268, 15043, }, },
-         { "    Hello\n    Hello" , { 268, 15043, 13, 1678, 15043, }, },
-         { " (" , { 29871, 313, }, },
-     };
-
-     return _k_tests;
- }
-
- int main(int argc, char **argv) {
-     if (argc < 2) {
-         fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
-         return 1;
-     }
-
-     const std::string fname = argv[1];
-
-     std::string fname_text;
-     if (argc > 2) {
-         fname_text = argv[2];
-     }
-
-     fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
-
-     llama_model * model;
-     llama_context * ctx;
-
-     llama_backend_init();
-
-     // load the vocab
-     {
-         auto mparams = llama_model_default_params();
-
-         mparams.vocab_only = true;
-
-         model = llama_load_model_from_file(fname.c_str(), mparams);
-
-         if (model == NULL) {
-             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-             return 1;
-         }
-
-         auto cparams = llama_context_default_params();
-
-         ctx = llama_new_context_with_model(model, cparams);
-
-         if (ctx == NULL) {
-             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-             llama_free_model(model);
-             return 1;
-         }
-     }
-
-     if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
-         fprintf(stderr, "%s : error: vocab type is not SPM\n", __func__);
-         llama_free_model(model);
-         llama_free(ctx);
-         return 2;
-     }
-
- #ifdef _WIN32
-     // We need this for unicode console support
-     console::init(false, false);
-     atexit([]() { console::cleanup(); });
- #endif
-
-     bool success = true;
-
-     for (const auto & test_kv : k_tests()) {
-         const std::vector<llama_token> res_bos = llama_tokenize(ctx, test_kv.first, true);
-         const std::vector<llama_token> res_nobos = llama_tokenize(ctx, test_kv.first, false);
-
-         printf("\n");
-         printf("src: '%s'\n", test_kv.first.c_str());
-         printf("res: '%s'\n", llama_detokenize_spm(ctx, res_bos).c_str());
-         printf("tok: ");
-         for (const auto & tok : res_bos) {
-             printf("%d ", tok);
-         }
-         printf("\n");
-
-         bool correct = res_nobos.size() == test_kv.second.size() && res_bos.size() == res_nobos.size() + 1 && res_bos[0] == 1;
-
-         for (int i = 0; i < (int) res_nobos.size() && correct; ++i) {
-             if (test_kv.second[i] != res_bos[i + 1]) {
-                 correct = false;
-             }
-             if (test_kv.second[i] != res_nobos[i]) {
-                 correct = false;
-             }
-         }
-
-         if (!correct) {
-             fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
-             fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                 llama_detokenize_spm(ctx, res_nobos).c_str(),
-                 llama_detokenize_spm(ctx, test_kv.second).c_str());
-             fprintf(stderr, "%s : expected tokens: ", __func__);
-             for (const auto & t : test_kv.second) {
-                 fprintf(stderr, "%6d, ", t);
-             }
-             fprintf(stderr, "\n");
-             fprintf(stderr, "%s : got tokens: ", __func__);
-             for (const auto & t : res_nobos) {
-                 fprintf(stderr, "%6d, ", t);
-             }
-             fprintf(stderr, "\n");
-
-             success = false;
-         }
-     }
-
-     if (!fname_text.empty()) {
-         fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
-
-         std::string text;
-         {
-             std::ifstream ifs(fname_text);
-             if (!ifs) {
-                 fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
-                 return 1;
-             }
-             text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
-         }
-
-         fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
-
-         const std::vector<llama_token> res = llama_tokenize(ctx, text, true);
-
-         fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
-
-         {
-             const std::string fname_out = fname_text + ".tokcpp";
-
-             std::ofstream ofs(fname_out);
-             if (!ofs) {
-                 fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
-                 return 1;
-             }
-
-             for (const auto & tok : res) {
-                 ofs << tok << " '" << llama_detokenize_spm(ctx, std::vector<int>{tok}) << "'" << std::endl;
-             }
-         }
-
-         fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
-     }
-
-     llama_free_model(model);
-     llama_free(ctx);
-
-     llama_backend_free();
-
-     return success ? 0 : 3;
- }
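The two per-vocabulary tests deleted above appear to be superseded in this release by the new consolidated package/src/llama.cpp/tests/test-tokenizer-0.cpp (file 73, +292 lines); the BPE/SPM split is also reflected in the rename of test-tokenizer-1-llama.cpp to test-tokenizer-1-spm.cpp (file 74).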