@fugood/llama.node 0.0.1-alpha.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +42 -7
- package/README.md +10 -0
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/lib/binding.js +1 -1
- package/lib/binding.ts +16 -2
- package/lib/index.ts +2 -2
- package/package.json +15 -3
- package/src/DetokenizeWorker.cpp +22 -0
- package/src/DetokenizeWorker.h +19 -0
- package/src/EmbeddingWorker.cpp +46 -0
- package/src/EmbeddingWorker.h +23 -0
- package/src/LlamaCompletionWorker.cpp +5 -1
- package/src/LlamaCompletionWorker.h +4 -0
- package/src/LlamaContext.cpp +80 -1
- package/src/LlamaContext.h +3 -0
- package/src/TokenizeWorker.cpp +26 -0
- package/src/TokenizeWorker.h +23 -0
- package/src/common.hpp +12 -7
- package/src/llama.cpp/CMakeLists.txt +13 -7
- package/src/llama.cpp/common/common.cpp +221 -173
- package/src/llama.cpp/common/common.h +19 -8
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
- package/src/llama.cpp/common/log.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +17 -1
- package/src/llama.cpp/common/sampling.h +28 -20
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +17 -11
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +5 -5
- package/src/llama.cpp/examples/finetune/finetune.cpp +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -4
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +72 -39
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -3
- package/src/llama.cpp/examples/llava/clip.cpp +74 -23
- package/src/llama.cpp/examples/llava/llava-cli.cpp +37 -28
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -1
- package/src/llama.cpp/examples/main/main.cpp +10 -8
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +175 -55
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +74 -47
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
- package/src/llama.cpp/examples/server/server.cpp +97 -86
- package/src/llama.cpp/examples/server/utils.hpp +17 -15
- package/src/llama.cpp/ggml-backend.c +7 -5
- package/src/llama.cpp/ggml-impl.h +339 -4
- package/src/llama.cpp/ggml-kompute.cpp +7 -0
- package/src/llama.cpp/ggml-opencl.cpp +1 -0
- package/src/llama.cpp/ggml-quants.c +302 -293
- package/src/llama.cpp/ggml-sycl.cpp +28 -16
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +46843 -39205
- package/src/llama.cpp/ggml-vulkan.cpp +951 -263
- package/src/llama.cpp/ggml.c +1469 -116
- package/src/llama.cpp/ggml.h +37 -7
- package/src/llama.cpp/llama.cpp +969 -432
- package/src/llama.cpp/llama.h +46 -14
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -1
- package/src/llama.cpp/requirements/requirements-convert.txt +2 -2
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/sgemm.cpp +134 -103
- package/src/llama.cpp/sgemm.h +4 -2
- package/src/llama.cpp/tests/CMakeLists.txt +96 -36
- package/src/llama.cpp/tests/test-backend-ops.cpp +56 -6
- package/src/llama.cpp/tests/test-chat-template.cpp +4 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +225 -136
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -0
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +292 -0
- package/src/llama.cpp/tests/{test-tokenizer-1-llama.cpp → test-tokenizer-1-spm.cpp} +1 -1
- package/src/llama.cpp/unicode-data.cpp +1188 -656
- package/src/llama.cpp/unicode-data.h +4 -3
- package/src/llama.cpp/unicode.cpp +590 -49
- package/src/llama.cpp/unicode.h +6 -3
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -187
- package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -190
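
The new binding sources above (TokenizeWorker, DetokenizeWorker, EmbeddingWorker, plus the extended LlamaCompletionWorker and LlamaContext) suggest that tokenization, detokenization, and embedding are now exposed as promise-returning async operations. Below is a minimal sketch of the node-addon-api AsyncWorker pattern those filenames imply; every name and the stubbed tokenize call are illustrative assumptions, not the package's actual code.

#include <napi.h>
#include <string>
#include <utility>
#include <vector>

// Hypothetical sketch: run a (stubbed) tokenize call off the JS thread,
// then resolve a promise with the result on the JS thread.
class TokenizeWorkerSketch : public Napi::AsyncWorker {
public:
  TokenizeWorkerSketch(Napi::Env env, std::string text)
      : Napi::AsyncWorker(env),
        deferred_(Napi::Promise::Deferred::New(env)),
        text_(std::move(text)) {}

  Napi::Promise Promise() { return deferred_.Promise(); }

protected:
  void Execute() override {
    // Worker thread: a real implementation would call llama_tokenize here.
    tokens_.assign(text_.size(), 0);  // placeholder result
  }

  void OnOK() override {
    // JS thread: convert the result to a typed array and resolve the promise.
    Napi::Int32Array arr = Napi::Int32Array::New(Env(), tokens_.size());
    for (size_t i = 0; i < tokens_.size(); i++) {
      arr[i] = tokens_[i];
    }
    deferred_.Resolve(arr);
  }

private:
  Napi::Promise::Deferred deferred_;
  std::string text_;
  std::vector<int32_t> tokens_;
};

A caller would allocate such a worker with new, keep the promise, and call Queue(); node-addon-api deletes the worker after OnOK/OnError completes.
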
package/src/llama.cpp/unicode.h
CHANGED

@@ -5,9 +5,9 @@
 #include <vector>
 
 #define CODEPOINT_TYPE_UNIDENTIFIED 0
-#define CODEPOINT_TYPE_DIGIT 1
+#define CODEPOINT_TYPE_NUMBER 1
 #define CODEPOINT_TYPE_LETTER 2
-#define CODEPOINT_TYPE_WHITESPACE 3
+#define CODEPOINT_TYPE_SEPARATOR 3
 #define CODEPOINT_TYPE_ACCENT_MARK 4
 #define CODEPOINT_TYPE_PUNCTUATION 5
 #define CODEPOINT_TYPE_SYMBOL 6
@@ -21,8 +21,11 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
 int unicode_cpt_type(uint32_t cp);
 int unicode_cpt_type(const std::string & utf8);
 
+bool unicode_cpt_is_whitespace(uint32_t cp);
+
 std::string unicode_byte_to_utf8(uint8_t byte);
 uint8_t unicode_utf8_to_byte(const std::string & utf8);
 
-// simple tolower that only implements one-to-one mapping, not one-to-many
 char32_t unicode_tolower(char32_t cp);
+
+std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
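
The two declarations added to unicode.h are the public surface of the new pre-tokenizer support: unicode_cpt_is_whitespace classifies a raw codepoint, and unicode_regex_split splits text by a list of regex patterns before BPE merging. A minimal usage sketch against the header exactly as declared above; the pattern shown is an illustrative assumption, not one of llama.cpp's model-specific pre-tokenizer patterns.

#include "unicode.h"

#include <cstdio>
#include <string>
#include <vector>

int main() {
    // split with an assumed GPT-2-style pattern list; real callers pass
    // model-specific patterns selected at model load time
    const std::vector<std::string> exprs = {
        " ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+",
    };
    for (const std::string & piece : unicode_regex_split("Hello, world! 123", exprs)) {
        printf("piece: '%s'\n", piece.c_str());
    }

    // the new whitespace predicate operates on codepoints, not bytes
    printf("U+0020 is whitespace: %d\n", (int) unicode_cpt_is_whitespace(0x20));
    return 0;
}
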
package/bin/win32/arm64/llama-node.node
DELETED
Binary file

package/bin/win32/arm64/node.lib
DELETED
Binary file

package/bin/win32/x64/llama-node.node
DELETED
Binary file

package/bin/win32/x64/node.lib
DELETED
Binary file

package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp
DELETED

@@ -1,187 +0,0 @@
-#include "llama.h"
-#include "common.h"
-#include "console.h"
-
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-#include <fstream>
-
-// generate using test-tokenizer-0-falcon.py
-static const std::map<std::string, std::vector<llama_token>> & k_tests() {
-    static std::map<std::string, std::vector<llama_token>> _k_tests = {
-        { "" , { }, },
-        { " " , { 204, }, },
-        { "  " , { 258, }, },
-        { "   " , { 466, }, },
-        { "\t" , { 192, }, },
-        { "\n" , { 193, }, },
-        { "\t\n" , { 19125, }, },
-        { "Hello world" , { 9856, 1079, }, },
-        { " Hello world" , { 23090, 1079, }, },
-        { "Hello World" , { 9856, 2889, }, },
-        { " Hello World" , { 23090, 2889, }, },
-        { " Hello World!" , { 23090, 2889, 12, }, },
-        { "Hello, world!" , { 9856, 23, 1079, 12, }, },
-        { " Hello, world!" , { 23090, 23, 1079, 12, }, },
-        { " this is 🦙.cpp" , { 414, 304, 3346, 111, 231, 25, 29247, }, },
-        { "w048 7tuijk dsdfhu" , { 98, 55866, 204, 34, 16682, 7149, 36190, 6869, 11481, }, },
-        { "нещо на Български" , { 150, 133, 6207, 151, 215, 150, 134, 5052, 133, 6279, 5052, 223, 151, 216, 49679, 123, 53110, 47043, 7795, }, },
-        { "កាន់តែពិសេសអាចខលចេញ" , { 38154, 206, 38154, 126, 38154, 225, 167, 237, 217, 38154, 221, 167, 237, 208, 38154, 228, 38154, 127, 38154, 237, 167, 237, 207, 38154, 237, 38154, 107, 38154, 126, 38154, 211, 38154, 207, 38154, 233, 38154, 211, 167, 237, 207, 38154, 215, }, },
-        { "🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 2571, 232, 206, 204, 19, 11003, 20, 8196, 126, 283, 219, 48778, 116, 13392, 204, 19, 51831, 732, 63209, 1741, 7955, 522, 20, 22438, 211, 204, 19, 7927, 53360, 325, 504, 701, 946, 10930, 20, }, },
-        { "Hello" , { 9856, }, },
-        { " Hello" , { 23090, }, },
-        { "  Hello" , { 204, 23090, }, },
-        { "   Hello" , { 258, 23090, }, },
-        { "    Hello" , { 466, 23090, }, },
-        { "    Hello\n    Hello" , { 466, 23090, 742, 23090, }, },
-        { "\n =" , { 1212, 40, }, },
-        { "' era" , { 18, 4932, }, },
-    };
-
-    return _k_tests;
-}
-
-int main(int argc, char **argv) {
-    if (argc < 2) {
-        fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
-        return 1;
-    }
-
-    const std::string fname = argv[1];
-
-    std::string fname_text;
-    if (argc > 2) {
-        fname_text = argv[2];
-    }
-
-    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
-
-    llama_model * model;
-    llama_context * ctx;
-
-    llama_backend_init();
-
-    // load the vocab
-    {
-        auto mparams = llama_model_default_params();
-
-        mparams.vocab_only = true;
-
-        model = llama_load_model_from_file(fname.c_str(), mparams);
-
-        if (model == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            return 1;
-        }
-
-        auto cparams = llama_context_default_params();
-
-        ctx = llama_new_context_with_model(model, cparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
-            return 1;
-        }
-    }
-
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
-        fprintf(stderr, "%s : error: vocab type is not BPE\n", __func__);
-        llama_free_model(model);
-        llama_free(ctx);
-        return 2;
-    }
-
-#ifdef _WIN32
-    // We need this for unicode console support
-    console::init(false, false);
-    atexit([]() { console::cleanup(); });
-#endif
-
-    bool success = true;
-
-    for (const auto & test_kv : k_tests()) {
-        const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, false);
-
-        printf("\n");
-        printf("src: '%s'\n", test_kv.first.c_str());
-        printf("res: '%s'\n", llama_detokenize_bpe(ctx, res).c_str());
-        printf("tok: ");
-        for (const auto & tok : res) {
-            printf("%d ", tok);
-        }
-        printf("\n");
-
-        bool correct = res.size() == test_kv.second.size();
-
-        for (int i = 0; i < (int) res.size() && correct; ++i) {
-            if (test_kv.second[i] != res[i]) {
-                correct = false;
-            }
-        }
-
-        if (!correct) {
-            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
-            fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                llama_detokenize_bpe(ctx, res).c_str(),
-                llama_detokenize_bpe(ctx, test_kv.second).c_str());
-            fprintf(stderr, "%s : expected tokens: ", __func__);
-            for (const auto & t : test_kv.second) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-            fprintf(stderr, "%s : got tokens: ", __func__);
-            for (const auto & t : res) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-
-            success = false;
-        }
-    }
-
-    if (!fname_text.empty()) {
-        fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
-
-        std::string text;
-        {
-            std::ifstream ifs(fname_text);
-            if (!ifs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
-                return 1;
-            }
-            text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
-        }
-
-        fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
-
-        const std::vector<llama_token> res = llama_tokenize(ctx, text, false);
-
-        fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
-
-        {
-            const std::string fname_out = fname_text + ".tokcpp";
-
-            std::ofstream ofs(fname_out);
-            if (!ofs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
-                return 1;
-            }
-
-            for (const auto & tok : res) {
-                ofs << tok << " '" << llama_detokenize_bpe(ctx, std::vector<int>{tok}) << "'" << std::endl;
-            }
-        }
-
-        fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
-    }
-
-    llama_free_model(model);
-    llama_free(ctx);
-
-    llama_backend_free();
-
-    return success ? 0 : 3;
-}
package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp
DELETED

@@ -1,190 +0,0 @@
-#include "llama.h"
-#include "common.h"
-#include "console.h"
-
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-#include <fstream>
-
-// generate using test-tokenizer-0-llama.py
-static const std::map<std::string, std::vector<llama_token>> & k_tests() {
-    static std::map<std::string, std::vector<llama_token>> _k_tests = {
-        { "" , { }, },
-        { " " , { 259, }, },
-        { "  " , { 1678, }, },
-        { "   " , { 268, }, },
-        { "\t" , { 29871, 12, }, },
-        { "\n" , { 29871, 13, }, },
-        { "\t\n" , { 29871, 12, 13, }, },
-        { "Hello world" , { 15043, 3186, }, },
-        { " Hello world" , { 29871, 15043, 3186, }, },
-        { "Hello World" , { 15043, 2787, }, },
-        { " Hello World" , { 29871, 15043, 2787, }, },
-        { " Hello World!" , { 29871, 15043, 2787, 29991, }, },
-        { "Hello, world!" , { 15043, 29892, 3186, 29991, }, },
-        { " Hello, world!" , { 29871, 15043, 29892, 3186, 29991, }, },
-        { " this is 🦙.cpp" , { 29871, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
-        { "w048 7tuijk dsdfhu" , { 281, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
-        { "нещо на Български" , { 1538, 4851, 665, 1386, 29713, 1305, }, },
-        { "កាន់តែពិសេសអាចខលចេញ" , { 29871, 31849, 31324, 31934, 228, 162, 142, 228, 161, 146, 228, 162, 133, 228, 161, 153, 228, 161, 186, 31708, 228, 162, 132, 31708, 228, 161, 165, 31324, 228, 161, 136, 228, 161, 132, 228, 161, 158, 228, 161, 136, 228, 162, 132, 228, 161, 140, }, },
-        { "🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 29871, 243, 162, 157, 131, 313, 8945, 29897, 29871, 243, 162, 155, 185, 30722, 243, 162, 143, 174, 30598, 313, 20787, 953, 3848, 275, 16125, 630, 29897, 29871, 31681, 313, 6194, 953, 29877, 2397, 393, 756, 967, 1914, 5993, 29897, }, },
-        { "Hello" , { 15043, }, },
-        { " Hello" , { 29871, 15043, }, },
-        { "  Hello" , { 259, 15043, }, },
-        { "   Hello" , { 1678, 15043, }, },
-        { "    Hello" , { 268, 15043, }, },
-        { "    Hello\n    Hello" , { 268, 15043, 13, 1678, 15043, }, },
-        { " (" , { 29871, 313, }, },
-    };
-
-    return _k_tests;
-}
-
-int main(int argc, char **argv) {
-    if (argc < 2) {
-        fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
-        return 1;
-    }
-
-    const std::string fname = argv[1];
-
-    std::string fname_text;
-    if (argc > 2) {
-        fname_text = argv[2];
-    }
-
-    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
-
-    llama_model * model;
-    llama_context * ctx;
-
-    llama_backend_init();
-
-    // load the vocab
-    {
-        auto mparams = llama_model_default_params();
-
-        mparams.vocab_only = true;
-
-        model = llama_load_model_from_file(fname.c_str(), mparams);
-
-        if (model == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            return 1;
-        }
-
-        auto cparams = llama_context_default_params();
-
-        ctx = llama_new_context_with_model(model, cparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
-            return 1;
-        }
-    }
-
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
-        fprintf(stderr, "%s : error: vocab type is not SPM\n", __func__);
-        llama_free_model(model);
-        llama_free(ctx);
-        return 2;
-    }
-
-#ifdef _WIN32
-    // We need this for unicode console support
-    console::init(false, false);
-    atexit([]() { console::cleanup(); });
-#endif
-
-    bool success = true;
-
-    for (const auto & test_kv : k_tests()) {
-        const std::vector<llama_token> res_bos = llama_tokenize(ctx, test_kv.first, true);
-        const std::vector<llama_token> res_nobos = llama_tokenize(ctx, test_kv.first, false);
-
-        printf("\n");
-        printf("src: '%s'\n", test_kv.first.c_str());
-        printf("res: '%s'\n", llama_detokenize_spm(ctx, res_bos).c_str());
-        printf("tok: ");
-        for (const auto & tok : res_bos) {
-            printf("%d ", tok);
-        }
-        printf("\n");
-
-        bool correct = res_nobos.size() == test_kv.second.size() && res_bos.size() == res_nobos.size() + 1 && res_bos[0] == 1;
-
-        for (int i = 0; i < (int) res_nobos.size() && correct; ++i) {
-            if (test_kv.second[i] != res_bos[i + 1]) {
-                correct = false;
-            }
-            if (test_kv.second[i] != res_nobos[i]) {
-                correct = false;
-            }
-        }
-
-        if (!correct) {
-            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
-            fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                llama_detokenize_spm(ctx, res_nobos).c_str(),
-                llama_detokenize_spm(ctx, test_kv.second).c_str());
-            fprintf(stderr, "%s : expected tokens: ", __func__);
-            for (const auto & t : test_kv.second) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-            fprintf(stderr, "%s : got tokens: ", __func__);
-            for (const auto & t : res_nobos) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-
-            success = false;
-        }
-    }
-
-    if (!fname_text.empty()) {
-        fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
-
-        std::string text;
-        {
-            std::ifstream ifs(fname_text);
-            if (!ifs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
-                return 1;
-            }
-            text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
-        }
-
-        fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
-
-        const std::vector<llama_token> res = llama_tokenize(ctx, text, true);
-
-        fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
-
-        {
-            const std::string fname_out = fname_text + ".tokcpp";
-
-            std::ofstream ofs(fname_out);
-            if (!ofs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
-                return 1;
-            }
-
-            for (const auto & tok : res) {
-                ofs << tok << " '" << llama_detokenize_spm(ctx, std::vector<int>{tok}) << "'" << std::endl;
-            }
-        }
-
-        fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
-    }
-
-    llama_free_model(model);
-    llama_free(ctx);
-
-    llama_backend_free();
-
-    return success ? 0 : 3;
-}