@fugood/llama.node 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +85 -0
- package/README.md +56 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/lib/binding.js +13 -0
- package/lib/binding.ts +57 -0
- package/lib/index.js +24 -0
- package/lib/index.ts +13 -0
- package/package.json +65 -0
- package/src/addons.cpp +506 -0
- package/src/llama.cpp/CMakeLists.txt +1320 -0
- package/src/llama.cpp/build.zig +172 -0
- package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
- package/src/llama.cpp/common/CMakeLists.txt +87 -0
- package/src/llama.cpp/common/base64.hpp +392 -0
- package/src/llama.cpp/common/common.cpp +2949 -0
- package/src/llama.cpp/common/common.h +324 -0
- package/src/llama.cpp/common/console.cpp +501 -0
- package/src/llama.cpp/common/console.h +19 -0
- package/src/llama.cpp/common/grammar-parser.cpp +440 -0
- package/src/llama.cpp/common/grammar-parser.h +29 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
- package/src/llama.cpp/common/json.hpp +24766 -0
- package/src/llama.cpp/common/log.h +724 -0
- package/src/llama.cpp/common/ngram-cache.cpp +282 -0
- package/src/llama.cpp/common/ngram-cache.h +94 -0
- package/src/llama.cpp/common/sampling.cpp +353 -0
- package/src/llama.cpp/common/sampling.h +147 -0
- package/src/llama.cpp/common/stb_image.h +8396 -0
- package/src/llama.cpp/common/train.cpp +1513 -0
- package/src/llama.cpp/common/train.h +233 -0
- package/src/llama.cpp/examples/CMakeLists.txt +52 -0
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
- package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/batched/batched.cpp +262 -0
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
- package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
- package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/infill/infill.cpp +767 -0
- package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
- package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
- package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
- package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
- package/src/llama.cpp/examples/llava/clip.h +85 -0
- package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
- package/src/llama.cpp/examples/llava/llava.cpp +426 -0
- package/src/llama.cpp/examples/llava/llava.h +50 -0
- package/src/llama.cpp/examples/llava/requirements.txt +3 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
- package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
- package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/main/main.cpp +957 -0
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
- package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
- package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
- package/src/llama.cpp/examples/server/httplib.h +9465 -0
- package/src/llama.cpp/examples/server/server.cpp +3826 -0
- package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
- package/src/llama.cpp/examples/server/utils.hpp +653 -0
- package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple/simple.cpp +183 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
- package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
- package/src/llama.cpp/ggml-alloc.c +985 -0
- package/src/llama.cpp/ggml-alloc.h +76 -0
- package/src/llama.cpp/ggml-backend-impl.h +141 -0
- package/src/llama.cpp/ggml-backend.c +2099 -0
- package/src/llama.cpp/ggml-backend.h +233 -0
- package/src/llama.cpp/ggml-common.h +1853 -0
- package/src/llama.cpp/ggml-cuda.h +43 -0
- package/src/llama.cpp/ggml-impl.h +265 -0
- package/src/llama.cpp/ggml-kompute.cpp +2006 -0
- package/src/llama.cpp/ggml-kompute.h +46 -0
- package/src/llama.cpp/ggml-metal.h +66 -0
- package/src/llama.cpp/ggml-mpi.c +216 -0
- package/src/llama.cpp/ggml-mpi.h +39 -0
- package/src/llama.cpp/ggml-opencl.cpp +2301 -0
- package/src/llama.cpp/ggml-opencl.h +36 -0
- package/src/llama.cpp/ggml-quants.c +12678 -0
- package/src/llama.cpp/ggml-quants.h +133 -0
- package/src/llama.cpp/ggml-sycl.cpp +17882 -0
- package/src/llama.cpp/ggml-sycl.h +49 -0
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
- package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
- package/src/llama.cpp/ggml-vulkan.h +29 -0
- package/src/llama.cpp/ggml.c +21819 -0
- package/src/llama.cpp/ggml.h +2403 -0
- package/src/llama.cpp/llama.cpp +17468 -0
- package/src/llama.cpp/llama.h +1117 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
- package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
- package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
- package/src/llama.cpp/prompts/alpaca.txt +1 -0
- package/src/llama.cpp/prompts/assistant.txt +31 -0
- package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
- package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
- package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
- package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
- package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
- package/src/llama.cpp/prompts/chat.txt +28 -0
- package/src/llama.cpp/prompts/dan-modified.txt +1 -0
- package/src/llama.cpp/prompts/dan.txt +1 -0
- package/src/llama.cpp/prompts/mnemonics.txt +93 -0
- package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
- package/src/llama.cpp/prompts/reason-act.txt +18 -0
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
- package/src/llama.cpp/requirements.txt +12 -0
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
- package/src/llama.cpp/scripts/xxd.cmake +16 -0
- package/src/llama.cpp/sgemm.cpp +999 -0
- package/src/llama.cpp/sgemm.h +12 -0
- package/src/llama.cpp/tests/CMakeLists.txt +78 -0
- package/src/llama.cpp/tests/get-model.cpp +21 -0
- package/src/llama.cpp/tests/get-model.h +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
- package/src/llama.cpp/tests/test-c.c +7 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
- package/src/llama.cpp/tests/test-double-float.cpp +57 -0
- package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
- package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
- package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
- package/src/llama.cpp/tests/test-opt.cpp +181 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
- package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
- package/src/llama.cpp/tests/test-rope.cpp +221 -0
- package/src/llama.cpp/tests/test-sampling.cpp +301 -0
- package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
- package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
- package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
- package/src/llama.cpp/unicode-data.cpp +1651 -0
- package/src/llama.cpp/unicode-data.h +16 -0
- package/src/llama.cpp/unicode.cpp +277 -0
- package/src/llama.cpp/unicode.h +28 -0
package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp
@@ -0,0 +1,187 @@
+#include "llama.h"
+#include "common.h"
+#include "console.h"
+
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+#include <fstream>
+
+// generate using test-tokenizer-0-falcon.py
+static const std::map<std::string, std::vector<llama_token>> & k_tests() {
+    static std::map<std::string, std::vector<llama_token>> _k_tests = {
+        { "" , { }, },
+        { " " , { 204, }, },
+        { "  " , { 258, }, },
+        { "   " , { 466, }, },
+        { "\t" , { 192, }, },
+        { "\n" , { 193, }, },
+        { "\t\n" , { 19125, }, },
+        { "Hello world" , { 9856, 1079, }, },
+        { " Hello world" , { 23090, 1079, }, },
+        { "Hello World" , { 9856, 2889, }, },
+        { " Hello World" , { 23090, 2889, }, },
+        { " Hello World!" , { 23090, 2889, 12, }, },
+        { "Hello, world!" , { 9856, 23, 1079, 12, }, },
+        { " Hello, world!" , { 23090, 23, 1079, 12, }, },
+        { " this is 🦙.cpp" , { 414, 304, 3346, 111, 231, 25, 29247, }, },
+        { "w048 7tuijk dsdfhu" , { 98, 55866, 204, 34, 16682, 7149, 36190, 6869, 11481, }, },
+        { "нещо на Български" , { 150, 133, 6207, 151, 215, 150, 134, 5052, 133, 6279, 5052, 223, 151, 216, 49679, 123, 53110, 47043, 7795, }, },
+        { "កាន់តែពិសេសអាចខលចេញ" , { 38154, 206, 38154, 126, 38154, 225, 167, 237, 217, 38154, 221, 167, 237, 208, 38154, 228, 38154, 127, 38154, 237, 167, 237, 207, 38154, 237, 38154, 107, 38154, 126, 38154, 211, 38154, 207, 38154, 233, 38154, 211, 167, 237, 207, 38154, 215, }, },
+        { "🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 2571, 232, 206, 204, 19, 11003, 20, 8196, 126, 283, 219, 48778, 116, 13392, 204, 19, 51831, 732, 63209, 1741, 7955, 522, 20, 22438, 211, 204, 19, 7927, 53360, 325, 504, 701, 946, 10930, 20, }, },
+        { "Hello" , { 9856, }, },
+        { " Hello" , { 23090, }, },
+        { "  Hello" , { 204, 23090, }, },
+        { "   Hello" , { 258, 23090, }, },
+        { "    Hello" , { 466, 23090, }, },
+        { "    Hello\n    Hello" , { 466, 23090, 742, 23090, }, },
+        { "\n =" , { 1212, 40, }, },
+        { "' era" , { 18, 4932, }, },
+    };
+
+    return _k_tests;
+}
+
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
+        return 1;
+    }
+
+    const std::string fname = argv[1];
+
+    std::string fname_text;
+    if (argc > 2) {
+        fname_text = argv[2];
+    }
+
+    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
+
+    llama_model * model;
+    llama_context * ctx;
+
+    llama_backend_init();
+
+    // load the vocab
+    {
+        auto mparams = llama_model_default_params();
+
+        mparams.vocab_only = true;
+
+        model = llama_load_model_from_file(fname.c_str(), mparams);
+
+        if (model == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            return 1;
+        }
+
+        auto cparams = llama_context_default_params();
+
+        ctx = llama_new_context_with_model(model, cparams);
+
+        if (ctx == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            llama_free_model(model);
+            return 1;
+        }
+    }
+
+    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
+        fprintf(stderr, "%s : error: vocab type is not BPE\n", __func__);
+        llama_free_model(model);
+        llama_free(ctx);
+        return 2;
+    }
+
+#ifdef _WIN32
+    // We need this for unicode console support
+    console::init(false, false);
+    atexit([]() { console::cleanup(); });
+#endif
+
+    bool success = true;
+
+    for (const auto & test_kv : k_tests()) {
+        const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, false);
+
+        printf("\n");
+        printf("src: '%s'\n", test_kv.first.c_str());
+        printf("res: '%s'\n", llama_detokenize_bpe(ctx, res).c_str());
+        printf("tok: ");
+        for (const auto & tok : res) {
+            printf("%d ", tok);
+        }
+        printf("\n");
+
+        bool correct = res.size() == test_kv.second.size();
+
+        for (int i = 0; i < (int) res.size() && correct; ++i) {
+            if (test_kv.second[i] != res[i]) {
+                correct = false;
+            }
+        }
+
+        if (!correct) {
+            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
+            fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
+                llama_detokenize_bpe(ctx, res).c_str(),
+                llama_detokenize_bpe(ctx, test_kv.second).c_str());
+            fprintf(stderr, "%s : expected tokens: ", __func__);
+            for (const auto & t : test_kv.second) {
+                fprintf(stderr, "%6d, ", t);
+            }
+            fprintf(stderr, "\n");
+            fprintf(stderr, "%s : got tokens: ", __func__);
+            for (const auto & t : res) {
+                fprintf(stderr, "%6d, ", t);
+            }
+            fprintf(stderr, "\n");
+
+            success = false;
+        }
+    }
+
+    if (!fname_text.empty()) {
+        fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
+
+        std::string text;
+        {
+            std::ifstream ifs(fname_text);
+            if (!ifs) {
+                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
+                return 1;
+            }
+            text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
+        }
+
+        fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
+
+        const std::vector<llama_token> res = llama_tokenize(ctx, text, false);
+
+        fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
+
+        {
+            const std::string fname_out = fname_text + ".tokcpp";
+
+            std::ofstream ofs(fname_out);
+            if (!ofs) {
+                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
+                return 1;
+            }
+
+            for (const auto & tok : res) {
+                ofs << tok << " '" << llama_detokenize_bpe(ctx, std::vector<int>{tok}) << "'" << std::endl;
+            }
+        }
+
+        fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
+    }
+
+    llama_free_model(model);
+    llama_free(ctx);
+
+    llama_backend_free();
+
+    return success ? 0 : 3;
+}
package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp
@@ -0,0 +1,190 @@
+#include "llama.h"
+#include "common.h"
+#include "console.h"
+
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+#include <fstream>
+
+// generate using test-tokenizer-0-llama.py
+static const std::map<std::string, std::vector<llama_token>> & k_tests() {
+    static std::map<std::string, std::vector<llama_token>> _k_tests = {
+        { "" , { }, },
+        { " " , { 259, }, },
+        { "  " , { 1678, }, },
+        { "   " , { 268, }, },
+        { "\t" , { 29871, 12, }, },
+        { "\n" , { 29871, 13, }, },
+        { "\t\n" , { 29871, 12, 13, }, },
+        { "Hello world" , { 15043, 3186, }, },
+        { " Hello world" , { 29871, 15043, 3186, }, },
+        { "Hello World" , { 15043, 2787, }, },
+        { " Hello World" , { 29871, 15043, 2787, }, },
+        { " Hello World!" , { 29871, 15043, 2787, 29991, }, },
+        { "Hello, world!" , { 15043, 29892, 3186, 29991, }, },
+        { " Hello, world!" , { 29871, 15043, 29892, 3186, 29991, }, },
+        { " this is 🦙.cpp" , { 29871, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
+        { "w048 7tuijk dsdfhu" , { 281, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
+        { "нещо на Български" , { 1538, 4851, 665, 1386, 29713, 1305, }, },
+        { "កាន់តែពិសេសអាចខលចេញ" , { 29871, 31849, 31324, 31934, 228, 162, 142, 228, 161, 146, 228, 162, 133, 228, 161, 153, 228, 161, 186, 31708, 228, 162, 132, 31708, 228, 161, 165, 31324, 228, 161, 136, 228, 161, 132, 228, 161, 158, 228, 161, 136, 228, 162, 132, 228, 161, 140, }, },
+        { "🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 29871, 243, 162, 157, 131, 313, 8945, 29897, 29871, 243, 162, 155, 185, 30722, 243, 162, 143, 174, 30598, 313, 20787, 953, 3848, 275, 16125, 630, 29897, 29871, 31681, 313, 6194, 953, 29877, 2397, 393, 756, 967, 1914, 5993, 29897, }, },
+        { "Hello" , { 15043, }, },
+        { " Hello" , { 29871, 15043, }, },
+        { "  Hello" , { 259, 15043, }, },
+        { "   Hello" , { 1678, 15043, }, },
+        { "    Hello" , { 268, 15043, }, },
+        { "    Hello\n    Hello" , { 268, 15043, 13, 1678, 15043, }, },
+        { " (" , { 29871, 313, }, },
+    };
+
+    return _k_tests;
+}
+
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
+        return 1;
+    }
+
+    const std::string fname = argv[1];
+
+    std::string fname_text;
+    if (argc > 2) {
+        fname_text = argv[2];
+    }
+
+    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
+
+    llama_model * model;
+    llama_context * ctx;
+
+    llama_backend_init();
+
+    // load the vocab
+    {
+        auto mparams = llama_model_default_params();
+
+        mparams.vocab_only = true;
+
+        model = llama_load_model_from_file(fname.c_str(), mparams);
+
+        if (model == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            return 1;
+        }
+
+        auto cparams = llama_context_default_params();
+
+        ctx = llama_new_context_with_model(model, cparams);
+
+        if (ctx == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            llama_free_model(model);
+            return 1;
+        }
+    }
+
+    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
+        fprintf(stderr, "%s : error: vocab type is not SPM\n", __func__);
+        llama_free_model(model);
+        llama_free(ctx);
+        return 2;
+    }
+
+#ifdef _WIN32
+    // We need this for unicode console support
+    console::init(false, false);
+    atexit([]() { console::cleanup(); });
+#endif
+
+    bool success = true;
+
+    for (const auto & test_kv : k_tests()) {
+        const std::vector<llama_token> res_bos   = llama_tokenize(ctx, test_kv.first, true);
+        const std::vector<llama_token> res_nobos = llama_tokenize(ctx, test_kv.first, false);
+
+        printf("\n");
+        printf("src: '%s'\n", test_kv.first.c_str());
+        printf("res: '%s'\n", llama_detokenize_spm(ctx, res_bos).c_str());
+        printf("tok: ");
+        for (const auto & tok : res_bos) {
+            printf("%d ", tok);
+        }
+        printf("\n");
+
+        bool correct = res_nobos.size() == test_kv.second.size() && res_bos.size() == res_nobos.size() + 1 && res_bos[0] == 1;
+
+        for (int i = 0; i < (int) res_nobos.size() && correct; ++i) {
+            if (test_kv.second[i] != res_bos[i + 1]) {
+                correct = false;
+            }
+            if (test_kv.second[i] != res_nobos[i]) {
+                correct = false;
+            }
+        }
+
+        if (!correct) {
+            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
+            fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
+                llama_detokenize_spm(ctx, res_nobos).c_str(),
+                llama_detokenize_spm(ctx, test_kv.second).c_str());
+            fprintf(stderr, "%s : expected tokens: ", __func__);
+            for (const auto & t : test_kv.second) {
+                fprintf(stderr, "%6d, ", t);
+            }
+            fprintf(stderr, "\n");
+            fprintf(stderr, "%s : got tokens: ", __func__);
+            for (const auto & t : res_nobos) {
+                fprintf(stderr, "%6d, ", t);
+            }
+            fprintf(stderr, "\n");
+
+            success = false;
+        }
+    }
+
+    if (!fname_text.empty()) {
+        fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
+
+        std::string text;
+        {
+            std::ifstream ifs(fname_text);
+            if (!ifs) {
+                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
+                return 1;
+            }
+            text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
+        }
+
+        fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
+
+        const std::vector<llama_token> res = llama_tokenize(ctx, text, true);
+
+        fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
+
+        {
+            const std::string fname_out = fname_text + ".tokcpp";
+
+            std::ofstream ofs(fname_out);
+            if (!ofs) {
+                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
+                return 1;
+            }
+
+            for (const auto & tok : res) {
+                ofs << tok << " '" << llama_detokenize_spm(ctx, std::vector<int>{tok}) << "'" << std::endl;
+            }
+        }
+
+        fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
+    }
+
+    llama_free_model(model);
+    llama_free(ctx);
+
+    llama_backend_free();
+
+    return success ? 0 : 3;
+}
package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp
@@ -0,0 +1,123 @@
+#include "llama.h"
+#include "common.h"
+#include "unicode.h"
+#include "console.h"
+
+#include <cassert>
+#include <codecvt>
+#include <cstdio>
+#include <cstring>
+#include <locale>
+#include <string>
+#include <thread>
+#include <vector>
+
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
+        return 1;
+    }
+
+    const std::string fname = argv[1];
+
+    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
+
+    llama_model * model;
+    llama_context * ctx;
+
+    llama_backend_init();
+
+    // load the vocab
+    {
+        auto mparams = llama_model_default_params();
+
+        mparams.vocab_only = true;
+
+        model = llama_load_model_from_file(fname.c_str(), mparams);
+
+        if (model == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            return 1;
+        }
+
+        auto cparams = llama_context_default_params();
+
+        ctx = llama_new_context_with_model(model, cparams);
+
+        if (ctx == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            llama_free_model(model);
+            return 1;
+        }
+    }
+
+    GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
+
+#ifdef _WIN32
+    // We need this for unicode console support
+    console::init(false, false);
+    atexit([]() { console::cleanup(); });
+#endif
+
+    const int n_vocab = llama_n_vocab(model);
+
+    for (int i = 0; i < n_vocab; ++i) {
+        std::string str = llama_detokenize_bpe(ctx, std::vector<int>(1, i));
+        try {
+            auto cps = unicode_cpts_from_utf8(str);
+            std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
+            std::string check = llama_detokenize_bpe(ctx, tokens);
+            if (check != str) {
+                fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
+                    __func__, i, str.c_str(), str.length(), check.c_str(), check.length());
+                return 2;
+            }
+        }
+        catch (const std::invalid_argument &) {
+            //fprintf(stderr, "%s : info: utf8 conversion %d '%s'\n", __func__, i, str.c_str());
+        }
+    }
+
+    // unicode
+    {
+        const int nthread = std::thread::hardware_concurrency();
+
+        std::vector<std::thread> threads(nthread);
+
+        for (int i = 0; i < nthread; ++i) {
+            threads[i] = std::thread([i, nthread, ctx]() {
+                for (uint32_t cp = i; cp < 0x0010ffff; cp += nthread) {
+                    if (!( // NOLINT
+                        (cp < 0x03 || cp > 0x05) && cp != 0x0b && cp != 0x11 &&
+                        (cp < 0x13 || cp > 0x17) && cp != 0x19 &&
+                        (cp < 0x1c || cp > 0x1e) &&
+                        (cp < 0xd800 || cp > 0xdfff) &&
+                        (cp < 0x00040000 || cp >= 0x000e0000)
+                    )) {
+                        continue;
+                    }
+
+                    std::string str = unicode_cpt_to_utf8(cp);
+                    std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
+                    std::string check = llama_detokenize_bpe(ctx, tokens);
+                    if (cp != 9601 && str != check) {
+                        fprintf(stderr, "error: codepoint %x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
+                            cp, check.c_str(), check.length(), str.c_str(), str.length());
+                        std::exit(3);
+                    }
+                }
+            });
+        }
+
+        for (auto & t : threads) {
+            t.join();
+        }
+    }
+
+    llama_free_model(model);
+    llama_free(ctx);
+
+    llama_backend_free();
+
+    return 0;
+}
package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp
@@ -0,0 +1,111 @@
+#include "llama.h"
+#include "common.h"
+#include "unicode.h"
+#include "console.h"
+
+#include <cassert>
+#include <codecvt>
+#include <cstdio>
+#include <cstring>
+#include <locale>
+#include <string>
+#include <thread>
+#include <vector>
+
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
+        return 1;
+    }
+
+    const std::string fname = argv[1];
+
+    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
+
+    llama_model * model;
+    llama_context * ctx;
+
+    llama_backend_init();
+
+    // load the vocab
+    {
+        auto mparams = llama_model_default_params();
+
+        mparams.vocab_only = true;
+
+        model = llama_load_model_from_file(fname.c_str(), mparams);
+
+        if (model == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            return 1;
+        }
+
+        auto cparams = llama_context_default_params();
+
+        ctx = llama_new_context_with_model(model, cparams);
+
+        if (ctx == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            llama_free_model(model);
+            return 1;
+        }
+    }
+
+    GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
+
+#ifdef _WIN32
+    // We need this for unicode console support
+    console::init(false, false);
+    atexit([]() { console::cleanup(); });
+#endif
+
+    const int n_vocab = llama_n_vocab(model);
+
+    for (int i = 0; i < n_vocab; ++i) {
+        std::string str = llama_detokenize_spm(ctx, std::vector<int>(1, i));
+        std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
+        std::string check = llama_detokenize_spm(ctx, tokens);
+        if (check != str) {
+            fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
+                __func__, i, str.c_str(), str.length(), check.c_str(), check.length());
+            return 2;
+        }
+    }
+
+    // unicode
+    {
+        const int nthread = std::thread::hardware_concurrency();
+
+        std::vector<std::thread> threads(nthread);
+
+        for (int i = 0; i < nthread; ++i) {
+            threads[i] = std::thread([i, nthread, ctx]() {
+                for (uint32_t cp = i; cp < 0x0010ffff; cp += nthread) {
+                    if (cp >= 0xd800 && cp <= 0xdfff) {
+                        continue;
+                    }
+
+                    std::string str = unicode_cpt_to_utf8(cp);
+                    std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
+                    std::string check = llama_detokenize_spm(ctx, tokens);
+                    if (cp != 9601 && str != check) {
+                        fprintf(stderr, "error: codepoint %x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
+                            cp, check.c_str(), check.length(), str.c_str(), str.length());
+                        std::exit(3);
+                    }
+                }
+            });
+        }
+
+        for (auto & t : threads) {
+            t.join();
+        }
+    }
+
+    llama_free_model(model);
+    llama_free(ctx);
+
+    llama_backend_free();
+
+    return 0;
+}