@fugood/llama.node 0.0.1-alpha.1

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
Files changed (204)
  1. package/CMakeLists.txt +85 -0
  2. package/README.md +56 -0
  3. package/bin/darwin/arm64/llama-node.node +0 -0
  4. package/bin/darwin/x64/llama-node.node +0 -0
  5. package/bin/linux/arm64/llama-node.node +0 -0
  6. package/bin/linux/x64/llama-node.node +0 -0
  7. package/bin/win32/arm64/llama-node.node +0 -0
  8. package/bin/win32/arm64/node.lib +0 -0
  9. package/bin/win32/x64/llama-node.node +0 -0
  10. package/bin/win32/x64/node.lib +0 -0
  11. package/lib/binding.js +13 -0
  12. package/lib/binding.ts +57 -0
  13. package/lib/index.js +24 -0
  14. package/lib/index.ts +13 -0
  15. package/package.json +65 -0
  16. package/src/addons.cpp +506 -0
  17. package/src/llama.cpp/CMakeLists.txt +1320 -0
  18. package/src/llama.cpp/build.zig +172 -0
  19. package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
  20. package/src/llama.cpp/common/CMakeLists.txt +87 -0
  21. package/src/llama.cpp/common/base64.hpp +392 -0
  22. package/src/llama.cpp/common/common.cpp +2949 -0
  23. package/src/llama.cpp/common/common.h +324 -0
  24. package/src/llama.cpp/common/console.cpp +501 -0
  25. package/src/llama.cpp/common/console.h +19 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +440 -0
  27. package/src/llama.cpp/common/grammar-parser.h +29 -0
  28. package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/json.hpp +24766 -0
  31. package/src/llama.cpp/common/log.h +724 -0
  32. package/src/llama.cpp/common/ngram-cache.cpp +282 -0
  33. package/src/llama.cpp/common/ngram-cache.h +94 -0
  34. package/src/llama.cpp/common/sampling.cpp +353 -0
  35. package/src/llama.cpp/common/sampling.h +147 -0
  36. package/src/llama.cpp/common/stb_image.h +8396 -0
  37. package/src/llama.cpp/common/train.cpp +1513 -0
  38. package/src/llama.cpp/common/train.h +233 -0
  39. package/src/llama.cpp/examples/CMakeLists.txt +52 -0
  40. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
  41. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
  42. package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
  43. package/src/llama.cpp/examples/batched/batched.cpp +262 -0
  44. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
  45. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
  46. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
  47. package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
  48. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
  49. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
  50. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
  52. package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
  54. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
  55. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
  56. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
  58. package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
  59. package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
  60. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
  61. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
  62. package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
  64. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
  65. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
  66. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
  67. package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
  68. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
  69. package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
  70. package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
  71. package/src/llama.cpp/examples/infill/infill.cpp +767 -0
  72. package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
  73. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
  74. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
  75. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
  76. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
  77. package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
  78. package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
  79. package/src/llama.cpp/examples/llava/clip.h +85 -0
  80. package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
  81. package/src/llama.cpp/examples/llava/llava.cpp +426 -0
  82. package/src/llama.cpp/examples/llava/llava.h +50 -0
  83. package/src/llama.cpp/examples/llava/requirements.txt +3 -0
  84. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
  85. package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
  86. package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
  87. package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
  88. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
  89. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
  90. package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
  91. package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
  92. package/src/llama.cpp/examples/main/main.cpp +957 -0
  93. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
  94. package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
  95. package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
  96. package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
  97. package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
  98. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
  99. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
  100. package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
  101. package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
  102. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
  103. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
  104. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
  106. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
  107. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
  108. package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
  109. package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
  110. package/src/llama.cpp/examples/server/httplib.h +9465 -0
  111. package/src/llama.cpp/examples/server/server.cpp +3826 -0
  112. package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
  113. package/src/llama.cpp/examples/server/utils.hpp +653 -0
  114. package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
  115. package/src/llama.cpp/examples/simple/simple.cpp +183 -0
  116. package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
  117. package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
  118. package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
  119. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
  120. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
  121. package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
  122. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
  123. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
  124. package/src/llama.cpp/ggml-alloc.c +985 -0
  125. package/src/llama.cpp/ggml-alloc.h +76 -0
  126. package/src/llama.cpp/ggml-backend-impl.h +141 -0
  127. package/src/llama.cpp/ggml-backend.c +2099 -0
  128. package/src/llama.cpp/ggml-backend.h +233 -0
  129. package/src/llama.cpp/ggml-common.h +1853 -0
  130. package/src/llama.cpp/ggml-cuda.h +43 -0
  131. package/src/llama.cpp/ggml-impl.h +265 -0
  132. package/src/llama.cpp/ggml-kompute.cpp +2006 -0
  133. package/src/llama.cpp/ggml-kompute.h +46 -0
  134. package/src/llama.cpp/ggml-metal.h +66 -0
  135. package/src/llama.cpp/ggml-mpi.c +216 -0
  136. package/src/llama.cpp/ggml-mpi.h +39 -0
  137. package/src/llama.cpp/ggml-opencl.cpp +2301 -0
  138. package/src/llama.cpp/ggml-opencl.h +36 -0
  139. package/src/llama.cpp/ggml-quants.c +12678 -0
  140. package/src/llama.cpp/ggml-quants.h +133 -0
  141. package/src/llama.cpp/ggml-sycl.cpp +17882 -0
  142. package/src/llama.cpp/ggml-sycl.h +49 -0
  143. package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
  144. package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
  145. package/src/llama.cpp/ggml-vulkan.h +29 -0
  146. package/src/llama.cpp/ggml.c +21819 -0
  147. package/src/llama.cpp/ggml.h +2403 -0
  148. package/src/llama.cpp/llama.cpp +17468 -0
  149. package/src/llama.cpp/llama.h +1117 -0
  150. package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
  151. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  152. package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
  153. package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
  154. package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
  155. package/src/llama.cpp/prompts/alpaca.txt +1 -0
  156. package/src/llama.cpp/prompts/assistant.txt +31 -0
  157. package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  158. package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
  159. package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  160. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  161. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  162. package/src/llama.cpp/prompts/chat.txt +28 -0
  163. package/src/llama.cpp/prompts/dan-modified.txt +1 -0
  164. package/src/llama.cpp/prompts/dan.txt +1 -0
  165. package/src/llama.cpp/prompts/mnemonics.txt +93 -0
  166. package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
  167. package/src/llama.cpp/prompts/reason-act.txt +18 -0
  168. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
  169. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
  170. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
  171. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
  172. package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
  173. package/src/llama.cpp/requirements.txt +12 -0
  174. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
  175. package/src/llama.cpp/scripts/xxd.cmake +16 -0
  176. package/src/llama.cpp/sgemm.cpp +999 -0
  177. package/src/llama.cpp/sgemm.h +12 -0
  178. package/src/llama.cpp/tests/CMakeLists.txt +78 -0
  179. package/src/llama.cpp/tests/get-model.cpp +21 -0
  180. package/src/llama.cpp/tests/get-model.h +2 -0
  181. package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
  182. package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
  183. package/src/llama.cpp/tests/test-c.c +7 -0
  184. package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
  185. package/src/llama.cpp/tests/test-double-float.cpp +57 -0
  186. package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
  187. package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
  188. package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
  189. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
  190. package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
  191. package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
  192. package/src/llama.cpp/tests/test-opt.cpp +181 -0
  193. package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
  194. package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
  195. package/src/llama.cpp/tests/test-rope.cpp +221 -0
  196. package/src/llama.cpp/tests/test-sampling.cpp +301 -0
  197. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
  198. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
  199. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
  200. package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
  201. package/src/llama.cpp/unicode-data.cpp +1651 -0
  202. package/src/llama.cpp/unicode-data.h +16 -0
  203. package/src/llama.cpp/unicode.cpp +277 -0
  204. package/src/llama.cpp/unicode.h +28 -0
package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp
@@ -0,0 +1,187 @@
+ #include "llama.h"
+ #include "common.h"
+ #include "console.h"
+
+ #include <cstdio>
+ #include <string>
+ #include <map>
+ #include <vector>
+ #include <fstream>
+
+ // generate using test-tokenizer-0-falcon.py
+ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
+     static std::map<std::string, std::vector<llama_token>> _k_tests = {
+         { "" , { }, },
+         { " " , { 204, }, },
+         { "  " , { 258, }, },
+         { "   " , { 466, }, },
+         { "\t" , { 192, }, },
+         { "\n" , { 193, }, },
+         { "\t\n" , { 19125, }, },
+         { "Hello world" , { 9856, 1079, }, },
+         { " Hello world" , { 23090, 1079, }, },
+         { "Hello World" , { 9856, 2889, }, },
+         { " Hello World" , { 23090, 2889, }, },
+         { " Hello World!" , { 23090, 2889, 12, }, },
+         { "Hello, world!" , { 9856, 23, 1079, 12, }, },
+         { " Hello, world!" , { 23090, 23, 1079, 12, }, },
+         { " this is 🦙.cpp" , { 414, 304, 3346, 111, 231, 25, 29247, }, },
+         { "w048 7tuijk dsdfhu" , { 98, 55866, 204, 34, 16682, 7149, 36190, 6869, 11481, }, },
+         { "нещо на Български" , { 150, 133, 6207, 151, 215, 150, 134, 5052, 133, 6279, 5052, 223, 151, 216, 49679, 123, 53110, 47043, 7795, }, },
+         { "កាន់តែពិសេសអាចខលចេញ" , { 38154, 206, 38154, 126, 38154, 225, 167, 237, 217, 38154, 221, 167, 237, 208, 38154, 228, 38154, 127, 38154, 237, 167, 237, 207, 38154, 237, 38154, 107, 38154, 126, 38154, 211, 38154, 207, 38154, 233, 38154, 211, 167, 237, 207, 38154, 215, }, },
+         { "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 2571, 232, 206, 204, 19, 11003, 20, 8196, 126, 283, 219, 48778, 116, 13392, 204, 19, 51831, 732, 63209, 1741, 7955, 522, 20, 22438, 211, 204, 19, 7927, 53360, 325, 504, 701, 946, 10930, 20, }, },
+         { "Hello" , { 9856, }, },
+         { " Hello" , { 23090, }, },
+         { "  Hello" , { 204, 23090, }, },
+         { "   Hello" , { 258, 23090, }, },
+         { "    Hello" , { 466, 23090, }, },
+         { "    Hello\n    Hello" , { 466, 23090, 742, 23090, }, },
+         { "\n =" , { 1212, 40, }, },
+         { "' era" , { 18, 4932, }, },
+     };
+
+     return _k_tests;
+ }
+
+ int main(int argc, char **argv) {
+     if (argc < 2) {
+         fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
+         return 1;
+     }
+
+     const std::string fname = argv[1];
+
+     std::string fname_text;
+     if (argc > 2) {
+         fname_text = argv[2];
+     }
+
+     fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
+
+     llama_model * model;
+     llama_context * ctx;
+
+     llama_backend_init();
+
+     // load the vocab
+     {
+         auto mparams = llama_model_default_params();
+
+         mparams.vocab_only = true;
+
+         model = llama_load_model_from_file(fname.c_str(), mparams);
+
+         if (model == NULL) {
+             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+             return 1;
+         }
+
+         auto cparams = llama_context_default_params();
+
+         ctx = llama_new_context_with_model(model, cparams);
+
+         if (ctx == NULL) {
+             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+             llama_free_model(model);
+             return 1;
+         }
+     }
+
+     if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
+         fprintf(stderr, "%s : error: vocab type is not BPE\n", __func__);
+         llama_free_model(model);
+         llama_free(ctx);
+         return 2;
+     }
+
+ #ifdef _WIN32
+     // We need this for unicode console support
+     console::init(false, false);
+     atexit([]() { console::cleanup(); });
+ #endif
+
+     bool success = true;
+
+     for (const auto & test_kv : k_tests()) {
+         const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, false);
+
+         printf("\n");
+         printf("src: '%s'\n", test_kv.first.c_str());
+         printf("res: '%s'\n", llama_detokenize_bpe(ctx, res).c_str());
+         printf("tok: ");
+         for (const auto & tok : res) {
+             printf("%d ", tok);
+         }
+         printf("\n");
+
+         bool correct = res.size() == test_kv.second.size();
+
+         for (int i = 0; i < (int) res.size() && correct; ++i) {
+             if (test_kv.second[i] != res[i]) {
+                 correct = false;
+             }
+         }
+
+         if (!correct) {
+             fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
+             fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
+                 llama_detokenize_bpe(ctx, res).c_str(),
+                 llama_detokenize_bpe(ctx, test_kv.second).c_str());
+             fprintf(stderr, "%s : expected tokens: ", __func__);
+             for (const auto & t : test_kv.second) {
+                 fprintf(stderr, "%6d, ", t);
+             }
+             fprintf(stderr, "\n");
+             fprintf(stderr, "%s : got tokens: ", __func__);
+             for (const auto & t : res) {
+                 fprintf(stderr, "%6d, ", t);
+             }
+             fprintf(stderr, "\n");
+
+             success = false;
+         }
+     }
+
+     if (!fname_text.empty()) {
+         fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
+
+         std::string text;
+         {
+             std::ifstream ifs(fname_text);
+             if (!ifs) {
+                 fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
+                 return 1;
+             }
+             text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
+         }
+
+         fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
+
+         const std::vector<llama_token> res = llama_tokenize(ctx, text, false);
+
+         fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
+
+         {
+             const std::string fname_out = fname_text + ".tokcpp";
+
+             std::ofstream ofs(fname_out);
+             if (!ofs) {
+                 fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
+                 return 1;
+             }
+
+             for (const auto & tok : res) {
+                 ofs << tok << " '" << llama_detokenize_bpe(ctx, std::vector<int>{tok}) << "'" << std::endl;
+             }
+         }
+
+         fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
+     }
+
+     llama_free_model(model);
+     llama_free(ctx);
+
+     llama_backend_free();
+
+     return success ? 0 : 3;
+ }
package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp
@@ -0,0 +1,190 @@
+ #include "llama.h"
+ #include "common.h"
+ #include "console.h"
+
+ #include <cstdio>
+ #include <string>
+ #include <map>
+ #include <vector>
+ #include <fstream>
+
+ // generate using test-tokenizer-0-llama.py
+ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
+     static std::map<std::string, std::vector<llama_token>> _k_tests = {
+         { "" , { }, },
+         { " " , { 259, }, },
+         { "  " , { 1678, }, },
+         { "   " , { 268, }, },
+         { "\t" , { 29871, 12, }, },
+         { "\n" , { 29871, 13, }, },
+         { "\t\n" , { 29871, 12, 13, }, },
+         { "Hello world" , { 15043, 3186, }, },
+         { " Hello world" , { 29871, 15043, 3186, }, },
+         { "Hello World" , { 15043, 2787, }, },
+         { " Hello World" , { 29871, 15043, 2787, }, },
+         { " Hello World!" , { 29871, 15043, 2787, 29991, }, },
+         { "Hello, world!" , { 15043, 29892, 3186, 29991, }, },
+         { " Hello, world!" , { 29871, 15043, 29892, 3186, 29991, }, },
+         { " this is 🦙.cpp" , { 29871, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
+         { "w048 7tuijk dsdfhu" , { 281, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
+         { "нещо на Български" , { 1538, 4851, 665, 1386, 29713, 1305, }, },
+         { "កាន់តែពិសេសអាចខលចេញ" , { 29871, 31849, 31324, 31934, 228, 162, 142, 228, 161, 146, 228, 162, 133, 228, 161, 153, 228, 161, 186, 31708, 228, 162, 132, 31708, 228, 161, 165, 31324, 228, 161, 136, 228, 161, 132, 228, 161, 158, 228, 161, 136, 228, 162, 132, 228, 161, 140, }, },
+         { "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 29871, 243, 162, 157, 131, 313, 8945, 29897, 29871, 243, 162, 155, 185, 30722, 243, 162, 143, 174, 30598, 313, 20787, 953, 3848, 275, 16125, 630, 29897, 29871, 31681, 313, 6194, 953, 29877, 2397, 393, 756, 967, 1914, 5993, 29897, }, },
+         { "Hello" , { 15043, }, },
+         { " Hello" , { 29871, 15043, }, },
+         { "  Hello" , { 259, 15043, }, },
+         { "   Hello" , { 1678, 15043, }, },
+         { "    Hello" , { 268, 15043, }, },
+         { "    Hello\n    Hello" , { 268, 15043, 13, 1678, 15043, }, },
+         { " (" , { 29871, 313, }, },
+     };
+
+     return _k_tests;
+ }
+
+ int main(int argc, char **argv) {
+     if (argc < 2) {
+         fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
+         return 1;
+     }
+
+     const std::string fname = argv[1];
+
+     std::string fname_text;
+     if (argc > 2) {
+         fname_text = argv[2];
+     }
+
+     fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
+
+     llama_model * model;
+     llama_context * ctx;
+
+     llama_backend_init();
+
+     // load the vocab
+     {
+         auto mparams = llama_model_default_params();
+
+         mparams.vocab_only = true;
+
+         model = llama_load_model_from_file(fname.c_str(), mparams);
+
+         if (model == NULL) {
+             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+             return 1;
+         }
+
+         auto cparams = llama_context_default_params();
+
+         ctx = llama_new_context_with_model(model, cparams);
+
+         if (ctx == NULL) {
+             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+             llama_free_model(model);
+             return 1;
+         }
+     }
+
+     if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
+         fprintf(stderr, "%s : error: vocab type is not SPM\n", __func__);
+         llama_free_model(model);
+         llama_free(ctx);
+         return 2;
+     }
+
+ #ifdef _WIN32
+     // We need this for unicode console support
+     console::init(false, false);
+     atexit([]() { console::cleanup(); });
+ #endif
+
+     bool success = true;
+
+     for (const auto & test_kv : k_tests()) {
+         const std::vector<llama_token> res_bos = llama_tokenize(ctx, test_kv.first, true);
+         const std::vector<llama_token> res_nobos = llama_tokenize(ctx, test_kv.first, false);
+
+         printf("\n");
+         printf("src: '%s'\n", test_kv.first.c_str());
+         printf("res: '%s'\n", llama_detokenize_spm(ctx, res_bos).c_str());
+         printf("tok: ");
+         for (const auto & tok : res_bos) {
+             printf("%d ", tok);
+         }
+         printf("\n");
+
+         bool correct = res_nobos.size() == test_kv.second.size() && res_bos.size() == res_nobos.size() + 1 && res_bos[0] == 1;
+
+         for (int i = 0; i < (int) res_nobos.size() && correct; ++i) {
+             if (test_kv.second[i] != res_bos[i + 1]) {
+                 correct = false;
+             }
+             if (test_kv.second[i] != res_nobos[i]) {
+                 correct = false;
+             }
+         }
+
+         if (!correct) {
+             fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
+             fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
+                 llama_detokenize_spm(ctx, res_nobos).c_str(),
+                 llama_detokenize_spm(ctx, test_kv.second).c_str());
+             fprintf(stderr, "%s : expected tokens: ", __func__);
+             for (const auto & t : test_kv.second) {
+                 fprintf(stderr, "%6d, ", t);
+             }
+             fprintf(stderr, "\n");
+             fprintf(stderr, "%s : got tokens: ", __func__);
+             for (const auto & t : res_nobos) {
+                 fprintf(stderr, "%6d, ", t);
+             }
+             fprintf(stderr, "\n");
+
+             success = false;
+         }
+     }
+
+     if (!fname_text.empty()) {
+         fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
+
+         std::string text;
+         {
+             std::ifstream ifs(fname_text);
+             if (!ifs) {
+                 fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
+                 return 1;
+             }
+             text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
+         }
+
+         fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
+
+         const std::vector<llama_token> res = llama_tokenize(ctx, text, true);
+
+         fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
+
+         {
+             const std::string fname_out = fname_text + ".tokcpp";
+
+             std::ofstream ofs(fname_out);
+             if (!ofs) {
+                 fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
+                 return 1;
+             }
+
+             for (const auto & tok : res) {
+                 ofs << tok << " '" << llama_detokenize_spm(ctx, std::vector<int>{tok}) << "'" << std::endl;
+             }
+         }
+
+         fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
+     }
+
+     llama_free_model(model);
+     llama_free(ctx);
+
+     llama_backend_free();
+
+     return success ? 0 : 3;
+ }
package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp
@@ -0,0 +1,123 @@
+ #include "llama.h"
+ #include "common.h"
+ #include "unicode.h"
+ #include "console.h"
+
+ #include <cassert>
+ #include <codecvt>
+ #include <cstdio>
+ #include <cstring>
+ #include <locale>
+ #include <string>
+ #include <thread>
+ #include <vector>
+
+ int main(int argc, char **argv) {
+     if (argc < 2) {
+         fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
+         return 1;
+     }
+
+     const std::string fname = argv[1];
+
+     fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
+
+     llama_model * model;
+     llama_context * ctx;
+
+     llama_backend_init();
+
+     // load the vocab
+     {
+         auto mparams = llama_model_default_params();
+
+         mparams.vocab_only = true;
+
+         model = llama_load_model_from_file(fname.c_str(), mparams);
+
+         if (model == NULL) {
+             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+             return 1;
+         }
+
+         auto cparams = llama_context_default_params();
+
+         ctx = llama_new_context_with_model(model, cparams);
+
+         if (ctx == NULL) {
+             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+             llama_free_model(model);
+             return 1;
+         }
+     }
+
+     GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
+
+ #ifdef _WIN32
+     // We need this for unicode console support
+     console::init(false, false);
+     atexit([]() { console::cleanup(); });
+ #endif
+
+     const int n_vocab = llama_n_vocab(model);
+
+     for (int i = 0; i < n_vocab; ++i) {
+         std::string str = llama_detokenize_bpe(ctx, std::vector<int>(1, i));
+         try {
+             auto cps = unicode_cpts_from_utf8(str);
+             std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
+             std::string check = llama_detokenize_bpe(ctx, tokens);
+             if (check != str) {
+                 fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
+                     __func__, i, str.c_str(), str.length(), check.c_str(), check.length());
+                 return 2;
+             }
+         }
+         catch (const std::invalid_argument &) {
+             //fprintf(stderr, "%s : info: utf8 conversion %d '%s'\n", __func__, i, str.c_str());
+         }
+     }
+
+     // unicode
+     {
+         const int nthread = std::thread::hardware_concurrency();
+
+         std::vector<std::thread> threads(nthread);
+
+         for (int i = 0; i < nthread; ++i) {
+             threads[i] = std::thread([i, nthread, ctx]() {
+                 for (uint32_t cp = i; cp < 0x0010ffff; cp += nthread) {
+                     if (!( // NOLINT
+                         (cp < 0x03 || cp > 0x05) && cp != 0x0b && cp != 0x11 &&
+                         (cp < 0x13 || cp > 0x17) && cp != 0x19 &&
+                         (cp < 0x1c || cp > 0x1e) &&
+                         (cp < 0xd800 || cp > 0xdfff) &&
+                         (cp < 0x00040000 || cp >= 0x000e0000)
+                     )) {
+                         continue;
+                     }
+
+                     std::string str = unicode_cpt_to_utf8(cp);
+                     std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
+                     std::string check = llama_detokenize_bpe(ctx, tokens);
+                     if (cp != 9601 && str != check) {
+                         fprintf(stderr, "error: codepoint %x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
+                             cp, check.c_str(), check.length(), str.c_str(), str.length());
+                         std::exit(3);
+                     }
+                 }
+             });
+         }
+
+         for (auto & t : threads) {
+             t.join();
+         }
+     }
+
+     llama_free_model(model);
+     llama_free(ctx);
+
+     llama_backend_free();
+
+     return 0;
+ }
package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp
@@ -0,0 +1,111 @@
+ #include "llama.h"
+ #include "common.h"
+ #include "unicode.h"
+ #include "console.h"
+
+ #include <cassert>
+ #include <codecvt>
+ #include <cstdio>
+ #include <cstring>
+ #include <locale>
+ #include <string>
+ #include <thread>
+ #include <vector>
+
+ int main(int argc, char **argv) {
+     if (argc < 2) {
+         fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
+         return 1;
+     }
+
+     const std::string fname = argv[1];
+
+     fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
+
+     llama_model * model;
+     llama_context * ctx;
+
+     llama_backend_init();
+
+     // load the vocab
+     {
+         auto mparams = llama_model_default_params();
+
+         mparams.vocab_only = true;
+
+         model = llama_load_model_from_file(fname.c_str(), mparams);
+
+         if (model == NULL) {
+             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+             return 1;
+         }
+
+         auto cparams = llama_context_default_params();
+
+         ctx = llama_new_context_with_model(model, cparams);
+
+         if (ctx == NULL) {
+             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+             llama_free_model(model);
+             return 1;
+         }
+     }
+
+     GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
+
+ #ifdef _WIN32
+     // We need this for unicode console support
+     console::init(false, false);
+     atexit([]() { console::cleanup(); });
+ #endif
+
+     const int n_vocab = llama_n_vocab(model);
+
+     for (int i = 0; i < n_vocab; ++i) {
+         std::string str = llama_detokenize_spm(ctx, std::vector<int>(1, i));
+         std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
+         std::string check = llama_detokenize_spm(ctx, tokens);
+         if (check != str) {
+             fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
+                 __func__, i, str.c_str(), str.length(), check.c_str(), check.length());
+             return 2;
+         }
+     }
+
+     // unicode
+     {
+         const int nthread = std::thread::hardware_concurrency();
+
+         std::vector<std::thread> threads(nthread);
+
+         for (int i = 0; i < nthread; ++i) {
+             threads[i] = std::thread([i, nthread, ctx]() {
+                 for (uint32_t cp = i; cp < 0x0010ffff; cp += nthread) {
+                     if (cp >= 0xd800 && cp <= 0xdfff) {
+                         continue;
+                     }
+
+                     std::string str = unicode_cpt_to_utf8(cp);
+                     std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
+                     std::string check = llama_detokenize_spm(ctx, tokens);
+                     if (cp != 9601 && str != check) {
+                         fprintf(stderr, "error: codepoint %x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
+                             cp, check.c_str(), check.length(), str.c_str(), str.length());
+                         std::exit(3);
+                     }
+                 }
+             });
+         }
+
+         for (auto & t : threads) {
+             t.join();
+         }
+     }
+
+     llama_free_model(model);
+     llama_free(ctx);
+
+     llama_backend_free();
+
+     return 0;
+ }