@fugood/llama.node 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/src/llama.cpp/CMakeLists.txt +0 -1
- package/src/llama.cpp/common/CMakeLists.txt +4 -5
- package/src/llama.cpp/common/arg.cpp +44 -0
- package/src/llama.cpp/common/common.cpp +22 -6
- package/src/llama.cpp/common/common.h +15 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +10 -2
- package/src/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/src/llama.cpp/ggml/include/ggml.h +104 -10
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +12 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +749 -163
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +12 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +88 -9
- package/src/llama.cpp/include/llama.h +13 -47
- package/src/llama.cpp/src/llama-arch.cpp +298 -3
- package/src/llama.cpp/src/llama-arch.h +22 -1
- package/src/llama.cpp/src/llama-batch.cpp +103 -71
- package/src/llama.cpp/src/llama-batch.h +31 -18
- package/src/llama.cpp/src/llama-chat.cpp +59 -1
- package/src/llama.cpp/src/llama-chat.h +3 -0
- package/src/llama.cpp/src/llama-context.cpp +134 -95
- package/src/llama.cpp/src/llama-context.h +13 -16
- package/src/llama.cpp/src/llama-cparams.h +3 -2
- package/src/llama.cpp/src/llama-graph.cpp +279 -180
- package/src/llama.cpp/src/llama-graph.h +183 -122
- package/src/llama.cpp/src/llama-hparams.cpp +47 -1
- package/src/llama.cpp/src/llama-hparams.h +12 -1
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
- package/src/llama.cpp/src/llama-kv-cache-unified.h +143 -47
- package/src/llama.cpp/src/llama-kv-cells.h +62 -10
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
- package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +21 -11
- package/src/llama.cpp/src/llama-memory.cpp +17 -0
- package/src/llama.cpp/src/llama-memory.h +3 -0
- package/src/llama.cpp/src/llama-model.cpp +3373 -743
- package/src/llama.cpp/src/llama-model.h +20 -4
- package/src/llama.cpp/src/llama-quant.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +376 -10
- package/src/llama.cpp/src/llama-vocab.h +43 -0
- package/src/llama.cpp/src/unicode.cpp +207 -0
- package/src/llama.cpp/src/unicode.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
@@ -32,17 +32,21 @@ enum llm_type {
     LLM_TYPE_190M,
     LLM_TYPE_220M,
     LLM_TYPE_250M,
+    LLM_TYPE_256M,
     LLM_TYPE_270M,
     LLM_TYPE_335M,
+    LLM_TYPE_350M,
     LLM_TYPE_410M,
     LLM_TYPE_450M,
     LLM_TYPE_475M,
+    LLM_TYPE_700M,
     LLM_TYPE_770M,
     LLM_TYPE_780M,
     LLM_TYPE_0_3B,
     LLM_TYPE_0_5B,
     LLM_TYPE_0_6B,
     LLM_TYPE_1B,
+    LLM_TYPE_1_2B,
     LLM_TYPE_1_3B,
     LLM_TYPE_1_4B,
     LLM_TYPE_1_5B,
@@ -94,8 +98,11 @@ enum llm_type {
     LLM_TYPE_57B_A14B,
     LLM_TYPE_17B_16E, // llama4 Scout
     LLM_TYPE_17B_128E, // llama4 Maverick
+    LLM_TYPE_A13B,
+    LLM_TYPE_21B_A3B, // Ernie MoE small
     LLM_TYPE_30B_A3B,
     LLM_TYPE_235B_A22B,
+    LLM_TYPE_300B_A47B, // Ernie MoE big
     LLM_TYPE_E2B,
     LLM_TYPE_E4B,
 };
@@ -153,6 +160,12 @@ struct llama_layer_convnext {
     struct ggml_tensor * gamma = nullptr;
 };
 
+struct llama_layer_shortconv {
+    struct ggml_tensor * in_proj  = nullptr;
+    struct ggml_tensor * conv     = nullptr;
+    struct ggml_tensor * out_proj = nullptr;
+};
+
 struct llama_layer {
     // normalization
     struct ggml_tensor * attn_norm = nullptr;
@@ -172,6 +185,10 @@ struct llama_layer {
     struct ggml_tensor * ffn_sub_norm = nullptr;
     struct ggml_tensor * attn_norm_cross = nullptr;
     struct ggml_tensor * attn_norm_enc = nullptr;
+    struct ggml_tensor * ssm_norm = nullptr;
+    struct ggml_tensor * ssm_dt_norm = nullptr;
+    struct ggml_tensor * ssm_b_norm = nullptr;
+    struct ggml_tensor * ssm_c_norm = nullptr;
 
     // attention
     struct ggml_tensor * wq = nullptr;
@@ -335,6 +352,8 @@ struct llama_layer {
     struct llama_layer_posnet posnet;
 
     struct llama_layer_convnext convnext;
+
+    struct llama_layer_shortconv shortconv;
 };
 
 struct llama_model {
@@ -435,10 +454,7 @@ struct llama_model {
     llama_memory_i * create_memory(const llama_memory_params & params, llama_cparams & cparams) const;
 
     // TODO: move this to new llm_arch_model_i interface
-    llm_graph_result_ptr build_graph(
-            const llm_graph_params & params,
-                       ggml_cgraph * gf,
-                    llm_graph_type   type) const;
+    ggml_cgraph * build_graph(const llm_graph_params & params) const;
 
 private:
     struct impl;
@@ -844,6 +844,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     // do not quantize Mamba's small yet 2D weights
     // NOTE: can't use LLM_TN here because the layer number is not known
     quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
+    quantize &= name.find("shortconv.conv.weight") == std::string::npos;
 
     // do not quantize RWKV's small yet 2D weights
     quantize &= name.find("time_mix_first.weight") == std::string::npos;
@@ -883,8 +884,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
             if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) {
                 if (qtype != new_type) {
                     LLAMA_LOG_DEBUG("(overriding %s) ", ggml_type_name(new_type));
-                    new_type = qtype;
-                    break; // if two or more types are specified for the tensor, first match wins
+                    new_type = qtype; // if two or more types are specified for the same tensor, the last match wins
                 }
            }
         }
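
Note on the hunk above: the regex-keyed per-tensor quantization overrides in llama_model_quantize_impl now resolve conflicts by letting the last matching pattern win instead of breaking on the first match. A minimal standalone sketch of that resolution rule, using hypothetical names (tensor_type_override, resolve_type) that are not part of llama.cpp:

#include <cstdio>
#include <regex>
#include <string>
#include <vector>

// Hypothetical stand-in for an override entry: regex pattern -> quant type name.
struct tensor_type_override {
    std::string pattern;
    std::string qtype;
};

// Resolve the quant type for one tensor: every matching pattern overwrites the
// previous choice, so when two or more patterns match, the last one wins (no early break).
static std::string resolve_type(const std::string & tensor_name,
                                const std::vector<tensor_type_override> & overrides,
                                std::string new_type) {
    for (const auto & o : overrides) {
        if (std::regex pattern(o.pattern); std::regex_search(tensor_name, pattern)) {
            new_type = o.qtype;
        }
    }
    return new_type;
}

int main() {
    std::vector<tensor_type_override> overrides = {
        { "ffn_down",           "q5_k" },
        { "blk\\.0\\.ffn_down", "q8_0" },
    };
    // Both patterns match this tensor, so the later q8_0 override is applied.
    std::printf("%s\n", resolve_type("blk.0.ffn_down.weight", overrides, "q4_k").c_str());
    return 0;
}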
@@ -11,6 +11,7 @@
 #include <cassert>
 #include <cctype>
 #include <cfloat>
+#include <cmath>
 #include <cstdarg>
 #include <cstring>
 #include <forward_list>
@@ -351,6 +352,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                 break;
             case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
             case LLAMA_VOCAB_PRE_TYPE_QWEN2:
+            case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
                 regex_exprs = {
                     // original regex from tokenizer.json
                     // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
@@ -403,6 +405,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                     "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
+                regex_exprs = {
+                    // K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
+                    // The custom handler implements all K2 patterns with proper Han character exclusion
+                    "\\p{Han}+",
+                };
+                break;
             case LLAMA_VOCAB_PRE_TYPE_SUPERBPE:
                 regex_exprs = {
                     "\\p{N}+",
@@ -1195,6 +1204,284 @@ private:
     const llm_tokenizer_rwkv & tokenizer;
 };
 
+struct llm_tokenizer_plamo2 : llm_tokenizer {
+    llm_tokenizer_plamo2(const llama_vocab & vocab) {
+        build(vocab);
+    }
+
+    void build(const llama_vocab & vocab) {
+        // Reset internal structures
+        tokens_.clear();
+        bytes_.assign(256, 0);
+        to_suffix_id_.clear();
+        table_.clear();
+
+        // Build token list and byte mapping
+        std::unordered_map<std::string, float> suffix_to_score;
+        std::unordered_map<std::string, llama_token> token_to_id;
+
+        for (size_t token_id = 0; token_id < vocab.n_tokens(); ++token_id) {
+            const auto & entry = vocab.get_token_data(token_id);
+            tokens_.push_back(entry.text);
+            token_to_id[entry.text] = static_cast<llama_token>(token_id);
+
+            // Handle byte tokens
+            if (vocab.is_byte(token_id)) {
+                if (entry.text.length() == 6 && entry.text.substr(0, 3) == "<0x" && entry.text.back() == '>') {
+                    std::string hex_str = entry.text.substr(3, 2);
+                    int byte_val = std::stoi(hex_str, nullptr, 16);
+                    bytes_[byte_val] = static_cast<llama_token>(token_id);
+                }
+                continue;
+            }
+
+            // Add token and all its suffixes to suffix_to_score
+            suffix_to_score[entry.text] = entry.score;
+
+            // Extract suffixes character by character (UTF-8 aware)
+            std::vector<uint32_t> cpts = unicode_cpts_from_utf8(entry.text);
+            for (size_t i = 1; i < cpts.size(); ++i) {
+                std::string suffix;
+                for (size_t j = i; j < cpts.size(); ++j) {
+                    suffix += unicode_cpt_to_utf8(cpts[j]);
+                }
+                if (suffix_to_score.find(suffix) == suffix_to_score.end()) {
+                    suffix_to_score[suffix] = std::numeric_limits<float>::quiet_NaN();
+                }
+            }
+        }
+
+        // Check that all byte tokens are set
+        for (int i = 0; i < 256; ++i) {
+            if (bytes_[i] == 0) {
+                throw std::runtime_error("Byte token for <0x" + std::to_string(i) + "> is not set");
+            }
+        }
+
+        // Build suffix list in lexicographical order of reversed strings
+        std::vector<std::string> suffixes;
+        for (const auto & pair : suffix_to_score) {
+            suffixes.push_back(pair.first);
+        }
+        suffixes.push_back("");  // Empty suffix
+
+        std::sort(suffixes.begin(), suffixes.end(), [](const std::string & a, const std::string & b) {
+            std::string rev_a(a.rbegin(), a.rend());
+            std::string rev_b(b.rbegin(), b.rend());
+            return rev_a < rev_b;
+        });
+
+        // Build suffix_to_id and to_suffix_id_
+        std::unordered_map<std::string, int32_t> suffix_to_id;
+        int32_t num_pieces = 0;
+
+        for (const auto & suffix : suffixes) {
+            suffix_to_id[suffix] = num_pieces;
+            if (!suffix.empty()) {
+                std::vector<uint32_t> cpts = unicode_cpts_from_utf8(suffix);
+
+                std::string remaining;
+                for (size_t i = 1; i < cpts.size(); ++i) {
+                    remaining += unicode_cpt_to_utf8(cpts[i]);
+                }
+
+                int64_t piece_code = (static_cast<int64_t>(cpts[0]) << 32) | suffix_to_id[remaining];
+                to_suffix_id_[piece_code] = num_pieces;
+
+                // Count number of pieces for this suffix
+                int32_t pieces_for_suffix = 1;  // sentinel row
+                for (int32_t piece_length = static_cast<int32_t>(cpts.size()); piece_length > 0; --piece_length) {
+                    std::string piece;
+                    for (int32_t i = 0; i < piece_length; ++i) {
+                        piece += unicode_cpt_to_utf8(cpts[i]);
+                    }
+                    if (suffix_to_score.find(piece) != suffix_to_score.end()) {
+                        pieces_for_suffix++;
+                    }
+                }
+                num_pieces += pieces_for_suffix;
+            } else {
+                num_pieces++;  // Empty suffix contributes one piece (sentinel row)
+            }
+        }
+
+        // Build flattened table
+        table_.resize(num_pieces, std::vector<int32_t>(4, 0));
+        int32_t table_idx = 0;
+
+        for (const auto & suffix : suffixes) {
+            // Add all prefixes of the suffix to the table (in decreasing order of length)
+            std::vector<uint32_t> cpts = unicode_cpts_from_utf8(suffix);
+            for (int32_t piece_length = static_cast<int32_t>(cpts.size()); piece_length > 0; --piece_length) {
+                std::string piece;
+                for (int32_t i = 0; i < piece_length; ++i) {
+                    piece += unicode_cpt_to_utf8(cpts[i]);
+                }
+
+                auto score_it = suffix_to_score.find(piece);
+                if (score_it == suffix_to_score.end()) {
+                    continue;
+                }
+
+                table_[table_idx][TABLE_PIECE_LENGTH] = piece_length;
+                auto token_it = token_to_id.find(piece);
+                table_[table_idx][TABLE_TOKEN_ID] = (token_it != token_to_id.end()) ? token_it->second : -1;
+
+                float score = score_it->second;
+                table_[table_idx][TABLE_SCORE] = std::isfinite(score) ?
+                    static_cast<int32_t>(std::round(score * 1e4)) : INVALID_SCORE;
+                table_[table_idx][TABLE_PIECE_ID] = suffix_to_id[piece];
+
+                table_idx++;
+            }
+
+            // Add sentinel row
+            table_[table_idx][TABLE_PIECE_LENGTH] = 1;
+            table_[table_idx][TABLE_TOKEN_ID] = -1;
+            table_[table_idx][TABLE_SCORE] = UNKNOWN_SCORE;
+            table_idx++;
+        }
+    }
+
+    std::vector<llama_token> encode(const std::string & text) const {
+        std::vector<uint32_t> unicode_data = unicode_cpts_from_utf8(text);
+        // Skip the first code point if it is a BOM (Byte Order Mark)
+        if (!unicode_data.empty() && unicode_data[0] == 0xFEFF) {
+            unicode_data.erase(unicode_data.begin());
+        }
+
+        if (unicode_data.empty()) {
+            return {};
+        }
+
+        const size_t data_len = unicode_data.size();
+
+        // Initialize scores array (dynamic programming)
+        std::vector<int64_t> scores(data_len + 1, static_cast<int64_t>(1) << 60);
+        scores[data_len] = 0;
+
+        // Path array to track best tokenization
+        std::vector<std::vector<int32_t>> path(data_len + 1, std::vector<int32_t>(3, 0));
+
+        int32_t suffix_id = 0;
+
+        // Process from end to beginning
+        for (int i = static_cast<int>(data_len) - 1; i >= 0; --i) {
+            uint32_t c = unicode_data[i];
+
+            // Find next suffix ID
+            for (size_t p = suffix_id; p < table_.size(); ++p) {
+                int64_t piece_code = (static_cast<int64_t>(c) << 32) | table_[p][TABLE_PIECE_ID];
+                auto it = to_suffix_id_.find(piece_code);
+                suffix_id = (it != to_suffix_id_.end()) ? it->second : 0;
+
+                if (suffix_id > 0 || table_[p][TABLE_SCORE] == UNKNOWN_SCORE) {
+                    break;
+                }
+            }
+
+            // Update best path
+            for (size_t p = suffix_id; p < table_.size(); ++p) {
+                int32_t score = table_[p][TABLE_SCORE];
+                if (score > INVALID_SCORE) {
+                    int32_t piece_length = table_[p][TABLE_PIECE_LENGTH];
+                    int64_t s = scores[i + piece_length] - score;
+
+                    if (s < scores[i]) {
+                        scores[i] = s;
+                        path[i][PATH_TOKEN_LENGTH] = piece_length;
+                        path[i][PATH_TOKEN_ID]     = table_[p][TABLE_TOKEN_ID];
+                        path[i][PATH_NUM_TOKENS]   = path[i + piece_length][PATH_NUM_TOKENS] + 1;
+
+                        if (score == UNKNOWN_SCORE) {
+                            // Add UTF-8 byte count
+                            path[i][PATH_NUM_TOKENS] += (c >= 0x80) + (c >= 0x800) + (c >= 0x10000);
+                        }
+                    }
+                }
+
+                if (score == UNKNOWN_SCORE) {
+                    break;
+                }
+            }
+        }
+
+        // Decode the best path
+        std::vector<llama_token> token_ids;
+        token_ids.reserve(path[0][PATH_NUM_TOKENS]);
+
+        int pos = 0;
+        while (pos < static_cast<int>(data_len)) {
+            if (path[pos][PATH_TOKEN_ID] >= 0) {
+                token_ids.push_back(path[pos][PATH_TOKEN_ID]);
+            } else {
+                // Fall back to byte tokens
+                uint32_t c = unicode_data[pos];
+                int s = 1 + (c >= 0x80) + (c >= 0x800) + (c >= 0x10000);
+
+                for (int i = 0; i < s; ++i) {
+                    uint8_t b;
+                    if (s == 1) {
+                        b = c;
+                    } else {
+                        if (i == 0) {
+                            b = (0xF00 >> s) & 0xFF;
+                        } else {
+                            b = 0x80;
+                        }
+                    }
+                    token_ids.push_back(bytes_[b | ((c >> ((s - i - 1) * 6)) & 0x3F)]);
+                }
+            }
+
+            assert(path[pos][PATH_TOKEN_LENGTH] > 0);
+            pos += path[pos][PATH_TOKEN_LENGTH];
+        }
+
+        return token_ids;
+    }
+private:
+    // Constants for table structure
+    static constexpr int32_t TABLE_PIECE_LENGTH = 0;
+    static constexpr int32_t TABLE_TOKEN_ID     = 1;
+    static constexpr int32_t TABLE_SCORE        = 2;
+    static constexpr int32_t TABLE_PIECE_ID     = 3;
+
+    // Constants for path array
+    static constexpr int32_t PATH_TOKEN_LENGTH = 0;
+    static constexpr int32_t PATH_TOKEN_ID     = 1;
+    static constexpr int32_t PATH_NUM_TOKENS   = 2;
+
+    // Score constants
+    static constexpr int32_t INVALID_SCORE = -20000000;
+    static constexpr int32_t UNKNOWN_SCORE = -10000000;
+
+    // List of tokens in the vocabulary
+    std::vector<std::string> tokens_;
+
+    // Mapping from byte code point to token ID (for byte fallback)
+    std::vector<llama_token> bytes_;
+
+    // Mapping from piece code to suffix ID
+    std::unordered_map<int64_t, int32_t> to_suffix_id_;
+
+    // Flattened table representing the Trie structure
+    // Each row contains: [piece_length, token_id, score, piece_id]
+    std::vector<std::vector<int32_t>> table_;
+};
+
+struct llm_tokenizer_plamo2_session {
+    llm_tokenizer_plamo2_session(const llm_tokenizer_plamo2 & tokenizer) : tokenizer(tokenizer) {}
+
+    void tokenize(const std::string & text, std::vector<llama_token> & output) {
+        std::vector<llama_token> tokens = tokenizer.encode(text);
+        output.insert(output.end(), tokens.begin(), tokens.end());
+    }
+
+private:
+    const llm_tokenizer_plamo2 & tokenizer;
+};
+
 //
 // impl
 //
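
Note on the encode() fallback above: a code point with no token of its own is emitted as its UTF-8 bytes, each looked up as a <0xXX> byte token; the leading byte is derived with (0xF00 >> s) & 0xFF, where s is the encoded length. A small self-contained sketch of that byte-splitting logic (illustrative only, not part of the diff):

#include <cstdint>
#include <cstdio>
#include <vector>

// Split one Unicode code point into the UTF-8 byte values that the PLaMo-2
// byte-fallback path would look up as <0xXX> tokens.
static std::vector<uint8_t> utf8_bytes(uint32_t c) {
    std::vector<uint8_t> out;
    int s = 1 + (c >= 0x80) + (c >= 0x800) + (c >= 0x10000);  // encoded length in bytes
    for (int i = 0; i < s; ++i) {
        uint8_t b;
        if (s == 1) {
            b = c;                    // ASCII: the byte is the code point itself
        } else if (i == 0) {
            b = (0xF00 >> s) & 0xFF;  // leading-byte prefix: 0xC0, 0xE0 or 0xF0
        } else {
            b = 0x80;                 // continuation-byte prefix
        }
        out.push_back(b | ((c >> ((s - i - 1) * 6)) & 0x3F));  // fill in 6 payload bits
    }
    return out;
}

int main() {
    for (uint8_t b : utf8_bytes(0x3042)) {  // U+3042 HIRAGANA LETTER A -> E3 81 82
        std::printf("<0x%02X> ", b);
    }
    std::printf("\n");
    return 0;
}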
@@ -1498,6 +1785,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         special_unk_id = LLAMA_TOKEN_NULL;
         special_sep_id = LLAMA_TOKEN_NULL;
         special_pad_id = LLAMA_TOKEN_NULL;
+    } else if (tokenizer_model == "plamo2") {
+        type = LLAMA_VOCAB_TYPE_PLAMO2;
+
+        // PLaMo-2 default special tokens (these will be overridden by model config)
+        special_bos_id = 1; // <|plamo:bos|>
+        special_eos_id = 2; // <|plamo:eos|>
+        special_unk_id = 0; // <|plamo:unk|>
+        special_sep_id = LLAMA_TOKEN_NULL;
+        special_pad_id = 3; // <|plamo:pad|>
+        special_mask_id = LLAMA_TOKEN_NULL;
     } else {
         throw std::runtime_error(format("unknown tokenizer: '%s'", tokenizer_model.c_str()));
     }
@@ -1522,7 +1819,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "llama-v3" ||
                 tokenizer_pre == "llama-bpe"||
                 tokenizer_pre == "falcon3" ||
-                tokenizer_pre == "pixtral") {
+                tokenizer_pre == "falcon-h1" ||
+                tokenizer_pre == "pixtral" ||
+                tokenizer_pre == "midm-2.0" ||
+                tokenizer_pre == "lfm2") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
             ignore_merges = true;
             add_bos = true;
@@ -1554,7 +1854,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "jina-de" ||
                 tokenizer_pre == "gigachat" ||
                 tokenizer_pre == "jina-v2-es" ||
-                tokenizer_pre == "jina-v2-de") {
+                tokenizer_pre == "jina-v2-de" ||
+                tokenizer_pre == "a.x-4.0") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
         } else if (
                 tokenizer_pre == "jina-v1-en" ||
@@ -1624,6 +1925,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         } else if (
                 tokenizer_pre == "exaone") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_EXAONE;
+        } else if (
+                tokenizer_pre == "exaone4") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
         } else if (
                 tokenizer_pre == "chameleon") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_CHAMELEON;
@@ -1656,6 +1960,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "seed-coder") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
             clean_spaces = false;
+        } else if (
+                tokenizer_pre == "hunyuan") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
+            clean_spaces = false;
+        } else if (
+                tokenizer_pre == "kimi-k2") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;
+            clean_spaces = false;
         } else {
             throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
         }
@@ -1839,6 +2151,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<EOT>"
                     || t.first == "_<EOT>"
                     || t.first == "<|end▁of▁sentence|>" // DeepSeek
+                    || t.first == "<end_of_utterance>" // smoldocling
                ) {
                 special_eot_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -1998,6 +2311,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<EOT>"
                     || t.first == "_<EOT>"
                     || t.first == "<|end_of_text|>"
+                    || t.first == "<end_of_utterance>" // smoldocling
                ) {
                 special_eog_ids.insert(t.second);
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -2134,13 +2448,14 @@ enum llama_vocab_type llama_vocab::impl::get_type() const {
 
 std::string llama_vocab::impl::type_name() const{
     switch (type) {
-        case LLAMA_VOCAB_TYPE_NONE: return "no vocab";
-        case LLAMA_VOCAB_TYPE_SPM:  return "SPM";
-        case LLAMA_VOCAB_TYPE_BPE:  return "BPE";
-        case LLAMA_VOCAB_TYPE_WPM:  return "WPM";
-        case LLAMA_VOCAB_TYPE_UGM:  return "UGM";
-        case LLAMA_VOCAB_TYPE_RWKV: return "RWKV";
-        default:                    return "unknown";
+        case LLAMA_VOCAB_TYPE_NONE:   return "no vocab";
+        case LLAMA_VOCAB_TYPE_SPM:    return "SPM";
+        case LLAMA_VOCAB_TYPE_BPE:    return "BPE";
+        case LLAMA_VOCAB_TYPE_WPM:    return "WPM";
+        case LLAMA_VOCAB_TYPE_UGM:    return "UGM";
+        case LLAMA_VOCAB_TYPE_RWKV:   return "RWKV";
+        case LLAMA_VOCAB_TYPE_PLAMO2: return "PLaMo2";
+        default:                      return "unknown";
     }
 }
 
@@ -2223,6 +2538,9 @@ void llama_vocab::impl::init_tokenizer(enum llama_vocab_type type) {
         case LLAMA_VOCAB_TYPE_RWKV:
             tokenizer = std::make_unique<llm_tokenizer_rwkv>(vocab);
             break;
+        case LLAMA_VOCAB_TYPE_PLAMO2:
+            tokenizer = std::make_unique<llm_tokenizer_plamo2>(vocab);
+            break;
         default:
             GGML_ABORT("unsupported vocab type");
     }
@@ -2555,6 +2873,23 @@ std::vector<llama_token> llama_vocab::impl::tokenize(
                     if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
                         std::string text = fragment.raw_text.substr(fragment.offset, fragment.length);
 
+#ifdef PRETOKENIZERDEBUG
+                        LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str());
+#endif
+
+                        session.tokenize(text, output);
+                    } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN)
+                        output.push_back(fragment.token);
+                    }
+                }
+            } break;
+        case LLAMA_VOCAB_TYPE_PLAMO2:
+            {
+                llm_tokenizer_plamo2_session session(*static_cast<const llm_tokenizer_plamo2 *>(tokenizer.get()));
+                for (const auto & fragment : fragment_buffer) {
+                    if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) {
+                        std::string text = fragment.raw_text.substr(fragment.offset, fragment.length);
+
 #ifdef PRETOKENIZERDEBUG
                         LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str());
 #endif
@@ -2653,6 +2988,24 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
                 memcpy(buf, result.data(), result.size());
                 return (int)result.size();
             }
+        case LLAMA_VOCAB_TYPE_PLAMO2: {
+            // PLaMo-2 uses similar token handling as BPE/SPM
+            if (vocab.is_byte(token)) {
+                // Handle byte tokens like <0xXX>
+                if (token_text.length() == 6 && token_text.substr(0, 3) == "<0x" && token_text.back() == '>') {
+                    int hex_val = std::stoi(token_text.substr(3, 2), nullptr, 16);
+                    if (length < 1) {
+                        return -1;
+                    }
+                    buf[0] = static_cast<char>(hex_val);
+                    return 1;
+                }
+            }
+
+            // Normal token - just copy the text
+            std::string result = token_text;
+            return _try_copy(result.data(), result.size());
+        }
         default:
             GGML_ABORT("fatal error");
     }
@@ -2897,6 +3250,12 @@ llama_token llama_vocab::byte_to_token(uint8_t ch) const {
         case LLAMA_VOCAB_TYPE_BPE: {
             return pimpl->token_to_id.at(unicode_byte_to_utf8(ch));
         }
+        case LLAMA_VOCAB_TYPE_PLAMO2: {
+            // PLaMo-2 uses byte tokens in format <0xXX>
+            char hex_str[8];
+            snprintf(hex_str, sizeof(hex_str), "<0x%02X>", ch);
+            return pimpl->token_to_id.at(hex_str);
+        }
         default:
             GGML_ABORT("fatal error");
     }
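
Note on the hunk above: the <0xXX> naming makes the byte/token mapping reversible with plain string formatting and parsing, which is also what the token_to_piece hunk earlier relies on. A tiny illustrative round-trip sketch (standalone helper names, not the llama.cpp API):

#include <cstdint>
#include <cstdio>
#include <string>

// Format a raw byte the way PLaMo-2 byte tokens are named, e.g. 0x0A -> "<0x0A>".
static std::string byte_to_token_text(uint8_t ch) {
    char hex_str[8];
    snprintf(hex_str, sizeof(hex_str), "<0x%02X>", ch);
    return hex_str;
}

// Parse it back: "<0x0A>" -> 0x0A (mirrors the substr(3, 2) + stoi(..., 16) check in the diff).
static int token_text_to_byte(const std::string & text) {
    if (text.length() == 6 && text.substr(0, 3) == "<0x" && text.back() == '>') {
        return std::stoi(text.substr(3, 2), nullptr, 16);
    }
    return -1; // not a byte token
}

int main() {
    std::string t = byte_to_token_text(0x0A);
    std::printf("%s -> %d\n", t.c_str(), token_text_to_byte(t)); // prints "<0x0A> -> 10"
    return 0;
}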
@@ -2998,6 +3357,10 @@ llama_token llama_vocab::token_fim_sep() const {
     return pimpl->special_fim_sep_id;
 }
 
+llama_token llama_vocab::token_mask() const {
+    return pimpl->special_mask_id;
+}
+
 bool llama_vocab::get_add_space_prefix() const {
     return pimpl->add_space_prefix;
 }
@@ -3238,6 +3601,10 @@ llama_token llama_vocab_fim_sep(const struct llama_vocab * vocab) {
     return vocab->token_fim_sep();
 }
 
+llama_token llama_vocab_mask(const struct llama_vocab* vocab) {
+    return vocab->token_mask();
+}
+
 // deprecated
 const char * llama_token_get_text(const struct llama_vocab * vocab, llama_token token) {
     return llama_vocab_get_text(vocab, token);
@@ -3374,4 +3741,3 @@ int32_t llama_detokenize(
         bool unparse_special) {
     return vocab->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
 }
-
@@ -6,6 +6,48 @@
 #include <vector>
 #include <memory>
 
+// pre-tokenization types
+enum llama_vocab_pre_type {
+    LLAMA_VOCAB_PRE_TYPE_DEFAULT        = 0,
+    LLAMA_VOCAB_PRE_TYPE_LLAMA3         = 1,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM   = 2,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
+    LLAMA_VOCAB_PRE_TYPE_FALCON         = 4,
+    LLAMA_VOCAB_PRE_TYPE_MPT            = 5,
+    LLAMA_VOCAB_PRE_TYPE_STARCODER      = 6,
+    LLAMA_VOCAB_PRE_TYPE_GPT2           = 7,
+    LLAMA_VOCAB_PRE_TYPE_REFACT         = 8,
+    LLAMA_VOCAB_PRE_TYPE_COMMAND_R      = 9,
+    LLAMA_VOCAB_PRE_TYPE_STABLELM2      = 10,
+    LLAMA_VOCAB_PRE_TYPE_QWEN2          = 11,
+    LLAMA_VOCAB_PRE_TYPE_OLMO           = 12,
+    LLAMA_VOCAB_PRE_TYPE_DBRX           = 13,
+    LLAMA_VOCAB_PRE_TYPE_SMAUG          = 14,
+    LLAMA_VOCAB_PRE_TYPE_PORO           = 15,
+    LLAMA_VOCAB_PRE_TYPE_CHATGLM3       = 16,
+    LLAMA_VOCAB_PRE_TYPE_CHATGLM4       = 17,
+    LLAMA_VOCAB_PRE_TYPE_VIKING         = 18,
+    LLAMA_VOCAB_PRE_TYPE_JAIS           = 19,
+    LLAMA_VOCAB_PRE_TYPE_TEKKEN         = 20,
+    LLAMA_VOCAB_PRE_TYPE_SMOLLM         = 21,
+    LLAMA_VOCAB_PRE_TYPE_CODESHELL      = 22,
+    LLAMA_VOCAB_PRE_TYPE_BLOOM          = 23,
+    LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH   = 24,
+    LLAMA_VOCAB_PRE_TYPE_EXAONE         = 25,
+    LLAMA_VOCAB_PRE_TYPE_CHAMELEON      = 26,
+    LLAMA_VOCAB_PRE_TYPE_MINERVA        = 27,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM  = 28,
+    LLAMA_VOCAB_PRE_TYPE_GPT4O          = 29,
+    LLAMA_VOCAB_PRE_TYPE_SUPERBPE       = 30,
+    LLAMA_VOCAB_PRE_TYPE_TRILLION       = 31,
+    LLAMA_VOCAB_PRE_TYPE_BAILINGMOE     = 32,
+    LLAMA_VOCAB_PRE_TYPE_LLAMA4         = 33,
+    LLAMA_VOCAB_PRE_TYPE_PIXTRAL        = 34,
+    LLAMA_VOCAB_PRE_TYPE_SEED_CODER     = 35,
+    LLAMA_VOCAB_PRE_TYPE_HUNYUAN        = 36,
+    LLAMA_VOCAB_PRE_TYPE_KIMI_K2        = 37,
+};
+
 struct LLM_KV;
 struct llama_model_loader;
 
@@ -59,6 +101,7 @@ struct llama_vocab {
     llama_token token_sep() const;
     llama_token token_nl () const;
     llama_token token_pad() const;
+    llama_token token_mask() const;
 
     llama_token token_prefix() const;
     llama_token token_middle() const;
|