@fugood/llama.node 1.4.11 → 1.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +31 -31
- package/src/llama.cpp/common/arg.cpp +128 -59
- package/src/llama.cpp/common/arg.h +1 -0
- package/src/llama.cpp/common/chat-parser.cpp +11 -0
- package/src/llama.cpp/common/chat.cpp +36 -7
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +42 -23
- package/src/llama.cpp/common/common.h +11 -1
- package/src/llama.cpp/common/llguidance.cpp +10 -6
- package/src/llama.cpp/common/regex-partial.cpp +13 -13
- package/src/llama.cpp/common/sampling.cpp +58 -14
- package/src/llama.cpp/common/sampling.h +3 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +23 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +86 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +15 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +768 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +0 -4
- package/src/llama.cpp/include/llama.h +100 -12
- package/src/llama.cpp/src/CMakeLists.txt +4 -0
- package/src/llama.cpp/src/llama-adapter.cpp +12 -3
- package/src/llama.cpp/src/llama-adapter.h +7 -1
- package/src/llama.cpp/src/llama-arch.cpp +78 -0
- package/src/llama.cpp/src/llama-arch.h +8 -0
- package/src/llama.cpp/src/llama-chat.cpp +11 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +637 -49
- package/src/llama.cpp/src/llama-context.h +43 -1
- package/src/llama.cpp/src/llama-grammar.cpp +40 -13
- package/src/llama.cpp/src/llama-grammar.h +2 -0
- package/src/llama.cpp/src/llama-graph.cpp +173 -5
- package/src/llama.cpp/src/llama-graph.h +71 -6
- package/src/llama.cpp/src/llama-hparams.cpp +4 -0
- package/src/llama.cpp/src/llama-hparams.h +12 -5
- package/src/llama.cpp/src/llama-kv-cache.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +11 -4
- package/src/llama.cpp/src/llama-model-loader.cpp +23 -0
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model-saver.cpp +3 -0
- package/src/llama.cpp/src/llama-model.cpp +337 -26
- package/src/llama.cpp/src/llama-model.h +13 -2
- package/src/llama.cpp/src/llama-sampling.cpp +1259 -186
- package/src/llama.cpp/src/llama-sampling.h +19 -7
- package/src/llama.cpp/src/llama-vocab.cpp +101 -33
- package/src/llama.cpp/src/llama-vocab.h +2 -0
- package/src/llama.cpp/src/llama.cpp +87 -64
- package/src/llama.cpp/src/models/afmoe.cpp +9 -5
- package/src/llama.cpp/src/models/bert.cpp +4 -2
- package/src/llama.cpp/src/models/cogvlm.cpp +5 -3
- package/src/llama.cpp/src/models/cohere2-iswa.cpp +3 -0
- package/src/llama.cpp/src/models/deepseek2.cpp +1 -1
- package/src/llama.cpp/src/models/gemma-embedding.cpp +2 -6
- package/src/llama.cpp/src/models/gemma2-iswa.cpp +5 -2
- package/src/llama.cpp/src/models/gemma3.cpp +3 -4
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +4 -7
- package/src/llama.cpp/src/models/llama-iswa.cpp +6 -2
- package/src/llama.cpp/src/models/llama.cpp +19 -6
- package/src/llama.cpp/src/models/maincoder.cpp +117 -0
- package/src/llama.cpp/src/models/mimo2-iswa.cpp +123 -0
- package/src/llama.cpp/src/models/models.h +18 -0
- package/src/llama.cpp/src/models/modern-bert.cpp +116 -0
- package/src/llama.cpp/src/models/openai-moe-iswa.cpp +5 -2
- package/src/llama.cpp/src/models/plamo3.cpp +128 -0
- package/src/llama.cpp/src/models/smallthinker.cpp +11 -5
- package/src/llama.cpp/src/unicode.cpp +23 -14
package/src/llama.cpp/src/llama-sampling.h:

@@ -14,7 +14,19 @@ struct llama_grammar;
 struct llama_sampler_chain {
     llama_sampler_chain_params params;
 
-    std::vector<llama_sampler *> samplers;
+    // has .backend_init() been called?
+    bool is_init = false;
+
+    struct info {
+        bool is_backend;
+
+        llama_sampler * ptr;
+    };
+
+    std::vector<info> samplers;
+
+    // pre-allocated buffer for llama_sampler_sample to avoid repeated allocations
+    std::vector<llama_token_data> cur;
 
     // timing
 
@@ -24,9 +36,9 @@ struct llama_sampler_chain {
 };
 
 struct llama_sampler * llama_sampler_init_dry_testing(
-        int32_t   context_size,
-        float     dry_multiplier,
-        float     dry_base,
-        int32_t   dry_allowed_length,
-        int32_t   dry_penalty_last_n,
-        const std::vector<std::vector<llama_token>>& seq_breakers);
+    int32_t context_size,
+    float dry_multiplier,
+    float dry_base,
+    int32_t dry_allowed_length,
+    int32_t dry_penalty_last_n,
+    const std::vector<std::vector<llama_token>> & seq_breakers);
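Both hunks touch the sampler-chain internals. For orientation, a minimal sketch of how such a chain is assembled through the public C API in llama.h (only calls that exist there; the new `is_init`/`is_backend` bookkeeping is internal to the chain and not visible at this level):

```cpp
#include "llama.h"

// build a sampler chain; each llama_sampler_chain_add() call appends one
// entry to the chain's internal `samplers` vector shown in the hunk above
static llama_sampler * make_chain(void) {
    llama_sampler_chain_params params = llama_sampler_chain_default_params();
    llama_sampler * chain = llama_sampler_chain_init(params);

    llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));
    llama_sampler_chain_add(chain, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));

    return chain;
}

// later: llama_token id = llama_sampler_sample(chain, ctx, -1);
// the pre-allocated `cur` buffer above exists so that repeated calls do not
// reallocate the candidate array for every sampled token
```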
package/src/llama.cpp/src/llama-vocab.cpp:

@@ -314,6 +314,12 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                     "[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_YOUTU:
+                regex_exprs = {
+                    "[가-힣ㄱ-ㆎ]+|[!…“”‘’—:;,、-〿︰-﹏]+|[ㄅ-ㄯ]+|[一-龥-ゟ゠-ヿ]+",
+                    "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+                };
+                break;
             case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
                 regex_exprs = {
                     "[\r\n]",
@@ -355,6 +361,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
             case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
             case LLAMA_VOCAB_PRE_TYPE_QWEN2:
             case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
+            case LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN:
                 regex_exprs = {
                     // original regex from tokenizer.json
                     // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
@@ -1860,6 +1867,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "deepseek-v3") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "youtu") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_YOUTU;
+                clean_spaces = false;
+                ignore_merges = true;
             } else if (
                 tokenizer_pre == "falcon") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_FALCON;
@@ -1878,7 +1890,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "jina-v2-es" ||
                 tokenizer_pre == "jina-v2-de" ||
                 tokenizer_pre == "a.x-4.0" ||
-                tokenizer_pre == "mellum") {
+                tokenizer_pre == "mellum" ||
+                tokenizer_pre == "modern-bert" ) {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
             } else if (
                 tokenizer_pre == "jina-v1-en" ||
@@ -2014,6 +2027,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "minimax-m2") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "solar-open") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
+                clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
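To make the dispatch above concrete, here is a hypothetical, self-contained reduction of the if/else chain (the enum and function names are invented for illustration; the real code also sets per-tokenizer flags such as `clean_spaces` and `ignore_merges`, as the hunks show):

```cpp
#include <stdexcept>
#include <string>

// invented enum for illustration; llama.cpp uses LLAMA_VOCAB_PRE_TYPE_*
enum pre_type_t { PRE_TYPE_GPT2, PRE_TYPE_YOUTU, PRE_TYPE_SOLAR_OPEN };

// map the GGUF "tokenizer.ggml.pre" string to a pre-tokenizer enum,
// mirroring the if/else chain in llama_vocab::impl::load
pre_type_t select_pre_type(const std::string & tokenizer_pre) {
    if (tokenizer_pre == "youtu")       { return PRE_TYPE_YOUTU;      }
    if (tokenizer_pre == "solar-open")  { return PRE_TYPE_SOLAR_OPEN; }
    if (tokenizer_pre == "modern-bert") { return PRE_TYPE_GPT2;       } // reuses the GPT-2 regexes
    throw std::runtime_error("unknown pre-tokenizer type: '" + tokenizer_pre + "'");
}
```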
@@ -2186,6 +2203,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         // for now, we apply this workaround to find the tokens based on their text
 
         for (const auto & t : token_to_id) {
+            auto & attr = id_to_token[t.second].attr;
+
             // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
             if (special_eot_id == LLAMA_TOKEN_NULL) {
                 if (false
@@ -2201,10 +2220,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<end_of_utterance>" // smoldocling
                    ) {
                     special_eot_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                             __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                     }
                 }
             }
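This same three-line promotion pattern repeats below for every special-token family (EOM and the FIM prefix/suffix/middle/pad/repo/separator tokens): take a reference to the token's attribute bitmask and OR in the control flag if it is missing. A stripped-down sketch of just that pattern, with invented names:

```cpp
#include <cstdio>

// invented flags for illustration; llama.cpp uses llama_token_attr
enum token_attr : int {
    ATTR_CONTROL = 1 << 0,
    ATTR_UNUSED  = 1 << 1,
};

// promote a control-looking token to control-type, warning once, exactly as
// the repeated `if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0)` blocks do
void promote_to_control(token_attr & attr, int id, const char * text) {
    if ((attr & ATTR_CONTROL) == 0) {
        fprintf(stderr, "control-looking token %d '%s' was not control-type; overriding\n", id, text);
        attr = (token_attr) (attr | ATTR_CONTROL);
    }
}
```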
@@ -2215,10 +2234,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|eom_id|>"
                    ) {
                     special_eom_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                             __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                     }
                 }
             }
@@ -2235,10 +2254,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|code_prefix|>" // GLM-4.5
                    ) {
                     special_fim_pre_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                             __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                     }
                 }
             }
@@ -2255,10 +2274,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|code_suffix|>" // GLM-4.5
                    ) {
                     special_fim_suf_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                             __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                     }
                 }
             }
@@ -2275,10 +2294,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|code_middle|>" // GLM-4.5
                    ) {
                     special_fim_mid_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                             __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                     }
                 }
             }
@@ -2292,10 +2311,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<PAD>"
                    ) {
                     special_fim_pad_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                             __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                     }
                 }
             }
@@ -2310,10 +2329,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<reponame>" // Granite
                    ) {
                     special_fim_rep_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                             __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                     }
                 }
             }
@@ -2324,15 +2343,41 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|file_sep|>" // Qwen
                    ) {
                     special_fim_sep_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                             __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                     }
                 }
             }
         }
 
+        // auto-detect unused tokens: e.g. control tokens with the word "unused"
+        // ideally, these tokens should be marked as unused during conversion
+        {
+            uint32_t n_unused = 0;
+
+            for (const auto & t : token_to_id) {
+                auto & attr = id_to_token[t.second].attr;
+
+                if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    continue;
+                }
+
+                if ((attr & LLAMA_TOKEN_ATTR_UNUSED) == 0) {
+                    if (strstr(t.first.c_str(), "unused") != NULL) {
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_UNUSED);
+                    }
+                }
+
+                if (attr & LLAMA_TOKEN_ATTR_UNUSED) {
+                    n_unused++;
+                }
+            }
+
+            LLAMA_LOG_INFO("%s: %u unused tokens\n", __func__, n_unused);
+        }
+
         // maintain a list of tokens that cause end-of-generation
         // this is currently determined based on the token text, which is obviously not ideal
         // ref: https://github.com/ggerganov/llama.cpp/issues/9606
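A toy, standalone demonstration of the auto-detection rule added above (attribute bookkeeping dropped, names invented): any control token whose text contains the substring "unused" gets flagged and counted.

```cpp
#include <cstdio>
#include <cstring>

int main() {
    // sample control-token texts; the "<unused_*>" ones match the substring rule
    const char * names[] = { "<unused_12>", "<|im_end|>", "<unused99>" };

    unsigned n_unused = 0;
    for (const char * name : names) {
        const bool unused = strstr(name, "unused") != NULL;
        if (unused) {
            n_unused++;
        }
        printf("%-12s -> %s\n", name, unused ? "UNUSED" : "kept");
    }
    printf("%u unused tokens\n", n_unused);
    return 0;
}
```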
@@ -2351,12 +2396,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         }
 
         for (const auto & t : token_to_id) {
+            auto & attr = id_to_token[t.second].attr;
+
             if (false
                 || t.first == "<|eot_id|>"
                 || t.first == "<|im_end|>"
                 || t.first == "<|end|>"
                 || t.first == "<|return|>" // o200k_harmony
                 || t.first == "<|call|>"   // o200k_harmony
+                || t.first == "<|flush|>"  // solar-open
+                || t.first == "<|calls|>"  // solar-open
                 || t.first == "<end_of_turn>"
                 || t.first == "<|endoftext|>"
                 || t.first == "<|eom_id|>"
@@ -2366,24 +2415,28 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 || t.first == "<end_of_utterance>" // smoldocling
                ) {
                 special_eog_ids.insert(t.second);
-                if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                     LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                         __func__, t.second, t.first.c_str());
-                    id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_CONTROL);
+                    attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                 }
             } else {
-                // token is control, but not marked as EOG -> print a debug log
-                if (id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL && special_eog_ids.count(t.second) == 0) {
-                    LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
-                        __func__, t.second, t.first.c_str());
+                if (attr & LLAMA_TOKEN_ATTR_CONTROL && !(attr & LLAMA_TOKEN_ATTR_UNUSED)) {
+                    // token is control, but not marked as EOG -> print a debug log
+                    if (special_eog_ids.count(t.second) == 0) {
+                        LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
+                            __func__, t.second, t.first.c_str());
+                    }
                 }
             }
         }
 
         // @ngxson : quick hack for gpt-oss, always render these tokens
         for (const auto & t : token_to_id) {
+            auto & attr = id_to_token[t.second].attr;
+
             if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
-                id_to_token[t.second].attr = (llama_token_attr) (id_to_token[t.second].attr | LLAMA_TOKEN_ATTR_USER_DEFINED);
+                attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_USER_DEFINED);
             }
         }
 
@@ -2403,34 +2456,42 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
         }
 
-        // TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
-        // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
+        // TODO: workaround for o200k_harmony and solar-open tokenizer: the "<|end|>" token should not be EOG
+        // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens ("<|calls|>" and "<|flush|>" for solar-open),
         // we remove the "<|end|>" token from the EOG list
         {
            bool has_return = false;
            bool has_call = false;
            bool has_end = false;
+           bool has_flush = false;
 
            llama_token end_id = LLAMA_TOKEN_NULL;
 
            LLAMA_LOG_INFO("%s: printing all EOG tokens:\n", __func__);
            for (auto tid : special_eog_ids) {
-               LLAMA_LOG_INFO("%s: - %d ('%s')\n", __func__, tid, id_to_token[tid].text.c_str());
+               auto & text = id_to_token[tid].text;
+
+               LLAMA_LOG_INFO("%s: - %d ('%s')\n", __func__, tid, text.c_str());
 
-               if (id_to_token[tid].text == "<|return|>") {
+               if (text == "<|return|>") {
                    has_return = true;
-               } else if (id_to_token[tid].text == "<|call|>") {
+               } else if (text == "<|call|>" || text == "<|calls|>") {
                    has_call = true;
-               } else if (id_to_token[tid].text == "<|end|>") {
+               } else if (text == "<|flush|>") {
+                   has_flush = true;
+               } else if (text == "<|end|>") {
                    has_end = true;
                    end_id = tid;
                }
            }
 
-           if (has_return && has_call && has_end) {
+           if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) {
                special_eog_ids.erase(end_id);
-               id_to_token[end_id].attr = (llama_token_attr) (id_to_token[end_id].attr | LLAMA_TOKEN_ATTR_USER_DEFINED);
-               LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
+
+               auto & attr = id_to_token[end_id].attr;
+               attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_USER_DEFINED);
+
+               LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
            }
        }
    }
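The resulting condition is easier to read in isolation. A hedged sketch (function name invented) of when "<|end|>" is dropped from the EOG set:

```cpp
// "<|end|>" leaves the EOG set when either tokenizer family that supersedes it
// is fully present: o200k_harmony (<|return|> + <|call|>) or solar-open
// (<|calls|> + <|flush|>; note that <|calls|> also sets has_call above)
static bool should_drop_end(bool has_return, bool has_call, bool has_flush, bool has_end) {
    return (has_return && has_call && has_end) || (has_call && has_flush && has_end);
}
```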
@@ -2528,6 +2589,13 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             for (const auto * token : {"<unk>", "<s>", "<|endoftext|>"}) {
                 _set_token_attr(token, LLAMA_TOKEN_ATTR_RSTRIP, false);
             }
+        } else if (_contains_any(model_name, {"modern-bert"})) {
+            if (token_to_id.count("[MASK]") == 0 ) {
+                LLAMA_LOG_WARN("%s: Mask token missing in vocab!\n", __func__);
+            }
+            else {
+                _set_token_attr("[MASK]", LLAMA_TOKEN_ATTR_LSTRIP, true);
+            }
         }
     }
 }
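For context, a hedged sketch of what a `_set_token_attr`-style helper does (the real helper is private to `llama_vocab::impl`; the map type and names here are assumptions): look the token up by its text and toggle one attribute bit, which is why the missing-[MASK] case above has to be warned about separately.

```cpp
#include <string>
#include <unordered_map>

// invented flag for illustration; llama.cpp uses llama_token_attr
enum token_attr : int { ATTR_LSTRIP = 1 << 2 };

// toggle one attribute bit on the token whose text matches; a no-op when the
// token is absent, which is why the diff checks for [MASK] before calling
static void set_token_attr(std::unordered_map<std::string, int> & attrs,
                           const std::string & text, token_attr bit, bool value) {
    auto it = attrs.find(text);
    if (it == attrs.end()) {
        return;
    }
    it->second = value ? (it->second | bit) : (it->second & ~bit);
}
```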