@fugood/llama.node 1.2.3 → 1.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +33 -11
- package/src/llama.cpp/CMakeLists.txt +1 -0
- package/src/llama.cpp/common/CMakeLists.txt +46 -2
- package/src/llama.cpp/common/arg.cpp +484 -204
- package/src/llama.cpp/common/arg.h +0 -1
- package/src/llama.cpp/common/chat-parser.cpp +156 -15
- package/src/llama.cpp/common/chat-parser.h +3 -0
- package/src/llama.cpp/common/chat.cpp +217 -6
- package/src/llama.cpp/common/chat.h +5 -3
- package/src/llama.cpp/common/common.cpp +22 -6
- package/src/llama.cpp/common/common.h +6 -4
- package/src/llama.cpp/common/http.h +73 -0
- package/src/llama.cpp/common/json-partial.cpp +51 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +7 -6
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +8 -9
- package/src/llama.cpp/ggml/include/ggml.h +22 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +12 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +100 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +209 -96
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +32 -44
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +107 -83
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +17 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +103 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +11 -9
- package/src/llama.cpp/include/llama.h +8 -0
- package/src/llama.cpp/src/llama-arch.cpp +93 -0
- package/src/llama.cpp/src/llama-arch.h +22 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -1
- package/src/llama.cpp/src/llama-context.cpp +6 -0
- package/src/llama.cpp/src/llama-graph.cpp +57 -22
- package/src/llama.cpp/src/llama-graph.h +10 -1
- package/src/llama.cpp/src/llama-hparams.cpp +5 -1
- package/src/llama.cpp/src/llama-hparams.h +17 -2
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +2 -2
- package/src/llama.cpp/src/llama-kv-cache.cpp +2 -5
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +11 -9
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +11 -3
- package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
- package/src/llama.cpp/src/llama-model.cpp +572 -45
- package/src/llama.cpp/src/llama-model.h +18 -0
- package/src/llama.cpp/src/llama-sampling.cpp +5 -0
- package/src/llama.cpp/src/llama-vocab.cpp +7 -1
- package/src/llama.cpp/src/llama-vocab.h +41 -40
- package/src/llama.cpp/src/unicode.h +43 -0
package/src/llama.cpp/src/llama-model.h:

```diff
@@ -107,6 +107,7 @@ enum llm_type {
     LLM_TYPE_17B_16E, // llama4 Scout
     LLM_TYPE_17B_128E, // llama4 Maverick
     LLM_TYPE_A13B,
+    LLM_TYPE_8B_A1B, // lfm2moe
     LLM_TYPE_21B_A3B, // Ernie MoE small
     LLM_TYPE_30B_A3B,
     LLM_TYPE_106B_A12B, // GLM-4.5-Air
@@ -275,6 +276,11 @@ struct llama_layer {
     struct ggml_tensor * ffn_down_shexp = nullptr;
     struct ggml_tensor * ffn_up_shexp = nullptr;
 
+    // ff adjugate experts (chexps)
+    struct ggml_tensor * ffn_gate_chexps = nullptr;
+    struct ggml_tensor * ffn_down_chexps = nullptr;
+    struct ggml_tensor * ffn_up_chexps = nullptr;
+
     // ff bias
     struct ggml_tensor * ffn_gate_b = nullptr;
     struct ggml_tensor * ffn_down_b = nullptr; // b2
@@ -375,6 +381,12 @@ struct llama_layer {
     // openai-moe
     struct ggml_tensor * attn_sinks = nullptr;
 
+    // xIELU activation parameters for Apertus
+    struct ggml_tensor * ffn_act_alpha_n = nullptr;
+    struct ggml_tensor * ffn_act_alpha_p = nullptr;
+    struct ggml_tensor * ffn_act_beta = nullptr;
+    struct ggml_tensor * ffn_act_eps = nullptr;
+
     struct llama_layer_posnet posnet;
 
     struct llama_layer_convnext convnext;
@@ -426,6 +438,12 @@ struct llama_model {
 
     std::vector<llama_layer> layers;
 
+    //Dense linear projections for SentenceTransformers models like embeddinggemma
+    // For Sentence Transformers models structure see
+    // https://sbert.net/docs/sentence_transformer/usage/custom_models.html#structure-of-sentence-transformer-models
+    struct ggml_tensor * dense_2_out_layers = nullptr;
+    struct ggml_tensor * dense_3_out_layers = nullptr;
+
     llama_model_params params;
 
     // gguf metadata
```
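The new dense_2_out_layers / dense_3_out_layers tensors follow the Sentence Transformers layout referenced in the comment above, where optional Dense modules project the pooled embedding. A minimal ggml-style sketch of such a projection, purely illustrative; the helper name and variables are assumptions, not code from this package:

```cpp
#include "ggml.h"  // assumption: ggml headers are on the include path

// Hypothetical helper: apply an optional SentenceTransformers-style Dense
// projection (a plain weight matrix) to pooled embeddings, returning the
// input unchanged when the model ships no projection tensor.
static ggml_tensor * apply_dense(ggml_context * ctx, ggml_tensor * pooled, ggml_tensor * dense_w) {
    if (dense_w == nullptr) {
        return pooled;  // projection is optional
    }
    // dense_w: [n_in, n_out] multiplied with pooled: [n_in, n_tokens] -> [n_out, n_tokens]
    return ggml_mul_mat(ctx, dense_w, pooled);
}
```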
package/src/llama.cpp/src/llama-sampling.cpp:

```diff
@@ -2541,8 +2541,13 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
     if (n_non_eog == 0) {
         cur_p->size = 1;
         cur_p->data[0].id = ctx->vocab->token_eot();
+        if (cur_p->data[0].id == LLAMA_TOKEN_NULL) {
+            cur_p->data[0].id = ctx->vocab->token_eos();
+        }
         cur_p->data[0].logit = 1.0f;
 
+        GGML_ASSERT(cur_p->data[0].id != LLAMA_TOKEN_NULL);
+
         return;
     }
 
```
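The infill-sampler change above guards against vocabularies that define no EOT token: token_eot() can return LLAMA_TOKEN_NULL, in which case the sampler now falls back to EOS and asserts that at least one end token exists. A hedged sketch of the same fallback order as a standalone helper; the function name is hypothetical, while token_eot(), token_eos(), LLAMA_TOKEN_NULL and GGML_ASSERT are existing llama.cpp/ggml symbols:

```cpp
#include "llama-vocab.h"  // assumption: building against llama.cpp internals
#include "ggml.h"         // for GGML_ASSERT

// Hypothetical helper mirroring the fallback in llama_sampler_infill_apply:
// prefer the end-of-turn token, fall back to end-of-sequence, and require
// that the vocabulary defines at least one of them.
static llama_token pick_end_token(const llama_vocab & vocab) {
    llama_token id = vocab.token_eot();
    if (id == LLAMA_TOKEN_NULL) {
        id = vocab.token_eos();
    }
    GGML_ASSERT(id != LLAMA_TOKEN_NULL);
    return id;
}
```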
package/src/llama.cpp/src/llama-vocab.cpp:

```diff
@@ -347,6 +347,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
             case LLAMA_VOCAB_PRE_TYPE_OLMO:
             case LLAMA_VOCAB_PRE_TYPE_JAIS:
             case LLAMA_VOCAB_PRE_TYPE_TRILLION:
+            case LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING:
                 regex_exprs = {
                     "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
                 };
@@ -1772,7 +1773,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
             const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
             precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
-#
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
             // correct endiannes of data in precompiled_charsmap binary blob
             uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0];
             *xcda_blob_size = __builtin_bswap32(*xcda_blob_size);
@@ -1961,6 +1962,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "trillion") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
             clean_spaces = false;
+        } else if (
+                tokenizer_pre == "granite-docling") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING;
+            clean_spaces = false;
         } else if (
                 tokenizer_pre == "bailingmoe" ||
                 tokenizer_pre == "llada-moe") {
@@ -2166,6 +2171,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|end|>"
                     || t.first == "<end_of_turn>"
                     || t.first == "<|endoftext|>"
+                    || t.first == "<|end_of_text|>" // granite
                     || t.first == "<EOT>"
                     || t.first == "_<EOT>"
                     || t.first == "<|end▁of▁sentence|>" // DeepSeek
```
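The pre-tokenizer selection in llama_vocab::impl::load keys off the GGUF metadata string "tokenizer.ggml.pre"; a model converted for granite-docling carries that value and now resolves to LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING. A small hedged sketch of inspecting that key with the public gguf API (the file name is a placeholder):

```cpp
#include "gguf.h"
#include <cstdio>

int main() {
    // Open the metadata only; no tensor data is loaded.
    gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    gguf_context * ctx = gguf_init_from_file("model.gguf", params);
    if (ctx == nullptr) {
        return 1;
    }
    const int64_t key = gguf_find_key(ctx, "tokenizer.ggml.pre");
    if (key >= 0) {
        std::printf("tokenizer.ggml.pre = %s\n", gguf_get_val_str(ctx, key));
    }
    gguf_free(ctx);
    return 0;
}
```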
package/src/llama.cpp/src/llama-vocab.h:

```diff
@@ -8,46 +8,47 @@
 
 // pre-tokenization types
 enum llama_vocab_pre_type {
-    LLAMA_VOCAB_PRE_TYPE_DEFAULT,
-    LLAMA_VOCAB_PRE_TYPE_LLAMA3,
-    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM,
-    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER,
-    LLAMA_VOCAB_PRE_TYPE_FALCON,
-    LLAMA_VOCAB_PRE_TYPE_MPT,
-    LLAMA_VOCAB_PRE_TYPE_STARCODER,
-    LLAMA_VOCAB_PRE_TYPE_GPT2,
-    LLAMA_VOCAB_PRE_TYPE_REFACT,
-    LLAMA_VOCAB_PRE_TYPE_COMMAND_R,
-    LLAMA_VOCAB_PRE_TYPE_STABLELM2,
-    LLAMA_VOCAB_PRE_TYPE_QWEN2,
-    LLAMA_VOCAB_PRE_TYPE_OLMO,
-    LLAMA_VOCAB_PRE_TYPE_DBRX,
-    LLAMA_VOCAB_PRE_TYPE_SMAUG,
-    LLAMA_VOCAB_PRE_TYPE_PORO,
-    LLAMA_VOCAB_PRE_TYPE_CHATGLM3,
-    LLAMA_VOCAB_PRE_TYPE_CHATGLM4,
-    LLAMA_VOCAB_PRE_TYPE_VIKING,
-    LLAMA_VOCAB_PRE_TYPE_JAIS,
-    LLAMA_VOCAB_PRE_TYPE_TEKKEN,
-    LLAMA_VOCAB_PRE_TYPE_SMOLLM,
-    LLAMA_VOCAB_PRE_TYPE_CODESHELL,
-    LLAMA_VOCAB_PRE_TYPE_BLOOM,
-    LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH,
-    LLAMA_VOCAB_PRE_TYPE_EXAONE,
-    LLAMA_VOCAB_PRE_TYPE_CHAMELEON,
-    LLAMA_VOCAB_PRE_TYPE_MINERVA,
-    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM,
-    LLAMA_VOCAB_PRE_TYPE_GPT4O,
-    LLAMA_VOCAB_PRE_TYPE_SUPERBPE,
-    LLAMA_VOCAB_PRE_TYPE_TRILLION,
-    LLAMA_VOCAB_PRE_TYPE_BAILINGMOE,
-    LLAMA_VOCAB_PRE_TYPE_LLAMA4,
-    LLAMA_VOCAB_PRE_TYPE_PIXTRAL,
-    LLAMA_VOCAB_PRE_TYPE_SEED_CODER,
-    LLAMA_VOCAB_PRE_TYPE_HUNYUAN,
-    LLAMA_VOCAB_PRE_TYPE_KIMI_K2,
-    LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE,
-    LLAMA_VOCAB_PRE_TYPE_GROK_2,
+    LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
+    LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
+    LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
+    LLAMA_VOCAB_PRE_TYPE_MPT = 5,
+    LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
+    LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
+    LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
+    LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
+    LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
+    LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
+    LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
+    LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
+    LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
+    LLAMA_VOCAB_PRE_TYPE_PORO = 15,
+    LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
+    LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
+    LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
+    LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
+    LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
+    LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
+    LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
+    LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
+    LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
+    LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
+    LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
+    LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
+    LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
+    LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
+    LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
+    LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
+    LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
+    LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
+    LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
+    LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36,
+    LLAMA_VOCAB_PRE_TYPE_KIMI_K2 = 37,
+    LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE = 38,
+    LLAMA_VOCAB_PRE_TYPE_GROK_2 = 39,
+    LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40,
 };
 
 struct LLM_KV;
```
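With every enumerator carrying an explicit value, the integer each pre-tokenizer type maps to is pinned in the source. A hedged illustration of guarding against accidental renumbering, not code from the package, and it assumes the internal header is on the include path; the values themselves come straight from the hunk above:

```cpp
#include "llama-vocab.h"

// Compile-time guards: reordering or inserting an enumerator would trip these
// instead of silently shifting every subsequent pre-tokenizer id.
static_assert(LLAMA_VOCAB_PRE_TYPE_DEFAULT         == 0,  "pre-tokenizer ids are pinned");
static_assert(LLAMA_VOCAB_PRE_TYPE_GROK_2          == 39, "pre-tokenizer ids are pinned");
static_assert(LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING == 40, "granite-docling is the new last entry");
```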
package/src/llama.cpp/src/unicode.h:

```diff
@@ -4,6 +4,7 @@
 #include <string>
 #include <vector>
 
+// TODO: reimplement this structure in endian-independent way
 struct unicode_cpt_flags {
     enum {
         UNDEFINED = 0x0001,
@@ -15,6 +16,10 @@ struct unicode_cpt_flags {
         SYMBOL = 0x0040, // regex: \p{S}
         CONTROL = 0x0080, // regex: \p{C}
         MASK_CATEGORIES = 0x00FF,
+        WHITESPACE = 0x0100,
+        LOWERCASE = 0x0200,
+        UPPERCASE = 0x0400,
+        NFD = 0x0800,
     };
 
     // codepoint type
@@ -34,11 +39,49 @@ struct unicode_cpt_flags {
 
     // decode from uint16
     inline unicode_cpt_flags(const uint16_t flags = 0) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         *reinterpret_cast<uint16_t*>(this) = flags;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        is_undefined = (flags & UNDEFINED) ? 1 : 0;
+        is_number = (flags & NUMBER) ? 1 : 0;
+        is_letter = (flags & LETTER) ? 1 : 0;
+        is_separator = (flags & SEPARATOR) ? 1 : 0;
+        is_accent_mark = (flags & ACCENT_MARK) ? 1 : 0;
+        is_punctuation = (flags & PUNCTUATION) ? 1 : 0;
+        is_symbol = (flags & SYMBOL) ? 1 : 0;
+        is_control = (flags & CONTROL) ? 1 : 0;
+        is_whitespace = (flags & WHITESPACE) ? 1 : 0;
+        is_lowercase = (flags & LOWERCASE) ? 1 : 0;
+        is_uppercase = (flags & UPPERCASE) ? 1 : 0;
+        is_nfd = (flags & NFD) ? 1 : 0;
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
     }
 
     inline uint16_t as_uint() const {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         return *reinterpret_cast<const uint16_t*>(this);
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        uint16_t result =
+            is_undefined * UNDEFINED
+            + is_number * NUMBER
+            + is_letter * LETTER
+            + is_separator * SEPARATOR
+            + is_accent_mark * ACCENT_MARK
+            + is_punctuation * PUNCTUATION
+            + is_symbol * SYMBOL
+            + is_control * CONTROL
+            + is_whitespace * WHITESPACE
+            + is_lowercase * LOWERCASE
+            + is_uppercase * UPPERCASE
+            + is_nfd * NFD
+            ;
+
+        return result;
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
     }
 
     inline uint16_t category_flag() const {
```