@fugood/llama.node 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/src/llama.cpp/CMakeLists.txt +0 -1
- package/src/llama.cpp/common/CMakeLists.txt +4 -5
- package/src/llama.cpp/common/arg.cpp +44 -0
- package/src/llama.cpp/common/common.cpp +22 -6
- package/src/llama.cpp/common/common.h +15 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +10 -2
- package/src/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/src/llama.cpp/ggml/include/ggml.h +104 -10
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +12 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +749 -163
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +12 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +88 -9
- package/src/llama.cpp/include/llama.h +13 -47
- package/src/llama.cpp/src/llama-arch.cpp +298 -3
- package/src/llama.cpp/src/llama-arch.h +22 -1
- package/src/llama.cpp/src/llama-batch.cpp +103 -71
- package/src/llama.cpp/src/llama-batch.h +31 -18
- package/src/llama.cpp/src/llama-chat.cpp +59 -1
- package/src/llama.cpp/src/llama-chat.h +3 -0
- package/src/llama.cpp/src/llama-context.cpp +134 -95
- package/src/llama.cpp/src/llama-context.h +13 -16
- package/src/llama.cpp/src/llama-cparams.h +3 -2
- package/src/llama.cpp/src/llama-graph.cpp +279 -180
- package/src/llama.cpp/src/llama-graph.h +183 -122
- package/src/llama.cpp/src/llama-hparams.cpp +47 -1
- package/src/llama.cpp/src/llama-hparams.h +12 -1
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
- package/src/llama.cpp/src/llama-kv-cache-unified.h +143 -47
- package/src/llama.cpp/src/llama-kv-cells.h +62 -10
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
- package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +21 -11
- package/src/llama.cpp/src/llama-memory.cpp +17 -0
- package/src/llama.cpp/src/llama-memory.h +3 -0
- package/src/llama.cpp/src/llama-model.cpp +3373 -743
- package/src/llama.cpp/src/llama-model.h +20 -4
- package/src/llama.cpp/src/llama-quant.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +376 -10
- package/src/llama.cpp/src/llama-vocab.h +43 -0
- package/src/llama.cpp/src/unicode.cpp +207 -0
- package/src/llama.cpp/src/unicode.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
package/src/llama.cpp/src/llama-arch.cpp

@@ -34,6 +34,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_PHI3, "phi3" },
     { LLM_ARCH_PHIMOE, "phimoe" },
     { LLM_ARCH_PLAMO, "plamo" },
+    { LLM_ARCH_PLAMO2, "plamo2" },
     { LLM_ARCH_CODESHELL, "codeshell" },
     { LLM_ARCH_ORION, "orion" },
     { LLM_ARCH_INTERNLM2, "internlm2" },
@@ -45,6 +46,9 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_GEMMA3N, "gemma3n" },
     { LLM_ARCH_STARCODER2, "starcoder2" },
     { LLM_ARCH_MAMBA, "mamba" },
+    { LLM_ARCH_MAMBA2, "mamba2" },
+    { LLM_ARCH_JAMBA, "jamba" },
+    { LLM_ARCH_FALCON_H1, "falcon-h1" },
     { LLM_ARCH_XVERSE, "xverse" },
     { LLM_ARCH_COMMAND_R, "command-r" },
     { LLM_ARCH_COHERE2, "cohere2" },
@@ -64,12 +68,14 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_JAIS, "jais" },
     { LLM_ARCH_NEMOTRON, "nemotron" },
     { LLM_ARCH_EXAONE, "exaone" },
+    { LLM_ARCH_EXAONE4, "exaone4" },
     { LLM_ARCH_RWKV6, "rwkv6" },
     { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
     { LLM_ARCH_RWKV7, "rwkv7" },
     { LLM_ARCH_ARWKV7, "arwkv7" },
     { LLM_ARCH_GRANITE, "granite" },
     { LLM_ARCH_GRANITE_MOE, "granitemoe" },
+    { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
     { LLM_ARCH_CHAMELEON, "chameleon" },
     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
     { LLM_ARCH_PLM, "plm" },
@@ -77,6 +83,11 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_DOTS1, "dots1" },
     { LLM_ARCH_ARCEE, "arcee" },
     { LLM_ARCH_ERNIE4_5, "ernie4_5" },
+    { LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
+    { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
+    { LLM_ARCH_SMOLLM3, "smollm3" },
+    { LLM_ARCH_LFM2, "lfm2" },
+    { LLM_ARCH_DREAM, "dream" },
     { LLM_ARCH_UNKNOWN, "(unknown)" },
 };
 
@@ -149,7 +160,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
     { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
     { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
-    { LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },
 
     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
     { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -170,6 +180,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
     { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
     { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
+    { LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
     { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
 
     { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
@@ -182,6 +193,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
 
     { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
 
+    { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
+
     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
     { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
     { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
@@ -775,6 +788,36 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_PLAMO2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
+            { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
+            { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+        },
+    },
     {
         LLM_ARCH_CODESHELL,
         {
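Note: the "blk.%d.*" strings in these tables are printf-style templates; the layer index is substituted in when a tensor name is resolved. A minimal sketch of that expansion (the helper name `tensor_name_for_layer` is hypothetical, not part of llama.cpp's actual API):

```cpp
#include <cstdio>
#include <string>

// Hypothetical helper: expand an LLM_TENSOR_NAMES template such as
// "blk.%d.ssm_in" with a concrete layer index: layer 3 -> "blk.3.ssm_in".
static std::string tensor_name_for_layer(const char * fmt, int layer) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), fmt, layer);
    return std::string(buf);
}
```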
@@ -1004,6 +1047,77 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
         },
     },
+    {
+        LLM_ARCH_MAMBA2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+        },
+    },
+    {
+        LLM_ARCH_JAMBA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
+            { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_FALCON_H1,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_XVERSE,
         {
@@ -1397,6 +1511,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_EXAONE4,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+        }
+    },
     {
         LLM_ARCH_RWKV6,
         {
@@ -1564,6 +1698,43 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
         },
     },
+    {
+        LLM_ARCH_GRANITE_HYBRID,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            // mamba(2) ssm layers
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            // attention layers
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            // dense FFN
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            // moe FFN
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+            // shared expert
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+        },
+    },
     {
         LLM_ARCH_CHAMELEON,
         {
@@ -1676,12 +1847,115 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_ERNIE4_5_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
+        },
+    },
+    {
+        LLM_ARCH_HUNYUAN_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_SMOLLM3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_LFM2,
+        {
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
+            { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
+            { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+        }
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
         },
     },
+    {
+        LLM_ARCH_DREAM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
 };
 
 static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
@@ -1760,7 +2034,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
     {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
     {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
+    {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
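Note: each LLM_TENSOR_INFOS entry pairs a tensor with the ggml operator that consumes it (element-wise GGML_OP_MUL for norm weights, GGML_OP_MUL_MAT for projections, GGML_OP_SSM_CONV for convolution weights); as far as we can tell, llama.cpp consults this when checking which backend buffer types can host a weight. The new SSM norm tensors above and the LFM2 short-conv tensors in the next hunk follow the same pattern.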
@@ -1839,6 +2117,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
+    {LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 };
 
 LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
@@ -1894,6 +2175,7 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
 bool llm_arch_is_recurrent(const llm_arch & arch) {
     switch (arch) {
         case LLM_ARCH_MAMBA:
+        case LLM_ARCH_MAMBA2:
         case LLM_ARCH_RWKV6:
         case LLM_ARCH_RWKV6QWEN2:
         case LLM_ARCH_RWKV7:
@@ -1905,9 +2187,22 @@
     }
 }
 
 bool llm_arch_is_hybrid(const llm_arch & arch) {
-    // TODO: There are currently no hybrid models! Once there are, this will be
-    // the place to identify them
     switch (arch) {
+        case LLM_ARCH_JAMBA:
+        case LLM_ARCH_FALCON_H1:
+        case LLM_ARCH_PLAMO2:
+        case LLM_ARCH_GRANITE_HYBRID:
+        case LLM_ARCH_LFM2:
+            return true;
+        default:
+            return false;
+    }
+}
+
+bool llm_arch_is_diffusion(const llm_arch & arch) {
+    switch (arch) {
+        case LLM_ARCH_DREAM:
+            return true;
         default:
             return false;
     }
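Note: the diff rewrites `llm_arch_is_hybrid` to recognize the new hybrid (recurrent + attention) architectures and adds an `llm_arch_is_diffusion` predicate for the Dream diffusion model. A self-contained sketch (not llama.cpp's actual dispatch code) of how such predicates can drive memory-type selection; the reduced enum and the `memory_kind_for` helper are illustrative only:

```cpp
#include <cstdio>

// Reduced, illustrative enum; the real llm_arch lives in llama-arch.h.
enum llm_arch { LLM_ARCH_MAMBA, LLM_ARCH_MAMBA2, LLM_ARCH_JAMBA, LLM_ARCH_DREAM, LLM_ARCH_LLAMA };

// Mirrors the hunk above: MAMBA/MAMBA2 are recurrent, JAMBA is hybrid, DREAM is diffusion.
static bool is_recurrent(llm_arch a) { return a == LLM_ARCH_MAMBA || a == LLM_ARCH_MAMBA2; }
static bool is_hybrid   (llm_arch a) { return a == LLM_ARCH_JAMBA; }
static bool is_diffusion(llm_arch a) { return a == LLM_ARCH_DREAM; }

// Hypothetical caller: pick a memory layout based on the architecture class.
static const char * memory_kind_for(llm_arch a) {
    if (is_hybrid(a))    return "hybrid: recurrent state + attention KV cache";
    if (is_recurrent(a)) return "recurrent state only";
    return "attention KV cache only";
}

int main() {
    std::printf("%s\n", memory_kind_for(LLM_ARCH_JAMBA));
    std::printf("diffusion decode path: %s\n", is_diffusion(LLM_ARCH_DREAM) ? "yes" : "no");
    return 0;
}
```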
package/src/llama.cpp/src/llama-arch.h

@@ -38,6 +38,7 @@ enum llm_arch {
     LLM_ARCH_PHI3,
     LLM_ARCH_PHIMOE,
     LLM_ARCH_PLAMO,
+    LLM_ARCH_PLAMO2,
     LLM_ARCH_CODESHELL,
     LLM_ARCH_ORION,
     LLM_ARCH_INTERNLM2,
@@ -49,6 +50,9 @@ enum llm_arch {
     LLM_ARCH_GEMMA3N,
     LLM_ARCH_STARCODER2,
     LLM_ARCH_MAMBA,
+    LLM_ARCH_MAMBA2,
+    LLM_ARCH_JAMBA,
+    LLM_ARCH_FALCON_H1,
     LLM_ARCH_XVERSE,
     LLM_ARCH_COMMAND_R,
     LLM_ARCH_COHERE2,
@@ -68,12 +72,14 @@ enum llm_arch {
     LLM_ARCH_JAIS,
     LLM_ARCH_NEMOTRON,
     LLM_ARCH_EXAONE,
+    LLM_ARCH_EXAONE4,
     LLM_ARCH_RWKV6,
     LLM_ARCH_RWKV6QWEN2,
     LLM_ARCH_RWKV7,
     LLM_ARCH_ARWKV7,
     LLM_ARCH_GRANITE,
     LLM_ARCH_GRANITE_MOE,
+    LLM_ARCH_GRANITE_HYBRID,
     LLM_ARCH_CHAMELEON,
     LLM_ARCH_WAVTOKENIZER_DEC,
     LLM_ARCH_PLM,
@@ -81,6 +87,11 @@ enum llm_arch {
     LLM_ARCH_DOTS1,
     LLM_ARCH_ARCEE,
     LLM_ARCH_ERNIE4_5,
+    LLM_ARCH_ERNIE4_5_MOE,
+    LLM_ARCH_HUNYUAN_MOE,
+    LLM_ARCH_SMOLLM3,
+    LLM_ARCH_LFM2,
+    LLM_ARCH_DREAM,
     LLM_ARCH_UNKNOWN,
 };
 
@@ -153,7 +164,6 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,
     LLM_KV_ATTENTION_KEY_LENGTH_MLA,
     LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
-    LLM_KV_ATTENTION_LAYER_INDICES,
 
     LLM_KV_ROPE_DIMENSION_COUNT,
     LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -174,6 +184,7 @@ enum llm_kv {
     LLM_KV_SSM_CONV_KERNEL,
     LLM_KV_SSM_STATE_SIZE,
     LLM_KV_SSM_TIME_STEP_RANK,
+    LLM_KV_SSM_GROUP_COUNT,
     LLM_KV_SSM_DT_B_C_RMS,
 
     LLM_KV_WKV_HEAD_SIZE,
@@ -221,6 +232,8 @@ enum llm_kv {
 
     LLM_KV_CLASSIFIER_OUTPUT_LABELS,
 
+    LLM_KV_SHORTCONV_L_CACHE,
+
     // deprecated:
     LLM_KV_TOKENIZER_PREFIX_ID,
     LLM_KV_TOKENIZER_SUFFIX_ID,
@@ -291,8 +304,12 @@ enum llm_tensor {
     LLM_TENSOR_SSM_CONV1D,
     LLM_TENSOR_SSM_X,
     LLM_TENSOR_SSM_DT,
+    LLM_TENSOR_SSM_DT_NORM,
     LLM_TENSOR_SSM_A,
+    LLM_TENSOR_SSM_B_NORM,
+    LLM_TENSOR_SSM_C_NORM,
     LLM_TENSOR_SSM_D,
+    LLM_TENSOR_SSM_NORM,
     LLM_TENSOR_SSM_OUT,
     LLM_TENSOR_TIME_MIX_W0,
     LLM_TENSOR_TIME_MIX_W1,
@@ -386,6 +403,9 @@ enum llm_tensor {
     LLM_TENSOR_POS_NET_ATTN_K,
     LLM_TENSOR_POS_NET_ATTN_V,
     LLM_TENSOR_POS_NET_ATTN_OUT,
+    LLM_TENSOR_SHORTCONV_CONV,
+    LLM_TENSOR_SHORTCONV_INPROJ,
+    LLM_TENSOR_SHORTCONV_OUTPROJ,
 };
 
 enum llm_tensor_layer {
@@ -462,3 +482,4 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
 
 bool llm_arch_is_recurrent(const llm_arch & arch);
 bool llm_arch_is_hybrid  (const llm_arch & arch);
+bool llm_arch_is_diffusion(const llm_arch & arch);