cui-llama.rn 1.4.0 → 1.4.2

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/README.md +4 -23
  2. package/android/build.gradle +12 -3
  3. package/android/src/main/CMakeLists.txt +13 -7
  4. package/android/src/main/java/com/rnllama/LlamaContext.java +27 -20
  5. package/android/src/main/java/com/rnllama/RNLlama.java +5 -1
  6. package/android/src/main/jni.cpp +15 -12
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/cpp/README.md +1 -1
  16. package/cpp/common.cpp +158 -267
  17. package/cpp/common.h +46 -12
  18. package/cpp/ggml-alloc.c +1042 -1037
  19. package/cpp/ggml-backend-impl.h +255 -256
  20. package/cpp/ggml-backend-reg.cpp +582 -582
  21. package/cpp/ggml-backend.cpp +2002 -2002
  22. package/cpp/ggml-backend.h +354 -352
  23. package/cpp/ggml-common.h +1853 -1853
  24. package/cpp/ggml-cpp.h +39 -39
  25. package/cpp/ggml-cpu-aarch64.cpp +4247 -4247
  26. package/cpp/ggml-cpu-aarch64.h +8 -8
  27. package/cpp/ggml-cpu-impl.h +386 -386
  28. package/cpp/ggml-cpu-quants.c +10920 -10839
  29. package/cpp/ggml-cpu-traits.cpp +36 -36
  30. package/cpp/ggml-cpu-traits.h +38 -38
  31. package/cpp/ggml-cpu.c +329 -60
  32. package/cpp/ggml-cpu.cpp +10 -2
  33. package/cpp/ggml-cpu.h +135 -135
  34. package/cpp/ggml-impl.h +567 -567
  35. package/cpp/ggml-metal-impl.h +17 -17
  36. package/cpp/ggml-metal.m +4884 -4884
  37. package/cpp/ggml-quants.c +5238 -5238
  38. package/cpp/ggml-threading.h +14 -14
  39. package/cpp/ggml.c +6514 -6448
  40. package/cpp/ggml.h +2194 -2163
  41. package/cpp/gguf.cpp +1329 -1325
  42. package/cpp/gguf.h +202 -202
  43. package/cpp/json-schema-to-grammar.cpp +1045 -1045
  44. package/cpp/json-schema-to-grammar.h +8 -8
  45. package/cpp/json.hpp +24766 -24766
  46. package/cpp/llama-adapter.cpp +347 -346
  47. package/cpp/llama-adapter.h +74 -73
  48. package/cpp/llama-arch.cpp +1487 -1434
  49. package/cpp/llama-arch.h +400 -395
  50. package/cpp/llama-batch.cpp +368 -368
  51. package/cpp/llama-batch.h +88 -88
  52. package/cpp/llama-chat.cpp +578 -567
  53. package/cpp/llama-chat.h +52 -51
  54. package/cpp/llama-context.cpp +1775 -1771
  55. package/cpp/llama-context.h +128 -128
  56. package/cpp/llama-cparams.cpp +1 -1
  57. package/cpp/llama-cparams.h +37 -37
  58. package/cpp/llama-cpp.h +30 -30
  59. package/cpp/llama-grammar.cpp +1139 -1139
  60. package/cpp/llama-grammar.h +143 -143
  61. package/cpp/llama-hparams.cpp +71 -71
  62. package/cpp/llama-hparams.h +139 -140
  63. package/cpp/llama-impl.cpp +167 -167
  64. package/cpp/llama-impl.h +61 -61
  65. package/cpp/llama-kv-cache.cpp +718 -718
  66. package/cpp/llama-kv-cache.h +218 -218
  67. package/cpp/llama-mmap.cpp +2 -1
  68. package/cpp/llama-mmap.h +67 -67
  69. package/cpp/llama-model-loader.cpp +1124 -1011
  70. package/cpp/llama-model-loader.h +167 -158
  71. package/cpp/llama-model.cpp +3997 -2202
  72. package/cpp/llama-model.h +370 -391
  73. package/cpp/llama-sampling.cpp +2408 -2406
  74. package/cpp/llama-sampling.h +32 -48
  75. package/cpp/llama-vocab.cpp +3247 -1982
  76. package/cpp/llama-vocab.h +125 -182
  77. package/cpp/llama.cpp +416 -2886
  78. package/cpp/llama.h +1323 -1285
  79. package/cpp/log.cpp +401 -401
  80. package/cpp/log.h +121 -121
  81. package/cpp/rn-llama.cpp +822 -0
  82. package/cpp/rn-llama.h +123 -0
  83. package/cpp/rn-llama.hpp +18 -12
  84. package/cpp/sampling.cpp +505 -500
  85. package/cpp/sgemm.cpp +2597 -2597
  86. package/cpp/speculative.cpp +277 -274
  87. package/cpp/speculative.h +28 -28
  88. package/cpp/unicode.cpp +2 -3
  89. package/ios/CMakeLists.txt +99 -0
  90. package/ios/RNLlama.h +5 -1
  91. package/ios/RNLlama.mm +2 -2
  92. package/ios/RNLlamaContext.h +8 -1
  93. package/ios/RNLlamaContext.mm +15 -11
  94. package/ios/rnllama.xcframework/Info.plist +74 -0
  95. package/jest/mock.js +3 -2
  96. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  97. package/lib/commonjs/index.js +4 -2
  98. package/lib/commonjs/index.js.map +1 -1
  99. package/lib/module/NativeRNLlama.js.map +1 -1
  100. package/lib/module/index.js +4 -2
  101. package/lib/module/index.js.map +1 -1
  102. package/lib/typescript/NativeRNLlama.d.ts +5 -1
  103. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  104. package/lib/typescript/index.d.ts.map +1 -1
  105. package/llama-rn.podspec +8 -2
  106. package/package.json +5 -2
  107. package/src/NativeRNLlama.ts +5 -1
  108. package/src/index.ts +9 -2
package/cpp/llama-arch.cpp
@@ -1,1434 +1,1487 @@
1
- #include "llama-arch.h"
2
-
3
- #include "llama-impl.h"
4
-
5
- #include <map>
6
-
7
- static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
8
- { LLM_ARCH_LLAMA, "llama" },
9
- { LLM_ARCH_DECI, "deci" },
10
- { LLM_ARCH_FALCON, "falcon" },
11
- { LLM_ARCH_GROK, "grok" },
12
- { LLM_ARCH_GPT2, "gpt2" },
13
- { LLM_ARCH_GPTJ, "gptj" },
14
- { LLM_ARCH_GPTNEOX, "gptneox" },
15
- { LLM_ARCH_MPT, "mpt" },
16
- { LLM_ARCH_BAICHUAN, "baichuan" },
17
- { LLM_ARCH_STARCODER, "starcoder" },
18
- { LLM_ARCH_REFACT, "refact" },
19
- { LLM_ARCH_BERT, "bert" },
20
- { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
21
- { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
22
- { LLM_ARCH_BLOOM, "bloom" },
23
- { LLM_ARCH_STABLELM, "stablelm" },
24
- { LLM_ARCH_QWEN, "qwen" },
25
- { LLM_ARCH_QWEN2, "qwen2" },
26
- { LLM_ARCH_QWEN2MOE, "qwen2moe" },
27
- { LLM_ARCH_QWEN2VL, "qwen2vl" },
28
- { LLM_ARCH_PHI2, "phi2" },
29
- { LLM_ARCH_PHI3, "phi3" },
30
- { LLM_ARCH_PLAMO, "plamo" },
31
- { LLM_ARCH_CODESHELL, "codeshell" },
32
- { LLM_ARCH_ORION, "orion" },
33
- { LLM_ARCH_INTERNLM2, "internlm2" },
34
- { LLM_ARCH_MINICPM, "minicpm" },
35
- { LLM_ARCH_MINICPM3, "minicpm3" },
36
- { LLM_ARCH_GEMMA, "gemma" },
37
- { LLM_ARCH_GEMMA2, "gemma2" },
38
- { LLM_ARCH_STARCODER2, "starcoder2" },
39
- { LLM_ARCH_MAMBA, "mamba" },
40
- { LLM_ARCH_XVERSE, "xverse" },
41
- { LLM_ARCH_COMMAND_R, "command-r" },
42
- { LLM_ARCH_COHERE2, "cohere2" },
43
- { LLM_ARCH_DBRX, "dbrx" },
44
- { LLM_ARCH_OLMO, "olmo" },
45
- { LLM_ARCH_OLMO2, "olmo2" },
46
- { LLM_ARCH_OLMOE, "olmoe" },
47
- { LLM_ARCH_OPENELM, "openelm" },
48
- { LLM_ARCH_ARCTIC, "arctic" },
49
- { LLM_ARCH_DEEPSEEK, "deepseek" },
50
- { LLM_ARCH_DEEPSEEK2, "deepseek2" },
51
- { LLM_ARCH_CHATGLM, "chatglm" },
52
- { LLM_ARCH_BITNET, "bitnet" },
53
- { LLM_ARCH_T5, "t5" },
54
- { LLM_ARCH_T5ENCODER, "t5encoder" },
55
- { LLM_ARCH_JAIS, "jais" },
56
- { LLM_ARCH_NEMOTRON, "nemotron" },
57
- { LLM_ARCH_EXAONE, "exaone" },
58
- { LLM_ARCH_RWKV6, "rwkv6" },
59
- { LLM_ARCH_GRANITE, "granite" },
60
- { LLM_ARCH_GRANITE_MOE, "granitemoe" },
61
- { LLM_ARCH_CHAMELEON, "chameleon" },
62
- { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
63
- { LLM_ARCH_UNKNOWN, "(unknown)" },
64
- };
65
-
66
- static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
67
- { LLM_KV_GENERAL_TYPE, "general.type" },
68
- { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
69
- { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
70
- { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
71
- { LLM_KV_GENERAL_NAME, "general.name" },
72
- { LLM_KV_GENERAL_AUTHOR, "general.author" },
73
- { LLM_KV_GENERAL_VERSION, "general.version" },
74
- { LLM_KV_GENERAL_URL, "general.url" },
75
- { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
76
- { LLM_KV_GENERAL_LICENSE, "general.license" },
77
- { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" },
78
- { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" },
79
-
80
- { LLM_KV_VOCAB_SIZE, "%s.vocab_size" },
81
- { LLM_KV_CONTEXT_LENGTH, "%s.context_length" },
82
- { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" },
83
- { LLM_KV_FEATURES_LENGTH, "%s.features_length" },
84
- { LLM_KV_BLOCK_COUNT, "%s.block_count" },
85
- { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
86
- { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
87
- { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, "%s.expert_feed_forward_length" },
88
- { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
89
- { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
90
- { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
91
- { LLM_KV_EXPERT_COUNT, "%s.expert_count" },
92
- { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
93
- { LLM_KV_EXPERT_SHARED_COUNT, "%s.expert_shared_count" },
94
- { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
95
- { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
96
- { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
97
- { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
98
- { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
99
- { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
100
- { LLM_KV_ATTN_LOGIT_SOFTCAPPING, "%s.attn_logit_softcapping" },
101
- { LLM_KV_FINAL_LOGIT_SOFTCAPPING, "%s.final_logit_softcapping" },
102
- { LLM_KV_SWIN_NORM, "%s.swin_norm" },
103
- { LLM_KV_RESCALE_EVERY_N_LAYERS, "%s.rescale_every_n_layers" },
104
- { LLM_KV_TIME_MIX_EXTRA_DIM, "%s.time_mix_extra_dim" },
105
- { LLM_KV_TIME_DECAY_EXTRA_DIM, "%s.time_decay_extra_dim" },
106
- { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
107
- { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
108
-
109
- { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
110
- { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
111
- { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
112
- { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
113
- { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
114
- { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
115
- { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
116
- { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
117
- { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
118
- { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
119
- { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
120
- { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
121
- { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
122
- { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
123
- { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
124
- { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
125
-
126
- { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
127
- { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
128
- { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
129
- { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
130
- { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
131
- { LLM_KV_ROPE_SCALING_FACTOR, "%s.rope.scaling.factor" },
132
- { LLM_KV_ROPE_SCALING_ATTN_FACTOR, "%s.rope.scaling.attn_factor" },
133
- { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
134
- { LLM_KV_ROPE_SCALING_FINETUNED, "%s.rope.scaling.finetuned" },
135
- { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
136
-
137
- { LLM_KV_SPLIT_NO, "split.no" },
138
- { LLM_KV_SPLIT_COUNT, "split.count" },
139
- { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
140
-
141
- { LLM_KV_SSM_CONV_KERNEL, "%s.ssm.conv_kernel" },
142
- { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
143
- { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
144
- { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
145
- { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
146
-
147
- { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
148
-
149
- { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
150
- { LLM_KV_POSNET_BLOCK_COUNT, "%s.posnet.block_count" },
151
-
152
- { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
153
- { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
154
-
155
- { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
156
- { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
157
- { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
158
- { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
159
- { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
160
- { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" },
161
- { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" },
162
- { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" },
163
- { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" },
164
- { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
165
- { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
166
- { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" },
167
- { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" },
168
- { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
169
- { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" },
170
- { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
171
- { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
172
- { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
173
- { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
174
- { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
175
- { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
176
- { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
177
- { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
178
- { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
179
- { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
180
- { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
181
- { LLM_KV_TOKENIZER_FIM_PAD_ID, "tokenizer.ggml.fim_pad_token_id" },
182
- { LLM_KV_TOKENIZER_FIM_REP_ID, "tokenizer.ggml.fim_rep_token_id" },
183
- { LLM_KV_TOKENIZER_FIM_SEP_ID, "tokenizer.ggml.fim_sep_token_id" },
184
-
185
- { LLM_KV_ADAPTER_TYPE, "adapter.type" },
186
- { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
187
-
188
- // deprecated
189
- { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
190
- { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
191
- { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
192
- };
193
-
194
- static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
195
- {
196
- LLM_ARCH_LLAMA,
197
- {
198
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
199
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
200
- { LLM_TENSOR_OUTPUT, "output" },
201
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
202
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
203
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
204
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
205
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
206
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
207
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
208
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
209
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
210
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
211
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
212
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
213
- { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
214
- { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
215
- { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
216
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
217
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
218
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
219
- },
220
- },
221
- {
222
- LLM_ARCH_DECI,
223
- {
224
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
225
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
226
- { LLM_TENSOR_OUTPUT, "output" },
227
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
228
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
229
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
230
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
231
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
232
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
233
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
234
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
235
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
236
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
237
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
238
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
239
- { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
240
- { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
241
- { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
242
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
243
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
244
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
245
- },
246
- },
247
- {
248
- LLM_ARCH_BAICHUAN,
249
- {
250
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
251
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
252
- { LLM_TENSOR_OUTPUT, "output" },
253
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
254
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
255
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
256
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
257
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
258
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
259
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
260
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
261
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
262
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
263
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
264
- },
265
- },
266
- {
267
- LLM_ARCH_FALCON,
268
- {
269
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
270
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
271
- { LLM_TENSOR_OUTPUT, "output" },
272
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
273
- { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
274
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
275
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
276
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
277
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
278
- },
279
- },
280
- {
281
- LLM_ARCH_GROK,
282
- {
283
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
284
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
285
- { LLM_TENSOR_OUTPUT, "output" },
286
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
287
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
288
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
289
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
290
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
291
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
292
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
293
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
294
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
295
- { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
296
- { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
297
- { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
298
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
299
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
300
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
301
- { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
302
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
303
- },
304
- },
305
- {
306
- LLM_ARCH_GPT2,
307
- {
308
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
309
- { LLM_TENSOR_POS_EMBD, "position_embd" },
310
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
311
- { LLM_TENSOR_OUTPUT, "output" },
312
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
313
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
314
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
315
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
316
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
317
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
318
- },
319
- },
320
- {
321
- LLM_ARCH_GPTJ,
322
- {
323
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
324
- },
325
- },
326
- {
327
- LLM_ARCH_GPTNEOX,
328
- {
329
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
330
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
331
- { LLM_TENSOR_OUTPUT, "output" },
332
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
333
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
334
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
335
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
336
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
337
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
338
- },
339
- },
340
- {
341
- LLM_ARCH_MPT,
342
- {
343
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
344
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
345
- { LLM_TENSOR_OUTPUT, "output"},
346
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
347
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
348
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
349
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
350
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
351
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
352
- { LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
353
- { LLM_TENSOR_POS_EMBD, "position_embd" },
354
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm"},
355
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm"},
356
- },
357
- },
358
- {
359
- LLM_ARCH_STARCODER,
360
- {
361
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
362
- { LLM_TENSOR_POS_EMBD, "position_embd" },
363
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
364
- { LLM_TENSOR_OUTPUT, "output" },
365
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
366
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
367
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
368
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
369
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
370
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
371
- },
372
- },
373
- {
374
- LLM_ARCH_REFACT,
375
- {
376
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
377
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
378
- { LLM_TENSOR_OUTPUT, "output" },
379
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
380
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
381
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
382
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
383
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
384
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
385
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
386
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
387
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
388
- },
389
- },
390
- {
391
- LLM_ARCH_BERT,
392
- {
393
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
394
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
395
- { LLM_TENSOR_TOKEN_TYPES, "token_types" },
396
- { LLM_TENSOR_POS_EMBD, "position_embd" },
397
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
398
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
399
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
400
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
401
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
402
- { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
403
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
404
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
405
- { LLM_TENSOR_CLS, "cls" },
406
- { LLM_TENSOR_CLS_OUT, "cls.output" },
407
- },
408
- },
409
- {
410
- LLM_ARCH_NOMIC_BERT,
411
- {
412
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
413
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
414
- { LLM_TENSOR_TOKEN_TYPES, "token_types" },
415
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
416
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
417
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
418
- { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
419
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
420
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
421
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
422
- },
423
- },
424
- {
425
- LLM_ARCH_JINA_BERT_V2,
426
- {
427
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
428
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
429
- { LLM_TENSOR_TOKEN_TYPES, "token_types" },
430
- { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
431
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
432
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
433
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
434
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
435
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
436
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
437
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
438
- { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
439
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
440
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
441
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
442
- { LLM_TENSOR_CLS, "cls" },
443
- },
444
- },
445
- {
446
- LLM_ARCH_BLOOM,
447
- {
448
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
449
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
450
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
451
- { LLM_TENSOR_OUTPUT, "output" },
452
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
453
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
454
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
455
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
456
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
457
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
458
- },
459
- },
460
- {
461
- LLM_ARCH_STABLELM,
462
- {
463
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
464
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
465
- { LLM_TENSOR_OUTPUT, "output" },
466
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
467
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
468
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
469
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
470
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
471
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
472
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
473
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
474
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
475
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
476
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
477
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
478
- },
479
- },
480
- {
481
- LLM_ARCH_QWEN,
482
- {
483
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
484
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
485
- { LLM_TENSOR_OUTPUT, "output" },
486
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
487
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
488
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
489
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
490
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
491
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
492
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
493
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
494
- },
495
- },
496
- {
497
- LLM_ARCH_QWEN2,
498
- {
499
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
500
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
501
- { LLM_TENSOR_OUTPUT, "output" },
502
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
503
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
504
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
505
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
506
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
507
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
508
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
509
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
510
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
511
- },
512
- },
513
- {
514
- LLM_ARCH_QWEN2VL,
515
- {
516
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
517
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
518
- { LLM_TENSOR_OUTPUT, "output" },
519
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
520
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
521
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
522
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
523
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
524
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
525
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
526
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
527
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
528
- },
529
- },
530
- {
531
- LLM_ARCH_QWEN2MOE,
532
- {
533
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
534
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
535
- { LLM_TENSOR_OUTPUT, "output" },
536
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
537
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
538
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
539
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
540
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
541
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
542
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
543
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
544
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
545
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
546
- { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
547
- { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
548
- { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
549
- { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
550
- },
551
- },
552
- {
553
- LLM_ARCH_PHI2,
554
- {
555
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
556
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
557
- { LLM_TENSOR_OUTPUT, "output" },
558
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
559
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
560
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
561
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
562
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
563
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
564
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
565
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
566
- },
567
- },
568
- {
569
- LLM_ARCH_PHI3,
570
- {
571
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
572
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
573
- { LLM_TENSOR_OUTPUT, "output" },
574
- { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
575
- { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
576
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
577
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
578
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
579
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
580
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
581
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
582
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
583
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
584
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
585
- },
586
- },
587
- {
588
- LLM_ARCH_PLAMO,
589
- {
590
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
591
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
592
- { LLM_TENSOR_OUTPUT, "output" },
593
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
594
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
595
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
596
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
597
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
598
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
599
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
600
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
601
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
602
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
603
- },
604
- },
605
- {
606
- LLM_ARCH_CODESHELL,
607
- {
608
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
609
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
610
- { LLM_TENSOR_OUTPUT, "output" },
611
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
612
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
613
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
614
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
615
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
616
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
617
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
618
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
619
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
620
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
621
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
622
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
623
- },
624
- },
625
- {
626
- LLM_ARCH_ORION,
627
- {
628
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
629
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
630
- { LLM_TENSOR_OUTPUT, "output" },
631
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
632
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
633
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
634
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
635
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
636
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
637
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
638
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
639
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
640
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
641
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
642
- },
643
- },
644
- {
645
- LLM_ARCH_INTERNLM2,
646
- {
647
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
648
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
649
- { LLM_TENSOR_OUTPUT, "output" },
650
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
651
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
652
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
653
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
654
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
655
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
656
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
657
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
658
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
659
- },
660
- },
661
- {
662
- LLM_ARCH_MINICPM,
663
- {
664
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
665
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
666
- { LLM_TENSOR_OUTPUT, "output" },
667
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
668
- { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
669
- { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
670
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
671
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
672
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
673
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
674
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
675
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
676
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
677
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
678
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
679
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
680
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
681
- { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
682
- { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
683
- { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
684
- },
685
- },
686
- {
687
- LLM_ARCH_MINICPM3,
688
- {
689
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
690
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
691
- { LLM_TENSOR_OUTPUT, "output" },
692
- { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
693
- { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
694
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
695
- { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
696
- { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
697
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
698
- { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
699
- { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
700
- { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
701
- { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
702
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
703
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
704
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
705
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
706
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
707
- },
708
- },
709
- {
710
- LLM_ARCH_GEMMA,
711
- {
712
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
713
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
714
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
715
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
716
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
717
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
718
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
719
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
720
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
721
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
722
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
723
- },
724
- },
725
- {
726
- LLM_ARCH_GEMMA2,
727
- {
728
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
729
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
730
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
731
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
732
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
733
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
734
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
735
- { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
736
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
737
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
738
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
739
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
740
- { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
741
- },
742
- },
743
- {
744
- LLM_ARCH_STARCODER2,
745
- {
746
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
747
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
748
- { LLM_TENSOR_OUTPUT, "output" },
749
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
750
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
751
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
752
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
753
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
754
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
755
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
756
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
757
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
758
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
759
- },
760
- },
761
- {
762
- LLM_ARCH_MAMBA,
763
- {
764
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
765
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
766
- { LLM_TENSOR_OUTPUT, "output" },
767
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
768
- { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
769
- { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
770
- { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
771
- { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
772
- { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
773
- { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
774
- { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
775
- },
776
- },
777
- {
778
- LLM_ARCH_XVERSE,
779
- {
780
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
781
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
782
- { LLM_TENSOR_OUTPUT, "output" },
783
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
784
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
785
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
786
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
787
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
788
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
789
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
790
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
791
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
792
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
793
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
794
- },
795
- },
796
- {
797
- LLM_ARCH_COMMAND_R,
798
- {
799
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
800
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
801
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
802
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
803
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
804
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
805
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
806
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
807
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
808
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
809
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
810
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
811
- },
812
- },
813
- {
814
- LLM_ARCH_COHERE2,
815
- {
816
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
817
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
818
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
819
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
820
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
821
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
822
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
823
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
824
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
825
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
826
- },
827
- },
828
- {
829
- LLM_ARCH_DBRX,
830
- {
831
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
832
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
833
- { LLM_TENSOR_OUTPUT, "output" },
834
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
835
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
836
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
837
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
838
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
839
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
840
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
841
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
842
- },
843
- },
844
- {
845
- LLM_ARCH_OLMO,
846
- {
847
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
848
- { LLM_TENSOR_OUTPUT, "output" },
849
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
850
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
851
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
852
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
853
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
854
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
855
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
856
- },
857
- },
858
- {
859
- LLM_ARCH_OLMO2,
860
- {
861
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
862
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
863
- { LLM_TENSOR_OUTPUT, "output" },
864
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
865
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
866
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
867
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
868
- { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
869
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
870
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
871
- { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
872
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
873
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
874
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
875
- },
876
- },
877
- {
878
- LLM_ARCH_OLMOE,
879
- {
880
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
881
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
882
- { LLM_TENSOR_OUTPUT, "output" },
883
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
884
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
885
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
886
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
887
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
888
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
889
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
890
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
891
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
892
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
893
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
894
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
895
- },
896
- },
897
- {
898
- LLM_ARCH_OPENELM,
899
- {
900
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
901
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
902
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
903
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
904
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
905
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
906
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
907
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
908
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
909
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
910
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
911
- },
912
- },
913
- {
914
- LLM_ARCH_ARCTIC,
915
- {
916
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
917
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
918
- { LLM_TENSOR_OUTPUT, "output" },
919
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
920
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
921
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
922
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
923
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
924
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
925
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
926
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
927
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
928
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
929
- { LLM_TENSOR_FFN_NORM_EXPS, "blk.%d.ffn_norm_exps" },
930
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
931
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
932
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
933
- },
934
- },
935
- {
936
- LLM_ARCH_DEEPSEEK,
937
- {
938
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
939
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
940
- { LLM_TENSOR_OUTPUT, "output" },
941
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
942
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
943
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
944
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
945
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
946
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
947
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
948
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
949
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
950
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
951
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
952
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
953
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
954
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
955
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
956
- { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
957
- { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
958
- { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
959
- { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
960
- },
961
- },
962
- {
963
- LLM_ARCH_DEEPSEEK2,
964
- {
965
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
966
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
967
- { LLM_TENSOR_OUTPUT, "output" },
968
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
969
- { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
970
- { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
971
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
972
- { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
973
- { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
974
- { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
975
- { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
976
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
977
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
978
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
979
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
980
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
981
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
982
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
983
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
984
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
985
- { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
986
- { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
987
- { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
988
- { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
989
- { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
990
- },
991
- },
992
- {
993
- LLM_ARCH_CHATGLM,
994
- {
995
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
996
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
997
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
998
- { LLM_TENSOR_OUTPUT, "output" },
999
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1000
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1001
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1002
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1003
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1004
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1005
- },
1006
- },
1007
- {
1008
- LLM_ARCH_BITNET,
1009
- {
1010
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1011
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1012
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1013
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1014
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1015
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1016
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1017
- { LLM_TENSOR_ATTN_SUB_NORM, "blk.%d.attn_sub_norm" },
1018
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1019
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1020
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1021
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1022
- { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" },
1023
- },
1024
- },
1025
- {
1026
- LLM_ARCH_T5,
1027
- {
1028
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1029
- { LLM_TENSOR_OUTPUT, "output" },
1030
- { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" },
1031
- { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" },
1032
- { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" },
1033
- { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" },
1034
- { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" },
1035
- { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" },
1036
- { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" },
1037
- { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" },
1038
- { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" },
1039
- { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" },
1040
- { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" },
1041
- { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" },
1042
- { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
1043
- { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" },
1044
- { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" },
1045
- { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" },
1046
- { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" },
1047
- { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1048
- { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1049
- { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1050
- { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1051
- { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1052
- { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1053
- { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1054
- { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1055
- { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1056
- { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1057
- { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1058
- },
1059
- },
1060
- {
1061
- LLM_ARCH_T5ENCODER,
1062
- {
1063
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1064
- { LLM_TENSOR_OUTPUT, "output" },
1065
- { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1066
- { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1067
- { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1068
- { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1069
- { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1070
- { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1071
- { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1072
- { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1073
- { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1074
- { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1075
- { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1076
- },
1077
- },
1078
- {
1079
- LLM_ARCH_JAIS,
1080
- {
1081
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1082
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1083
- { LLM_TENSOR_OUTPUT, "output" },
1084
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1085
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1086
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1087
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1088
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1089
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1090
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1091
- },
1092
- },
1093
- {
1094
- LLM_ARCH_NEMOTRON,
1095
- {
1096
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1097
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1098
- { LLM_TENSOR_OUTPUT, "output" },
1099
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1100
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1101
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1102
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1103
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1104
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1105
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1106
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1107
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1108
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1109
- },
1110
- },
1111
- {
1112
- LLM_ARCH_EXAONE,
1113
- {
1114
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1115
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1116
- { LLM_TENSOR_OUTPUT, "output" },
1117
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1118
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1119
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1120
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1121
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1122
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1123
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1124
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1125
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1126
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1127
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1128
- },
1129
- },
1130
- {
1131
- LLM_ARCH_RWKV6,
1132
- {
1133
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1134
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1135
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1136
- { LLM_TENSOR_OUTPUT, "output" },
1137
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1138
- { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1139
- { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1140
- { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1141
- { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1142
-             { LLM_TENSOR_TIME_MIX_LERP_W,        "blk.%d.time_mix_lerp_w" },
-             { LLM_TENSOR_TIME_MIX_LERP_K,        "blk.%d.time_mix_lerp_k" },
-             { LLM_TENSOR_TIME_MIX_LERP_V,        "blk.%d.time_mix_lerp_v" },
-             { LLM_TENSOR_TIME_MIX_LERP_R,        "blk.%d.time_mix_lerp_r" },
-             { LLM_TENSOR_TIME_MIX_LERP_G,        "blk.%d.time_mix_lerp_g" },
-             { LLM_TENSOR_TIME_MIX_FIRST,         "blk.%d.time_mix_first" },
-             { LLM_TENSOR_TIME_MIX_DECAY,         "blk.%d.time_mix_decay" },
-             { LLM_TENSOR_TIME_MIX_DECAY_W1,      "blk.%d.time_mix_decay_w1" },
-             { LLM_TENSOR_TIME_MIX_DECAY_W2,      "blk.%d.time_mix_decay_w2" },
-             { LLM_TENSOR_TIME_MIX_KEY,           "blk.%d.time_mix_key" },
-             { LLM_TENSOR_TIME_MIX_VALUE,         "blk.%d.time_mix_value" },
-             { LLM_TENSOR_TIME_MIX_RECEPTANCE,    "blk.%d.time_mix_receptance" },
-             { LLM_TENSOR_TIME_MIX_GATE,          "blk.%d.time_mix_gate" },
-             { LLM_TENSOR_TIME_MIX_LN,            "blk.%d.time_mix_ln" },
-             { LLM_TENSOR_TIME_MIX_OUTPUT,        "blk.%d.time_mix_output" },
-             { LLM_TENSOR_CHANNEL_MIX_LERP_K,     "blk.%d.channel_mix_lerp_k" },
-             { LLM_TENSOR_CHANNEL_MIX_LERP_R,     "blk.%d.channel_mix_lerp_r" },
-             { LLM_TENSOR_CHANNEL_MIX_KEY,        "blk.%d.channel_mix_key" },
-             { LLM_TENSOR_CHANNEL_MIX_VALUE,      "blk.%d.channel_mix_value" },
-             { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
-         },
-     },
-     {
-         LLM_ARCH_GRANITE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
-             { LLM_TENSOR_OUTPUT,          "output" },
-             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_GRANITE_MOE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
-             { LLM_TENSOR_OUTPUT,          "output" },
-             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
-         },
-     },
-     {
-         LLM_ARCH_CHAMELEON,
-         {
-             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
-             { LLM_TENSOR_OUTPUT,          "output" },
-             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
-             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
-             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
-         },
-     },
-     {
-         LLM_ARCH_WAVTOKENIZER_DEC,
-         {
-             { LLM_TENSOR_TOKEN_EMBD,        "token_embd" },
-             { LLM_TENSOR_TOKEN_EMBD_NORM,   "token_embd_norm" },
-             { LLM_TENSOR_CONV1D,            "conv1d" },
-             { LLM_TENSOR_CONVNEXT_DW,       "convnext.%d.dw" },
-             { LLM_TENSOR_CONVNEXT_NORM,     "convnext.%d.norm" },
-             { LLM_TENSOR_CONVNEXT_PW1,      "convnext.%d.pw1" },
-             { LLM_TENSOR_CONVNEXT_PW2,      "convnext.%d.pw2" },
-             { LLM_TENSOR_CONVNEXT_GAMMA,    "convnext.%d.gamma" },
-             { LLM_TENSOR_OUTPUT_NORM,       "output_norm" },
-             { LLM_TENSOR_OUTPUT,            "output" },
-             { LLM_TENSOR_POS_NET_CONV1,     "posnet.%d.conv1" },
-             { LLM_TENSOR_POS_NET_CONV2,     "posnet.%d.conv2" },
-             { LLM_TENSOR_POS_NET_NORM,      "posnet.%d.norm" },
-             { LLM_TENSOR_POS_NET_NORM1,     "posnet.%d.norm1" },
-             { LLM_TENSOR_POS_NET_NORM2,     "posnet.%d.norm2" },
-             { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
-             { LLM_TENSOR_POS_NET_ATTN_Q,    "posnet.%d.attn_q" },
-             { LLM_TENSOR_POS_NET_ATTN_K,    "posnet.%d.attn_k" },
-             { LLM_TENSOR_POS_NET_ATTN_V,    "posnet.%d.attn_v" },
-             { LLM_TENSOR_POS_NET_ATTN_OUT,  "posnet.%d.attn_output" },
-         },
-     },
-     {
-         LLM_ARCH_UNKNOWN,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-         },
-     },
- };
-
- static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
-     {LLM_TENSOR_TOKEN_EMBD,                 {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
-     {LLM_TENSOR_POS_EMBD,                   {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
-     {LLM_TENSOR_TOKEN_EMBD_NORM,            {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
-     {LLM_TENSOR_TOKEN_TYPES,                {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
-     {LLM_TENSOR_OUTPUT,                     {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_CLS,                        {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_CLS_OUT,                    {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_OUTPUT_NORM,                {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_DEC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ENC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ROPE_FREQS,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
-     {LLM_TENSOR_ROPE_FACTORS_LONG,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
-     {LLM_TENSOR_ROPE_FACTORS_SHORT,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
-     {LLM_TENSOR_ATTN_Q,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_K,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_V,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_QKV,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_OUT,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_GATE,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_DOWN,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_UP,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_DOWN_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_GATE_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_UP_SHEXP,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_Q_A,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_Q_B,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_KV_A_MQA,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_KV_B,                  {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_Q,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_K,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_V,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_QKV,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_OUT,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_GATE,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_DOWN,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_UP,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_DOWN_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_GATE_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_UP_SHEXP,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_Q_A,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_Q_B,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_KV_A_MQA,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ATTN_KV_B,                  {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_CROSS_ATTN_Q,           {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_CROSS_ATTN_K,           {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_CROSS_ATTN_V,           {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_CROSS_ATTN_OUT,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_DEC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ENC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ENC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ENC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ENC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ENC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ENC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_ENC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_GATE_INP_SHEXP,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_GATE_INP,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_SSM_IN,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_SSM_X,                      {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_SSM_DT,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_SSM_OUT,                    {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_W1,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_W2,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_DECAY_W1,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_DECAY_W2,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_KEY,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_VALUE,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_RECEPTANCE,        {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_GATE,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_TIME_MIX_OUTPUT,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_CHANNEL_MIX_KEY,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_CHANNEL_MIX_VALUE,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_FFN_ACT,                    {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_DIV}},
-     {LLM_TENSOR_SSM_CONV1D,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_CONV}},
-     {LLM_TENSOR_SSM_A,                      {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_SCAN}},
-     {LLM_TENSOR_SSM_D,                      {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_TIME_MIX_LERP_X,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_TIME_MIX_LN,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_CHANNEL_MIX_LERP_K,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_CHANNEL_MIX_LERP_R,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_TIME_MIX_LERP_W,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
-     {LLM_TENSOR_TIME_MIX_LERP_K,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
-     {LLM_TENSOR_TIME_MIX_LERP_V,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
-     {LLM_TENSOR_TIME_MIX_LERP_R,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
-     {LLM_TENSOR_TIME_MIX_LERP_G,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
-     {LLM_TENSOR_TIME_MIX_DECAY,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
-     {LLM_TENSOR_TIME_MIX_FIRST,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
-     {LLM_TENSOR_ATTN_NORM,                  {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ATTN_NORM_2,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ATTN_OUT_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ATTN_POST_NORM,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_FFN_NORM,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_FFN_POST_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_FFN_NORM_EXPS,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ATTN_Q_NORM,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ATTN_K_NORM,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_LAYER_OUT_NORM,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ATTN_Q_A_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ATTN_KV_A_NORM,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ATTN_SUB_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_FFN_SUB_NORM,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_DEC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_DEC_CROSS_ATTN_NORM,        {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_DEC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ENC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_ENC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_DEC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
-     {LLM_TENSOR_ENC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
-     {LLM_TENSOR_FFN_DOWN_EXPS,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
-     {LLM_TENSOR_FFN_GATE_EXPS,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
-     {LLM_TENSOR_FFN_UP_EXPS,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
-     {LLM_TENSOR_FFN_EXP_PROBS_B,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
-     // this tensor is loaded for T5, but never used
-     {LLM_TENSOR_DEC_CROSS_ATTN_REL_B,       {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
-     {LLM_TENSOR_CONV1D,                     {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
-     {LLM_TENSOR_POS_NET_NORM,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_POS_NET_NORM1,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_POS_NET_NORM2,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_POS_NET_CONV1,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
-     {LLM_TENSOR_POS_NET_CONV2,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
-     {LLM_TENSOR_POS_NET_ATTN_NORM,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_POS_NET_ATTN_Q,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_POS_NET_ATTN_K,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_POS_NET_ATTN_V,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_POS_NET_ATTN_OUT,           {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_CONVNEXT_DW,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
-     {LLM_TENSOR_CONVNEXT_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
-     {LLM_TENSOR_CONVNEXT_PW1,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_CONVNEXT_PW2,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
-     {LLM_TENSOR_CONVNEXT_GAMMA,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
- };
-
- LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
-
- std::string LLM_KV::operator()(llm_kv kv) const {
-     return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
- }
-
- std::string LLM_TN_IMPL::str() const {
-     if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
-         return "__missing__";
-     }
-
-     std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
-
-     if (suffix != nullptr) {
-         name += ".";
-         name += suffix;
-     }
-
-     return name;
- }
-
- const char * llm_arch_name(llm_arch arch) {
-     auto it = LLM_ARCH_NAMES.find(arch);
-     if (it == LLM_ARCH_NAMES.end()) {
-         return "unknown";
-     }
-     return it->second;
- }
-
- llm_arch llm_arch_from_string(const std::string & name) {
-     for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
-         if (kv.second == name) {
-             return kv.first;
-         }
-     }
-
-     return LLM_ARCH_UNKNOWN;
- }
-
- const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
-     return LLM_TENSOR_INFOS.at(tensor);
- }
+ #include "llama-arch.h"
+
+ #include "llama-impl.h"
+
+ #include <map>
+
+ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
+     { LLM_ARCH_LLAMA,            "llama"            },
+     { LLM_ARCH_DECI,             "deci"             },
+     { LLM_ARCH_FALCON,           "falcon"           },
+     { LLM_ARCH_GROK,             "grok"             },
+     { LLM_ARCH_GPT2,             "gpt2"             },
+     { LLM_ARCH_GPTJ,             "gptj"             },
+     { LLM_ARCH_GPTNEOX,          "gptneox"          },
+     { LLM_ARCH_MPT,              "mpt"              },
+     { LLM_ARCH_BAICHUAN,         "baichuan"         },
+     { LLM_ARCH_STARCODER,        "starcoder"        },
+     { LLM_ARCH_REFACT,           "refact"           },
+     { LLM_ARCH_BERT,             "bert"             },
+     { LLM_ARCH_NOMIC_BERT,       "nomic-bert"       },
+     { LLM_ARCH_JINA_BERT_V2,     "jina-bert-v2"     },
+     { LLM_ARCH_BLOOM,            "bloom"            },
+     { LLM_ARCH_STABLELM,         "stablelm"         },
+     { LLM_ARCH_QWEN,             "qwen"             },
+     { LLM_ARCH_QWEN2,            "qwen2"            },
+     { LLM_ARCH_QWEN2MOE,         "qwen2moe"         },
+     { LLM_ARCH_QWEN2VL,          "qwen2vl"          },
+     { LLM_ARCH_PHI2,             "phi2"             },
+     { LLM_ARCH_PHI3,             "phi3"             },
+     { LLM_ARCH_PHIMOE,           "phimoe"           },
+     { LLM_ARCH_PLAMO,            "plamo"            },
+     { LLM_ARCH_CODESHELL,        "codeshell"        },
+     { LLM_ARCH_ORION,            "orion"            },
+     { LLM_ARCH_INTERNLM2,        "internlm2"        },
+     { LLM_ARCH_MINICPM,          "minicpm"          },
+     { LLM_ARCH_MINICPM3,         "minicpm3"         },
+     { LLM_ARCH_GEMMA,            "gemma"            },
+     { LLM_ARCH_GEMMA2,           "gemma2"           },
+     { LLM_ARCH_STARCODER2,       "starcoder2"       },
+     { LLM_ARCH_MAMBA,            "mamba"            },
+     { LLM_ARCH_XVERSE,           "xverse"           },
+     { LLM_ARCH_COMMAND_R,        "command-r"        },
+     { LLM_ARCH_COHERE2,          "cohere2"          },
+     { LLM_ARCH_DBRX,             "dbrx"             },
+     { LLM_ARCH_OLMO,             "olmo"             },
+     { LLM_ARCH_OLMO2,            "olmo2"            },
+     { LLM_ARCH_OLMOE,            "olmoe"            },
+     { LLM_ARCH_OPENELM,          "openelm"          },
+     { LLM_ARCH_ARCTIC,           "arctic"           },
+     { LLM_ARCH_DEEPSEEK,         "deepseek"         },
+     { LLM_ARCH_DEEPSEEK2,        "deepseek2"        },
+     { LLM_ARCH_CHATGLM,          "chatglm"          },
+     { LLM_ARCH_BITNET,           "bitnet"           },
+     { LLM_ARCH_T5,               "t5"               },
+     { LLM_ARCH_T5ENCODER,        "t5encoder"        },
+     { LLM_ARCH_JAIS,             "jais"             },
+     { LLM_ARCH_NEMOTRON,         "nemotron"         },
+     { LLM_ARCH_EXAONE,           "exaone"           },
+     { LLM_ARCH_RWKV6,            "rwkv6"            },
+     { LLM_ARCH_RWKV6QWEN2,       "rwkv6qwen2"       },
+     { LLM_ARCH_GRANITE,          "granite"          },
+     { LLM_ARCH_GRANITE_MOE,      "granitemoe"       },
+     { LLM_ARCH_CHAMELEON,        "chameleon"        },
+     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
+     { LLM_ARCH_UNKNOWN,          "(unknown)"        },
+ };
+
+ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
+     { LLM_KV_GENERAL_TYPE,                 "general.type" },
+     { LLM_KV_GENERAL_ARCHITECTURE,         "general.architecture" },
+     { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
+     { LLM_KV_GENERAL_ALIGNMENT,            "general.alignment" },
+     { LLM_KV_GENERAL_NAME,                 "general.name" },
+     { LLM_KV_GENERAL_AUTHOR,               "general.author" },
+     { LLM_KV_GENERAL_VERSION,              "general.version" },
+     { LLM_KV_GENERAL_URL,                  "general.url" },
+     { LLM_KV_GENERAL_DESCRIPTION,          "general.description" },
+     { LLM_KV_GENERAL_LICENSE,              "general.license" },
+     { LLM_KV_GENERAL_SOURCE_URL,           "general.source.url" },
+     { LLM_KV_GENERAL_SOURCE_HF_REPO,       "general.source.huggingface.repository" },
+
+     { LLM_KV_VOCAB_SIZE,                        "%s.vocab_size" },
+     { LLM_KV_CONTEXT_LENGTH,                    "%s.context_length" },
+     { LLM_KV_EMBEDDING_LENGTH,                  "%s.embedding_length" },
+     { LLM_KV_FEATURES_LENGTH,                   "%s.features_length" },
+     { LLM_KV_BLOCK_COUNT,                       "%s.block_count" },
+     { LLM_KV_LEADING_DENSE_BLOCK_COUNT,         "%s.leading_dense_block_count" },
+     { LLM_KV_FEED_FORWARD_LENGTH,               "%s.feed_forward_length" },
+     { LLM_KV_EXPERT_FEED_FORWARD_LENGTH,        "%s.expert_feed_forward_length" },
+     { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
+     { LLM_KV_USE_PARALLEL_RESIDUAL,             "%s.use_parallel_residual" },
+     { LLM_KV_TENSOR_DATA_LAYOUT,                "%s.tensor_data_layout" },
+     { LLM_KV_EXPERT_COUNT,                      "%s.expert_count" },
+     { LLM_KV_EXPERT_USED_COUNT,                 "%s.expert_used_count" },
+     { LLM_KV_EXPERT_SHARED_COUNT,               "%s.expert_shared_count" },
+     { LLM_KV_EXPERT_WEIGHTS_SCALE,              "%s.expert_weights_scale" },
+     { LLM_KV_EXPERT_WEIGHTS_NORM,               "%s.expert_weights_norm" },
+     { LLM_KV_EXPERT_GATING_FUNC,                "%s.expert_gating_func" },
+     { LLM_KV_POOLING_TYPE,                      "%s.pooling_type" },
+     { LLM_KV_LOGIT_SCALE,                       "%s.logit_scale" },
+     { LLM_KV_DECODER_START_TOKEN_ID,            "%s.decoder_start_token_id" },
+     { LLM_KV_ATTN_LOGIT_SOFTCAPPING,            "%s.attn_logit_softcapping" },
+     { LLM_KV_FINAL_LOGIT_SOFTCAPPING,           "%s.final_logit_softcapping" },
+     { LLM_KV_SWIN_NORM,                         "%s.swin_norm" },
+     { LLM_KV_RESCALE_EVERY_N_LAYERS,            "%s.rescale_every_n_layers" },
+     { LLM_KV_TIME_MIX_EXTRA_DIM,                "%s.time_mix_extra_dim" },
+     { LLM_KV_TIME_DECAY_EXTRA_DIM,              "%s.time_decay_extra_dim" },
+     { LLM_KV_RESIDUAL_SCALE,                    "%s.residual_scale" },
+     { LLM_KV_EMBEDDING_SCALE,                   "%s.embedding_scale" },
+     { LLM_KV_TOKEN_SHIFT_COUNT,                 "%s.token_shift_count" },
+
+     { LLM_KV_ATTENTION_HEAD_COUNT,             "%s.attention.head_count" },
+     { LLM_KV_ATTENTION_HEAD_COUNT_KV,          "%s.attention.head_count_kv" },
+     { LLM_KV_ATTENTION_MAX_ALIBI_BIAS,         "%s.attention.max_alibi_bias" },
+     { LLM_KV_ATTENTION_CLAMP_KQV,              "%s.attention.clamp_kqv" },
+     { LLM_KV_ATTENTION_KEY_LENGTH,             "%s.attention.key_length" },
+     { LLM_KV_ATTENTION_VALUE_LENGTH,           "%s.attention.value_length" },
+     { LLM_KV_ATTENTION_LAYERNORM_EPS,          "%s.attention.layer_norm_epsilon" },
+     { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,      "%s.attention.layer_norm_rms_epsilon" },
+     { LLM_KV_ATTENTION_GROUPNORM_EPS,          "%s.attention.group_norm_epsilon" },
+     { LLM_KV_ATTENTION_GROUPNORM_GROUPS,       "%s.attention.group_norm_groups" },
+     { LLM_KV_ATTENTION_CAUSAL,                 "%s.attention.causal" },
+     { LLM_KV_ATTENTION_Q_LORA_RANK,            "%s.attention.q_lora_rank" },
+     { LLM_KV_ATTENTION_KV_LORA_RANK,           "%s.attention.kv_lora_rank" },
+     { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
+     { LLM_KV_ATTENTION_SLIDING_WINDOW,         "%s.attention.sliding_window" },
+     { LLM_KV_ATTENTION_SCALE,                  "%s.attention.scale" },
+
+     { LLM_KV_ROPE_DIMENSION_COUNT,      "%s.rope.dimension_count" },
+     { LLM_KV_ROPE_DIMENSION_SECTIONS,   "%s.rope.dimension_sections" },
+     { LLM_KV_ROPE_FREQ_BASE,            "%s.rope.freq_base" },
+     { LLM_KV_ROPE_SCALE_LINEAR,         "%s.rope.scale_linear" },
+     { LLM_KV_ROPE_SCALING_TYPE,         "%s.rope.scaling.type" },
+     { LLM_KV_ROPE_SCALING_FACTOR,       "%s.rope.scaling.factor" },
+     { LLM_KV_ROPE_SCALING_ATTN_FACTOR,  "%s.rope.scaling.attn_factor" },
+     { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
+     { LLM_KV_ROPE_SCALING_FINETUNED,    "%s.rope.scaling.finetuned" },
+     { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
+
+     { LLM_KV_SPLIT_NO,            "split.no" },
+     { LLM_KV_SPLIT_COUNT,         "split.count" },
+     { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
+
+     { LLM_KV_SSM_CONV_KERNEL,    "%s.ssm.conv_kernel" },
+     { LLM_KV_SSM_INNER_SIZE,     "%s.ssm.inner_size" },
+     { LLM_KV_SSM_STATE_SIZE,     "%s.ssm.state_size" },
+     { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
+     { LLM_KV_SSM_DT_B_C_RMS,     "%s.ssm.dt_b_c_rms" },
+
+     { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
+
+     { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
+     { LLM_KV_POSNET_BLOCK_COUNT,      "%s.posnet.block_count" },
+
+     { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
+     { LLM_KV_CONVNEXT_BLOCK_COUNT,      "%s.convnext.block_count" },
+
+     { LLM_KV_TOKENIZER_MODEL,                "tokenizer.ggml.model" },
+     { LLM_KV_TOKENIZER_PRE,                  "tokenizer.ggml.pre" },
+     { LLM_KV_TOKENIZER_LIST,                 "tokenizer.ggml.tokens" },
+     { LLM_KV_TOKENIZER_TOKEN_TYPE,           "tokenizer.ggml.token_type" },
+     { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,     "tokenizer.ggml.token_type_count" },
+     { LLM_KV_TOKENIZER_SCORES,               "tokenizer.ggml.scores" },
+     { LLM_KV_TOKENIZER_MERGES,               "tokenizer.ggml.merges" },
+     { LLM_KV_TOKENIZER_BOS_ID,               "tokenizer.ggml.bos_token_id" },
+     { LLM_KV_TOKENIZER_EOS_ID,               "tokenizer.ggml.eos_token_id" },
+     { LLM_KV_TOKENIZER_EOT_ID,               "tokenizer.ggml.eot_token_id" },
+     { LLM_KV_TOKENIZER_EOM_ID,               "tokenizer.ggml.eom_token_id" },
+     { LLM_KV_TOKENIZER_UNK_ID,               "tokenizer.ggml.unknown_token_id" },
+     { LLM_KV_TOKENIZER_SEP_ID,               "tokenizer.ggml.seperator_token_id" },
+     { LLM_KV_TOKENIZER_PAD_ID,               "tokenizer.ggml.padding_token_id" },
+     { LLM_KV_TOKENIZER_CLS_ID,               "tokenizer.ggml.cls_token_id" },
+     { LLM_KV_TOKENIZER_MASK_ID,              "tokenizer.ggml.mask_token_id" },
+     { LLM_KV_TOKENIZER_ADD_BOS,              "tokenizer.ggml.add_bos_token" },
+     { LLM_KV_TOKENIZER_ADD_EOS,              "tokenizer.ggml.add_eos_token" },
+     { LLM_KV_TOKENIZER_ADD_PREFIX,           "tokenizer.ggml.add_space_prefix" },
+     { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,      "tokenizer.ggml.remove_extra_whitespaces" },
+     { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
+     { LLM_KV_TOKENIZER_HF_JSON,              "tokenizer.huggingface.json" },
+     { LLM_KV_TOKENIZER_RWKV,                 "tokenizer.rwkv.world" },
+     { LLM_KV_TOKENIZER_CHAT_TEMPLATE,        "tokenizer.chat_template" },
+     { LLM_KV_TOKENIZER_FIM_PRE_ID,           "tokenizer.ggml.fim_pre_token_id" },
+     { LLM_KV_TOKENIZER_FIM_SUF_ID,           "tokenizer.ggml.fim_suf_token_id" },
+     { LLM_KV_TOKENIZER_FIM_MID_ID,           "tokenizer.ggml.fim_mid_token_id" },
+     { LLM_KV_TOKENIZER_FIM_PAD_ID,           "tokenizer.ggml.fim_pad_token_id" },
+     { LLM_KV_TOKENIZER_FIM_REP_ID,           "tokenizer.ggml.fim_rep_token_id" },
+     { LLM_KV_TOKENIZER_FIM_SEP_ID,           "tokenizer.ggml.fim_sep_token_id" },
+
+     { LLM_KV_ADAPTER_TYPE,       "adapter.type" },
+     { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
+
+     // deprecated
+     { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
+     { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
+     { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
+ };
+
+ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
+     {
+         LLM_ARCH_LLAMA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_DECI,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_BAICHUAN,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_FALCON,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_NORM_2,     "blk.%d.attn_norm_2" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_GROK,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+         },
+     },
+     {
+         LLM_ARCH_GPT2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_POS_EMBD,        "position_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_GPTJ,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+         },
+     },
+     {
+         LLM_ARCH_GPTNEOX,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MPT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output"},
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_ACT,         "blk.%d.ffn.act" },
+             { LLM_TENSOR_POS_EMBD,        "position_embd" },
+             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm"},
+             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm"},
+         },
+     },
+     {
+         LLM_ARCH_STARCODER,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_POS_EMBD,        "position_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_REFACT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_BERT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
+             { LLM_TENSOR_POS_EMBD,        "position_embd" },
+             { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_CLS,             "cls" },
+             { LLM_TENSOR_CLS_OUT,         "cls.output" },
+         },
+     },
+     {
+         LLM_ARCH_NOMIC_BERT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
+             { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_JINA_BERT_V2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
+             { LLM_TENSOR_ATTN_NORM_2,     "blk.%d.attn_norm_2" },
+             { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_CLS,             "cls" },
+         },
+     },
+     {
+         LLM_ARCH_BLOOM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_STABLELM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2VL,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2MOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+             { LLM_TENSOR_OUTPUT,             "output" },
+             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+             { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
+             { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
+             { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
+         },
+     },
+     {
+         LLM_ARCH_PHI2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_PHI3,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+             { LLM_TENSOR_OUTPUT,             "output" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,           "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_PHIMOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+             { LLM_TENSOR_OUTPUT,             "output" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,           "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_PLAMO,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_CODESHELL,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_ORION,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_INTERNLM2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MINICPM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+             { LLM_TENSOR_OUTPUT,             "output" },
+             { LLM_TENSOR_ROPE_FREQS,         "rope_freqs" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,      "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP,       "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP,       "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP,         "blk.%d.ffn_up.%d" },
+         },
+     },
+     {
+         LLM_ARCH_MINICPM3,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+             { LLM_TENSOR_OUTPUT,             "output" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q_A_NORM,      "blk.%d.attn_q_a_norm" },
+             { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
+             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_A,           "blk.%d.attn_q_a" },
+             { LLM_TENSOR_ATTN_Q_B,           "blk.%d.attn_q_b" },
+             { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
+             { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
+             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_GEMMA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_GEMMA2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
+         },
+     },
+     {
+         LLM_ARCH_STARCODER2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MAMBA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in" },
+             { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
+             { LLM_TENSOR_SSM_X,           "blk.%d.ssm_x" },
+             { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt" },
+             { LLM_TENSOR_SSM_A,           "blk.%d.ssm_a" },
+             { LLM_TENSOR_SSM_D,           "blk.%d.ssm_d" },
+             { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
+         },
+     },
+     {
+         LLM_ARCH_XVERSE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_COMMAND_R,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_COHERE2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_DBRX,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_OLMO,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_OLMO2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
+             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_OLMOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_OPENELM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_ARCTIC,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_NORM_EXPS,   "blk.%d.ffn_norm_exps" },
+             { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_DEEPSEEK,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+             { LLM_TENSOR_OUTPUT,             "output" },
+             { LLM_TENSOR_ROPE_FREQS,         "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD,      "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+             { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
+             { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
+             { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
+         },
+     },
+     {
+         LLM_ARCH_DEEPSEEK2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+             { LLM_TENSOR_OUTPUT,             "output" },
+             { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q_A_NORM,      "blk.%d.attn_q_a_norm" },
+             { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
+             { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_A,           "blk.%d.attn_q_a" },
+             { LLM_TENSOR_ATTN_Q_B,           "blk.%d.attn_q_b" },
+             { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
+             { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
+             { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+             { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
+             { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
+             { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
+             { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
+         },
+     },
+     {
+         LLM_ARCH_CHATGLM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_BITNET,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_SUB_NORM,   "blk.%d.attn_sub_norm" },
+             { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_SUB_NORM,    "blk.%d.ffn_sub_norm" },
+         },
+     },
+     {
+         LLM_ARCH_T5,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,           "token_embd" },
+             { LLM_TENSOR_OUTPUT,               "output" },
+             { LLM_TENSOR_DEC_OUTPUT_NORM,      "dec.output_norm" },
+             { LLM_TENSOR_DEC_ATTN_NORM,        "dec.blk.%d.attn_norm" },
+             { LLM_TENSOR_DEC_ATTN_Q,           "dec.blk.%d.attn_q" },
+             { LLM_TENSOR_DEC_ATTN_K,           "dec.blk.%d.attn_k" },
+             { LLM_TENSOR_DEC_ATTN_V,           "dec.blk.%d.attn_v" },
+             { LLM_TENSOR_DEC_ATTN_OUT,         "dec.blk.%d.attn_o" },
+             { LLM_TENSOR_DEC_ATTN_REL_B,       "dec.blk.%d.attn_rel_b" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_NORM,  "dec.blk.%d.cross_attn_norm" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_Q,     "dec.blk.%d.cross_attn_q" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_K,     "dec.blk.%d.cross_attn_k" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_V,     "dec.blk.%d.cross_attn_v" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_OUT,   "dec.blk.%d.cross_attn_o" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
+             { LLM_TENSOR_DEC_FFN_NORM,         "dec.blk.%d.ffn_norm" },
+             { LLM_TENSOR_DEC_FFN_GATE,         "dec.blk.%d.ffn_gate" },
+             { LLM_TENSOR_DEC_FFN_DOWN,         "dec.blk.%d.ffn_down" },
+             { LLM_TENSOR_DEC_FFN_UP,           "dec.blk.%d.ffn_up" },
+             { LLM_TENSOR_ENC_OUTPUT_NORM,      "enc.output_norm" },
+             { LLM_TENSOR_ENC_ATTN_NORM,        "enc.blk.%d.attn_norm" },
+             { LLM_TENSOR_ENC_ATTN_Q,           "enc.blk.%d.attn_q" },
+             { LLM_TENSOR_ENC_ATTN_K,           "enc.blk.%d.attn_k" },
+             { LLM_TENSOR_ENC_ATTN_V,           "enc.blk.%d.attn_v" },
+             { LLM_TENSOR_ENC_ATTN_OUT,         "enc.blk.%d.attn_o" },
+             { LLM_TENSOR_ENC_ATTN_REL_B,       "enc.blk.%d.attn_rel_b" },
+             { LLM_TENSOR_ENC_FFN_NORM,         "enc.blk.%d.ffn_norm" },
+             { LLM_TENSOR_ENC_FFN_GATE,         "enc.blk.%d.ffn_gate" },
+             { LLM_TENSOR_ENC_FFN_DOWN,         "enc.blk.%d.ffn_down" },
+             { LLM_TENSOR_ENC_FFN_UP,           "enc.blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_T5ENCODER,
+         {
+             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+             { LLM_TENSOR_OUTPUT,          "output" },
+             { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
+             { LLM_TENSOR_ENC_ATTN_NORM,   "enc.blk.%d.attn_norm" },
+             { LLM_TENSOR_ENC_ATTN_Q,      "enc.blk.%d.attn_q" },
+             { LLM_TENSOR_ENC_ATTN_K,      "enc.blk.%d.attn_k" },
+             { LLM_TENSOR_ENC_ATTN_V,      "enc.blk.%d.attn_v" },
+             { LLM_TENSOR_ENC_ATTN_OUT,    "enc.blk.%d.attn_o" },
+             { LLM_TENSOR_ENC_ATTN_REL_B,  "enc.blk.%d.attn_rel_b" },
+             { LLM_TENSOR_ENC_FFN_NORM,    "enc.blk.%d.ffn_norm" },
+             { LLM_TENSOR_ENC_FFN_GATE,    "enc.blk.%d.ffn_gate" },
+             { LLM_TENSOR_ENC_FFN_DOWN,    "enc.blk.%d.ffn_down" },
+             { LLM_TENSOR_ENC_FFN_UP,      "enc.blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_JAIS,
+ {
1106
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1107
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1108
+ { LLM_TENSOR_OUTPUT, "output" },
1109
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1110
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1111
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1112
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1113
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1114
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1115
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1116
+ },
1117
+ },
1118
+ {
1119
+ LLM_ARCH_NEMOTRON,
1120
+ {
1121
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1122
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1123
+ { LLM_TENSOR_OUTPUT, "output" },
1124
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1125
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1126
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1127
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1128
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1129
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1130
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1131
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1132
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1133
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1134
+ },
1135
+ },
1136
+ {
1137
+ LLM_ARCH_EXAONE,
1138
+ {
1139
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1140
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1141
+ { LLM_TENSOR_OUTPUT, "output" },
1142
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1143
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1144
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1145
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1146
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1147
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1148
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1149
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1150
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1151
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1152
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1153
+ },
1154
+ },
1155
+ {
1156
+ LLM_ARCH_RWKV6,
1157
+ {
1158
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1159
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1160
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1161
+ { LLM_TENSOR_OUTPUT, "output" },
1162
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1163
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1164
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1165
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1166
+ { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1167
+ { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
1168
+ { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
1169
+ { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
1170
+ { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
1171
+ { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
1172
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1173
+ { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1174
+ { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1175
+ { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1176
+ { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1177
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1178
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1179
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1180
+ { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1181
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1182
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1183
+ { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1184
+ { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
1185
+ { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1186
+ { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1187
+ { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
1188
+ },
1189
+ },
1190
+ {
1191
+ LLM_ARCH_RWKV6QWEN2,
1192
+ {
1193
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1194
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1195
+ { LLM_TENSOR_OUTPUT, "output" },
1196
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1197
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1198
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1199
+ { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1200
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1201
+ { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1202
+ { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1203
+ { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1204
+ { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1205
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1206
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1207
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1208
+ { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1209
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1210
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1211
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1212
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1213
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1214
+ },
1215
+ },
1216
+ {
1217
+ LLM_ARCH_GRANITE,
1218
+ {
1219
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1220
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1221
+ { LLM_TENSOR_OUTPUT, "output" },
1222
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1223
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1224
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1225
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1226
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1227
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1228
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1229
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1230
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1231
+ },
1232
+ },
1233
+ {
1234
+ LLM_ARCH_GRANITE_MOE,
1235
+ {
1236
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1237
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1238
+ { LLM_TENSOR_OUTPUT, "output" },
1239
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1240
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1241
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1242
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1243
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1244
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1245
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1246
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1247
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1248
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1249
+ },
1250
+ },
1251
+ {
1252
+ LLM_ARCH_CHAMELEON,
1253
+ {
1254
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1255
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1256
+ { LLM_TENSOR_OUTPUT, "output" },
1257
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1258
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1259
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1260
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1261
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1262
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1263
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1264
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1265
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1266
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1267
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1268
+ },
1269
+ },
1270
+ {
1271
+ LLM_ARCH_WAVTOKENIZER_DEC,
1272
+ {
1273
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1274
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1275
+ { LLM_TENSOR_CONV1D, "conv1d" },
1276
+ { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
1277
+ { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
1278
+ { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
1279
+ { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
1280
+ { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
1281
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1282
+ { LLM_TENSOR_OUTPUT, "output" },
1283
+ { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
1284
+ { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
1285
+ { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
1286
+ { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
1287
+ { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
1288
+ { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
1289
+ { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
1290
+ { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
1291
+ { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
1292
+ { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
1293
+ },
1294
+ },
1295
+ {
1296
+ LLM_ARCH_UNKNOWN,
1297
+ {
1298
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1299
+ },
1300
+ },
1301
+ };
1302
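Each entry above pairs a tensor enum with a per-architecture GGUF name pattern; the "%d" placeholder is filled with the block (layer) index when names are resolved against the model file. A minimal standalone sketch of that expansion, assuming nothing beyond the C++ standard library (the real code path goes through the ::format() helper used by LLM_TN_IMPL::str() further down in this file):

    #include <cstdio>
    #include <string>

    // Expand a pattern such as "blk.%d.attn_q" into a concrete tensor name.
    // Illustrative only; expand_tensor_name is a hypothetical helper, not the
    // library's own formatting utility.
    static std::string expand_tensor_name(const char * pattern, int block_id) {
        char buf[256];
        std::snprintf(buf, sizeof(buf), pattern, block_id);
        return std::string(buf);
    }

    // expand_tensor_name("blk.%d.attn_q", 3) == "blk.3.attn_q"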
+
+ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
+     {LLM_TENSOR_TOKEN_EMBD,                 {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_POS_EMBD,                   {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_TOKEN_EMBD_NORM,            {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_TOKEN_TYPES,                {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_OUTPUT,                     {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CLS,                        {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CLS_OUT,                    {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_OUTPUT_NORM,                {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ENC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ROPE_FREQS,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+     {LLM_TENSOR_ROPE_FACTORS_LONG,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+     {LLM_TENSOR_ROPE_FACTORS_SHORT,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+     {LLM_TENSOR_ATTN_Q,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_K,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_V,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_QKV,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_OUT,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_GATE,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_DOWN,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_UP,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_DOWN_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_GATE_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_UP_SHEXP,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_Q_A,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_Q_B,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_KV_A_MQA,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_KV_B,                  {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_Q,           {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_K,           {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_V,           {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_OUT,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_GATE_INP_SHEXP,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_GATE_INP,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_SSM_IN,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_SSM_X,                      {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_SSM_DT,                     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_SSM_OUT,                    {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_W1,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_W2,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_DECAY_W1,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_DECAY_W2,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_KEY,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_VALUE,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_RECEPTANCE,        {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_GATE,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_OUTPUT,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CHANNEL_MIX_KEY,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,     {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CHANNEL_MIX_VALUE,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_ACT,                    {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_DIV}},
+     {LLM_TENSOR_SSM_CONV1D,                 {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_CONV}},
+     {LLM_TENSOR_SSM_A,                      {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_SCAN}},
+     {LLM_TENSOR_SSM_D,                      {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_TIME_MIX_LERP_X,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_TIME_MIX_LN,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_CHANNEL_MIX_LERP_K,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_CHANNEL_MIX_LERP_R,         {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_TIME_MIX_LERP_W,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_K,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_V,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_R,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_G,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_FUSED,        {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_DECAY,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_FIRST,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
+     {LLM_TENSOR_ATTN_NORM,                  {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_NORM_2,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_OUT_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_POST_NORM,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_FFN_NORM,                   {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_FFN_POST_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_FFN_NORM_EXPS,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_Q_NORM,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_K_NORM,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_LAYER_OUT_NORM,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_Q_A_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_KV_A_NORM,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_SUB_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_FFN_SUB_NORM,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_NORM,        {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ENC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ENC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_ENC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_FFN_DOWN_EXPS,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+     {LLM_TENSOR_FFN_GATE_EXPS,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+     {LLM_TENSOR_FFN_UP_EXPS,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+     {LLM_TENSOR_FFN_EXP_PROBS_B,            {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     // this tensor is loaded for T5, but never used
+     {LLM_TENSOR_DEC_CROSS_ATTN_REL_B,       {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
+     {LLM_TENSOR_CONV1D,                     {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
+     {LLM_TENSOR_POS_NET_NORM,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_POS_NET_NORM1,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_POS_NET_NORM2,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_POS_NET_CONV1,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+     {LLM_TENSOR_POS_NET_CONV2,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+     {LLM_TENSOR_POS_NET_ATTN_NORM,          {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_POS_NET_ATTN_Q,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_POS_NET_ATTN_K,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_POS_NET_ATTN_V,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_POS_NET_ATTN_OUT,           {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CONVNEXT_DW,                {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+     {LLM_TENSOR_CONVNEXT_NORM,              {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_CONVNEXT_PW1,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CONVNEXT_PW2,               {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CONVNEXT_GAMMA,             {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ };
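LLM_TENSOR_INFOS classifies every tensor by its layer role (input, output, or per-layer repeating) and by the ggml operation that consumes it, which the loader can consult when deciding where a tensor may live. A hedged sketch of a lookup through the accessor defined at the end of this hunk; the field names layer and op are assumed from the llm_tensor_info declaration in llama-arch.h, which is outside this diff:

    // Classify a tensor as a per-block weight consumed by a matrix multiply.
    // is_per_layer_matmul_weight is a hypothetical helper for illustration.
    static bool is_per_layer_matmul_weight(llm_tensor tensor) {
        const llm_tensor_info & info = llm_tensor_info_for(tensor);
        return info.layer == LLM_TENSOR_LAYER_REPEATING && info.op == LM_GGML_OP_MUL_MAT;
    }

    // is_per_layer_matmul_weight(LLM_TENSOR_ATTN_Q) -> true
    // is_per_layer_matmul_weight(LLM_TENSOR_TOKEN_EMBD) -> false (input layer, GET_ROWS)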
+
+ LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
+
+ std::string LLM_KV::operator()(llm_kv kv) const {
+     return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
+ }
+
+ std::string LLM_TN_IMPL::str() const {
+     if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
+         return "__missing__";
+     }
+
+     std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
+
+     if (suffix != nullptr) {
+         name += ".";
+         name += suffix;
+     }
+
+     return name;
+ }
+
+ const char * llm_arch_name(llm_arch arch) {
+     auto it = LLM_ARCH_NAMES.find(arch);
+     if (it == LLM_ARCH_NAMES.end()) {
+         return "unknown";
+     }
+     return it->second;
+ }
+
+ llm_arch llm_arch_from_string(const std::string & name) {
+     for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
+         if (kv.second == name) {
+             return kv.first;
+         }
+     }
+
+     return LLM_ARCH_UNKNOWN;
+ }
+
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
+     return LLM_TENSOR_INFOS.at(tensor);
+ }
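Taken together, these helpers are how the loader goes from an architecture string in a GGUF header to concrete tensor names. A usage sketch; the string "deepseek2" is assumed to be the LLM_ARCH_NAMES entry for LLM_ARCH_DEEPSEEK2, and the LLM_TN builder is assumed from llama-arch.h, since neither appears in this hunk:

    // Round-trip an architecture name, then resolve a per-block tensor name.
    llm_arch arch = llm_arch_from_string("deepseek2"); // LLM_ARCH_DEEPSEEK2
    const char * name = llm_arch_name(arch);           // "deepseek2"

    // Hypothetical use of the LLM_TN builder (only LLM_TN_IMPL::str() is
    // visible above); with bid = 0 this would yield "blk.0.attn_q.weight":
    // std::string s = LLM_TN(arch)(LLM_TENSOR_ATTN_Q, "weight", 0).str();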