cui-llama.rn 1.3.5 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/README.md +22 -1
  2. package/android/src/main/CMakeLists.txt +25 -20
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +31 -9
  4. package/android/src/main/java/com/rnllama/RNLlama.java +98 -0
  5. package/android/src/main/jni-utils.h +94 -0
  6. package/android/src/main/jni.cpp +108 -37
  7. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +15 -0
  8. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +15 -0
  9. package/cpp/common.cpp +1982 -1965
  10. package/cpp/common.h +665 -657
  11. package/cpp/ggml-backend-reg.cpp +5 -0
  12. package/cpp/ggml-backend.cpp +5 -2
  13. package/cpp/ggml-cpp.h +1 -0
  14. package/cpp/ggml-cpu-aarch64.cpp +6 -1
  15. package/cpp/ggml-cpu-quants.c +5 -1
  16. package/cpp/ggml-cpu.c +14122 -14122
  17. package/cpp/ggml-cpu.cpp +627 -627
  18. package/cpp/ggml-impl.h +11 -16
  19. package/cpp/ggml-metal-impl.h +288 -0
  20. package/cpp/ggml-metal.m +2 -2
  21. package/cpp/ggml-opt.cpp +854 -0
  22. package/cpp/ggml-opt.h +216 -0
  23. package/cpp/ggml.c +0 -1276
  24. package/cpp/ggml.h +0 -140
  25. package/cpp/gguf.cpp +1325 -0
  26. package/cpp/gguf.h +202 -0
  27. package/cpp/llama-adapter.cpp +346 -0
  28. package/cpp/llama-adapter.h +73 -0
  29. package/cpp/llama-arch.cpp +1434 -0
  30. package/cpp/llama-arch.h +395 -0
  31. package/cpp/llama-batch.cpp +368 -0
  32. package/cpp/llama-batch.h +88 -0
  33. package/cpp/llama-chat.cpp +567 -0
  34. package/cpp/llama-chat.h +51 -0
  35. package/cpp/llama-context.cpp +1771 -0
  36. package/cpp/llama-context.h +128 -0
  37. package/cpp/llama-cparams.cpp +1 -0
  38. package/cpp/llama-cparams.h +37 -0
  39. package/cpp/llama-cpp.h +30 -0
  40. package/cpp/llama-grammar.cpp +1 -0
  41. package/cpp/llama-grammar.h +3 -1
  42. package/cpp/llama-hparams.cpp +71 -0
  43. package/cpp/llama-hparams.h +140 -0
  44. package/cpp/llama-impl.cpp +167 -0
  45. package/cpp/llama-impl.h +16 -136
  46. package/cpp/llama-kv-cache.cpp +718 -0
  47. package/cpp/llama-kv-cache.h +218 -0
  48. package/cpp/llama-mmap.cpp +589 -0
  49. package/cpp/llama-mmap.h +67 -0
  50. package/cpp/llama-model-loader.cpp +1011 -0
  51. package/cpp/llama-model-loader.h +158 -0
  52. package/cpp/llama-model.cpp +2202 -0
  53. package/cpp/llama-model.h +391 -0
  54. package/cpp/llama-sampling.cpp +117 -4
  55. package/cpp/llama-vocab.cpp +21 -28
  56. package/cpp/llama-vocab.h +13 -1
  57. package/cpp/llama.cpp +12547 -23528
  58. package/cpp/llama.h +31 -6
  59. package/cpp/rn-llama.hpp +90 -87
  60. package/cpp/sgemm.cpp +776 -70
  61. package/cpp/sgemm.h +14 -14
  62. package/cpp/unicode.cpp +6 -0
  63. package/ios/RNLlama.mm +47 -0
  64. package/ios/RNLlamaContext.h +3 -1
  65. package/ios/RNLlamaContext.mm +71 -14
  66. package/jest/mock.js +15 -3
  67. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  68. package/lib/commonjs/index.js +33 -37
  69. package/lib/commonjs/index.js.map +1 -1
  70. package/lib/module/NativeRNLlama.js.map +1 -1
  71. package/lib/module/index.js +31 -35
  72. package/lib/module/index.js.map +1 -1
  73. package/lib/typescript/NativeRNLlama.d.ts +26 -6
  74. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  75. package/lib/typescript/index.d.ts +21 -36
  76. package/lib/typescript/index.d.ts.map +1 -1
  77. package/llama-rn.podspec +4 -18
  78. package/package.json +2 -3
  79. package/src/NativeRNLlama.ts +32 -13
  80. package/src/index.ts +52 -47
package/cpp/llama-arch.cpp (new file)
@@ -0,0 +1,1434 @@
+ #include "llama-arch.h"
+
+ #include "llama-impl.h"
+
+ #include <map>
+
+ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
+     { LLM_ARCH_LLAMA, "llama" },
+     { LLM_ARCH_DECI, "deci" },
+     { LLM_ARCH_FALCON, "falcon" },
+     { LLM_ARCH_GROK, "grok" },
+     { LLM_ARCH_GPT2, "gpt2" },
+     { LLM_ARCH_GPTJ, "gptj" },
+     { LLM_ARCH_GPTNEOX, "gptneox" },
+     { LLM_ARCH_MPT, "mpt" },
+     { LLM_ARCH_BAICHUAN, "baichuan" },
+     { LLM_ARCH_STARCODER, "starcoder" },
+     { LLM_ARCH_REFACT, "refact" },
+     { LLM_ARCH_BERT, "bert" },
+     { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
+     { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
+     { LLM_ARCH_BLOOM, "bloom" },
+     { LLM_ARCH_STABLELM, "stablelm" },
+     { LLM_ARCH_QWEN, "qwen" },
+     { LLM_ARCH_QWEN2, "qwen2" },
+     { LLM_ARCH_QWEN2MOE, "qwen2moe" },
+     { LLM_ARCH_QWEN2VL, "qwen2vl" },
+     { LLM_ARCH_PHI2, "phi2" },
+     { LLM_ARCH_PHI3, "phi3" },
+     { LLM_ARCH_PLAMO, "plamo" },
+     { LLM_ARCH_CODESHELL, "codeshell" },
+     { LLM_ARCH_ORION, "orion" },
+     { LLM_ARCH_INTERNLM2, "internlm2" },
+     { LLM_ARCH_MINICPM, "minicpm" },
+     { LLM_ARCH_MINICPM3, "minicpm3" },
+     { LLM_ARCH_GEMMA, "gemma" },
+     { LLM_ARCH_GEMMA2, "gemma2" },
+     { LLM_ARCH_STARCODER2, "starcoder2" },
+     { LLM_ARCH_MAMBA, "mamba" },
+     { LLM_ARCH_XVERSE, "xverse" },
+     { LLM_ARCH_COMMAND_R, "command-r" },
+     { LLM_ARCH_COHERE2, "cohere2" },
+     { LLM_ARCH_DBRX, "dbrx" },
+     { LLM_ARCH_OLMO, "olmo" },
+     { LLM_ARCH_OLMO2, "olmo2" },
+     { LLM_ARCH_OLMOE, "olmoe" },
+     { LLM_ARCH_OPENELM, "openelm" },
+     { LLM_ARCH_ARCTIC, "arctic" },
+     { LLM_ARCH_DEEPSEEK, "deepseek" },
+     { LLM_ARCH_DEEPSEEK2, "deepseek2" },
+     { LLM_ARCH_CHATGLM, "chatglm" },
+     { LLM_ARCH_BITNET, "bitnet" },
+     { LLM_ARCH_T5, "t5" },
+     { LLM_ARCH_T5ENCODER, "t5encoder" },
+     { LLM_ARCH_JAIS, "jais" },
+     { LLM_ARCH_NEMOTRON, "nemotron" },
+     { LLM_ARCH_EXAONE, "exaone" },
+     { LLM_ARCH_RWKV6, "rwkv6" },
+     { LLM_ARCH_GRANITE, "granite" },
+     { LLM_ARCH_GRANITE_MOE, "granitemoe" },
+     { LLM_ARCH_CHAMELEON, "chameleon" },
+     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
+     { LLM_ARCH_UNKNOWN, "(unknown)" },
+ };
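The table above maps each `llm_arch` enum value to the canonical architecture name stored in GGUF metadata under `general.architecture`. A minimal sketch of how such a table is typically consumed; only this hunk of the file is shown, so the accessor below is illustrative rather than the file's own code:

    // Resolve an architecture enum to its printable name; values missing
    // from the table fall back to the LLM_ARCH_UNKNOWN string.
    static const char * llm_arch_name(llm_arch arch) {
        auto it = LLM_ARCH_NAMES.find(arch);
        return it != LLM_ARCH_NAMES.end() ? it->second : "(unknown)";
    }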
+
+ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
+     { LLM_KV_GENERAL_TYPE, "general.type" },
+     { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
+     { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
+     { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
+     { LLM_KV_GENERAL_NAME, "general.name" },
+     { LLM_KV_GENERAL_AUTHOR, "general.author" },
+     { LLM_KV_GENERAL_VERSION, "general.version" },
+     { LLM_KV_GENERAL_URL, "general.url" },
+     { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
+     { LLM_KV_GENERAL_LICENSE, "general.license" },
+     { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" },
+     { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" },
+
+     { LLM_KV_VOCAB_SIZE, "%s.vocab_size" },
+     { LLM_KV_CONTEXT_LENGTH, "%s.context_length" },
+     { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" },
+     { LLM_KV_FEATURES_LENGTH, "%s.features_length" },
+     { LLM_KV_BLOCK_COUNT, "%s.block_count" },
+     { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
+     { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
+     { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, "%s.expert_feed_forward_length" },
+     { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
+     { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
+     { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
+     { LLM_KV_EXPERT_COUNT, "%s.expert_count" },
+     { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
+     { LLM_KV_EXPERT_SHARED_COUNT, "%s.expert_shared_count" },
+     { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
+     { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
+     { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
+     { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
+     { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
+     { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
+     { LLM_KV_ATTN_LOGIT_SOFTCAPPING, "%s.attn_logit_softcapping" },
+     { LLM_KV_FINAL_LOGIT_SOFTCAPPING, "%s.final_logit_softcapping" },
+     { LLM_KV_SWIN_NORM, "%s.swin_norm" },
+     { LLM_KV_RESCALE_EVERY_N_LAYERS, "%s.rescale_every_n_layers" },
+     { LLM_KV_TIME_MIX_EXTRA_DIM, "%s.time_mix_extra_dim" },
+     { LLM_KV_TIME_DECAY_EXTRA_DIM, "%s.time_decay_extra_dim" },
+     { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
+     { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
+
+     { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
+     { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
+     { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
+     { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
+     { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
+     { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
+     { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
+     { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
+     { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
+     { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
+     { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
+     { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
+     { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
+     { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
+     { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
+     { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
+
+     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
+     { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
+     { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
+     { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
+     { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
+     { LLM_KV_ROPE_SCALING_FACTOR, "%s.rope.scaling.factor" },
+     { LLM_KV_ROPE_SCALING_ATTN_FACTOR, "%s.rope.scaling.attn_factor" },
+     { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
+     { LLM_KV_ROPE_SCALING_FINETUNED, "%s.rope.scaling.finetuned" },
+     { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
+
+     { LLM_KV_SPLIT_NO, "split.no" },
+     { LLM_KV_SPLIT_COUNT, "split.count" },
+     { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
+
+     { LLM_KV_SSM_CONV_KERNEL, "%s.ssm.conv_kernel" },
+     { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
+     { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
+     { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
+     { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
+
+     { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
+
+     { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
+     { LLM_KV_POSNET_BLOCK_COUNT, "%s.posnet.block_count" },
+
+     { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
+     { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
+
+     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
+     { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
+     { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
+     { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
+     { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
+     { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" },
+     { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" },
+     { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" },
+     { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" },
+     { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
+     { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
+     { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" },
+     { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" },
+     { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
+     { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" },
+     { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
+     { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
+     { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
+     { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
+     { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
+     { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
+     { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
+     { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
+     { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
+     { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
+     { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
+     { LLM_KV_TOKENIZER_FIM_PAD_ID, "tokenizer.ggml.fim_pad_token_id" },
+     { LLM_KV_TOKENIZER_FIM_REP_ID, "tokenizer.ggml.fim_rep_token_id" },
+     { LLM_KV_TOKENIZER_FIM_SEP_ID, "tokenizer.ggml.fim_sep_token_id" },
+
+     { LLM_KV_ADAPTER_TYPE, "adapter.type" },
+     { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
+
+     // deprecated
+     { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
+     { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
+     { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
+ };
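Most model-level keys above carry a `%s` placeholder that is expanded with the architecture name at lookup time, so one table covers every model family: `%s.context_length` becomes `llama.context_length` for LLM_ARCH_LLAMA. A minimal sketch of that expansion, assuming the architecture name has already been resolved; `format_kv` is an illustrative helper, not part of this diff:

    #include <cstdio>
    #include <string>

    // Expand a "%s"-style key pattern with the architecture name,
    // e.g. format_kv("%s.context_length", "llama") -> "llama.context_length".
    static std::string format_kv(const char * pattern, const char * arch_name) {
        char buf[256];
        std::snprintf(buf, sizeof(buf), pattern, arch_name);
        return buf;
    }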
+
+ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
+     {
+         LLM_ARCH_LLAMA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_DECI,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_BAICHUAN,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_FALCON,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_GROK,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+         },
+     },
+     {
+         LLM_ARCH_GPT2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_POS_EMBD, "position_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_GPTJ,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+         },
+     },
+     {
+         LLM_ARCH_GPTNEOX,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MPT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
+             { LLM_TENSOR_POS_EMBD, "position_embd" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_STARCODER,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_POS_EMBD, "position_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_REFACT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_BERT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
+             { LLM_TENSOR_POS_EMBD, "position_embd" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_CLS, "cls" },
+             { LLM_TENSOR_CLS_OUT, "cls.output" },
+         },
+     },
+     {
+         LLM_ARCH_NOMIC_BERT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_JINA_BERT_V2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
+             { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_CLS, "cls" },
+         },
+     },
+     {
+         LLM_ARCH_BLOOM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_STABLELM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2VL,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2MOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+         },
+     },
+     {
+         LLM_ARCH_PHI2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_PHI3,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_PLAMO,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_CODESHELL,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_ORION,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_INTERNLM2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MINICPM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+         },
+     },
+     {
+         LLM_ARCH_MINICPM3,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
+             { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
+             { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
+             { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
+             { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_GEMMA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_GEMMA2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+         },
+     },
+     {
+         LLM_ARCH_STARCODER2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MAMBA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+             { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+             { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
+             { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+             { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+             { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+             { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+         },
+     },
+     {
+         LLM_ARCH_XVERSE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_COMMAND_R,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_COHERE2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_DBRX,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_OLMO,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_OLMO2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_OLMOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_OPENELM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_ARCTIC,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_NORM_EXPS, "blk.%d.ffn_norm_exps" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_DEEPSEEK,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+         },
+     },
+     {
+         LLM_ARCH_DEEPSEEK2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
+             { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
+             { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
+             { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
+             { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+             { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
+         },
+     },
+     {
+         LLM_ARCH_CHATGLM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_BITNET,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_SUB_NORM, "blk.%d.attn_sub_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" },
+         },
+     },
+     {
+         LLM_ARCH_T5,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" },
+             { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" },
+             { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" },
+             { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" },
+             { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" },
+             { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" },
+             { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" },
+             { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
+             { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" },
+             { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" },
+             { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" },
+             { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" },
+             { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
+             { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
+             { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
+             { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
+             { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
+             { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
+             { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
+             { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
+             { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
+             { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
+             { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_T5ENCODER,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
+             { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
+             { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
+             { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
+             { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
+             { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
+             { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
+             { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
+             { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
+             { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
+             { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_JAIS,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_NEMOTRON,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_EXAONE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_RWKV6,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
+             { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
+             { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
+             { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
+             { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
+             { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
+             { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
+             { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
+             { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
+             { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
+             { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
+             { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
+             { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
+             { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
+             { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
+             { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
+             { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
+             { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
+             { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
+             { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
+             { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
+             { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
+             { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
+             { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
+         },
+     },
+     {
+         LLM_ARCH_GRANITE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_GRANITE_MOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_CHAMELEON,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_WAVTOKENIZER_DEC,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_CONV1D, "conv1d" },
+             { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
+             { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
+             { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
+             { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
+             { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
+             { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
+             { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
+             { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
+             { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
+             { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
+             { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
+             { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
+             { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
+             { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
+         },
+     },
+     {
+         LLM_ARCH_UNKNOWN,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+         },
+     },
+ };
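Per-layer tensor names in the table above embed a `%d` placeholder for the block index (per-expert tensors such as `blk.%d.ffn_gate.%d` take a second index). A concrete name is produced by first selecting the architecture's sub-map and then formatting the pattern with the block index. A minimal sketch under those assumptions; `tensor_name` and its error handling are illustrative, not the file's own code:

    #include <cstdio>
    #include <string>

    // Look up the naming pattern for (arch, tensor) and splice in the block
    // index, e.g. (LLM_ARCH_LLAMA, LLM_TENSOR_ATTN_Q, 3) -> "blk.3.attn_q".
    static std::string tensor_name(llm_arch arch, llm_tensor tensor, int bid) {
        const char * pattern = LLM_TENSOR_NAMES.at(arch).at(tensor);
        char buf[256];
        std::snprintf(buf, sizeof(buf), pattern, bid);
        return buf;
    }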
+
+ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
+     {LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_TOKEN_TYPES, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ENC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ROPE_FREQS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+     {LLM_TENSOR_ROPE_FACTORS_LONG, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+     {LLM_TENSOR_ROPE_FACTORS_SHORT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+     {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1282
+     {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_DEC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_ENC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_GATE_INP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_GATE_INP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_SSM_IN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_SSM_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_SSM_DT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_TIME_MIX_OUTPUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CHANNEL_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CHANNEL_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_DIV}},
+     {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_CONV}},
+     {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_SCAN}},
+     {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
+     {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_FFN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_FFN_NORM_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_Q_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_K_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_LAYER_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_Q_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_KV_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ATTN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_FFN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_CROSS_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ENC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_ENC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_DEC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_ENC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
+     {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+     {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+     {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+     {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+     // this tensor is loaded for T5, but never used
+     {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
+     {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
+     {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_POS_NET_NORM2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_POS_NET_CONV1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+     {LLM_TENSOR_POS_NET_CONV2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+     {LLM_TENSOR_POS_NET_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_POS_NET_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_POS_NET_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_POS_NET_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_POS_NET_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CONVNEXT_DW, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+     {LLM_TENSOR_CONVNEXT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+     {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+     {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ };
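LLM_TENSOR_INFOS classifies each tensor id two ways: the kind of layer it belongs to (input, output, or per-block repeating) and the ggml op that consumes it, which lets the loader check that a candidate weight buffer type actually supports that op on the target backend. A hedged sketch of such a consumer, assuming llm_tensor_info exposes the layer and op fields declared alongside this table:

    // Hypothetical helper: input-layer tensors such as token_embd are read
    // row-by-row via GET_ROWS and are often cheapest to keep in host memory.
    static bool keep_on_host(const llm_tensor_info & info) {
        return info.layer == LLM_TENSOR_LAYER_INPUT &&
               info.op    == LM_GGML_OP_GET_ROWS;
    }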
+
+ LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
+
+ std::string LLM_KV::operator()(llm_kv kv) const {
+     return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
+ }
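LLM_KV is a small functor that turns an architecture-scoped metadata key into its concrete GGUF name by splicing the architecture string into the key template. For instance, assuming LLM_KV_NAMES maps LLM_KV_CONTEXT_LENGTH to "%s.context_length":

    LLM_KV kv(LLM_ARCH_LLAMA);
    // "%s.context_length" formatted with "llama"
    std::string key = kv(LLM_KV_CONTEXT_LENGTH); // "llama.context_length"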
+
+ std::string LLM_TN_IMPL::str() const {
+     if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
+         return "__missing__";
+     }
+
+     std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
+
+     if (suffix != nullptr) {
+         name += ".";
+         name += suffix;
+     }
+
+     return name;
+ }
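str() first guards against architectures that simply do not define the requested tensor, returning the "__missing__" sentinel instead of throwing, then expands the template with the block id bid and expert id xid and appends the optional suffix. A usage sketch, assuming the usual LLM_TN helper from llama-arch.h that constructs an LLM_TN_IMPL:

    LLM_TN tn(LLM_ARCH_LLAMA);
    // template "blk.%d.attn_q", bid = 0, suffix "weight"
    std::string name = tn(LLM_TENSOR_ATTN_Q, "weight", 0).str(); // "blk.0.attn_q.weight"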
+
+ const char * llm_arch_name(llm_arch arch) {
+     auto it = LLM_ARCH_NAMES.find(arch);
+     if (it == LLM_ARCH_NAMES.end()) {
+         return "unknown";
+     }
+     return it->second;
+ }
+
+ llm_arch llm_arch_from_string(const std::string & name) {
+     for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
+         if (kv.second == name) {
+             return kv.first;
+         }
+     }
+
+     return LLM_ARCH_UNKNOWN;
+ }
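These two functions round-trip between the architecture enum and the string form stored in GGUF metadata. The reverse lookup is a linear scan over LLM_ARCH_NAMES, which is acceptable since it only runs at model-load time, and unrecognized names degrade gracefully to LLM_ARCH_UNKNOWN rather than failing:

    llm_arch arch = llm_arch_from_string("llama");     // LLM_ARCH_LLAMA
    const char * s = llm_arch_name(arch);              // "llama"
    llm_arch unk = llm_arch_from_string("not-an-arch"); // LLM_ARCH_UNKNOWN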
+
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
+     return LLM_TENSOR_INFOS.at(tensor);
+ }
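llm_tensor_info_for is the public accessor over LLM_TENSOR_INFOS. Because it uses std::map::at, asking for a tensor id with no entry throws std::out_of_range, so an unmapped tensor fails loudly at load time instead of proceeding with a bogus classification:

    // assuming the layer/op fields noted above
    const llm_tensor_info & qi = llm_tensor_info_for(LLM_TENSOR_ATTN_Q);
    // qi.layer == LLM_TENSOR_LAYER_REPEATING, qi.op == LM_GGML_OP_MUL_MAT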