cui-llama.rn 1.4.3 → 1.4.6

This diff reflects the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
Files changed (134)
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
  4. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  5. package/android/src/main/jni-utils.h +6 -0
  6. package/android/src/main/jni.cpp +289 -31
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  16. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  17. package/cpp/chat-template.hpp +529 -0
  18. package/cpp/chat.cpp +1779 -0
  19. package/cpp/chat.h +135 -0
  20. package/cpp/common.cpp +2064 -1873
  21. package/cpp/common.h +700 -699
  22. package/cpp/ggml-alloc.c +1039 -1042
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +255 -255
  25. package/cpp/ggml-backend-reg.cpp +586 -582
  26. package/cpp/ggml-backend.cpp +2004 -2002
  27. package/cpp/ggml-backend.h +354 -354
  28. package/cpp/ggml-common.h +1851 -1853
  29. package/cpp/ggml-cpp.h +39 -39
  30. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  31. package/cpp/ggml-cpu-aarch64.h +8 -8
  32. package/cpp/ggml-cpu-impl.h +531 -386
  33. package/cpp/ggml-cpu-quants.c +12527 -10920
  34. package/cpp/ggml-cpu-traits.cpp +36 -36
  35. package/cpp/ggml-cpu-traits.h +38 -38
  36. package/cpp/ggml-cpu.c +15766 -14391
  37. package/cpp/ggml-cpu.cpp +655 -635
  38. package/cpp/ggml-cpu.h +138 -135
  39. package/cpp/ggml-impl.h +567 -567
  40. package/cpp/ggml-metal-impl.h +235 -0
  41. package/cpp/ggml-metal.h +1 -1
  42. package/cpp/ggml-metal.m +5146 -4884
  43. package/cpp/ggml-opt.cpp +854 -854
  44. package/cpp/ggml-opt.h +216 -216
  45. package/cpp/ggml-quants.c +5238 -5238
  46. package/cpp/ggml-threading.h +14 -14
  47. package/cpp/ggml.c +6529 -6514
  48. package/cpp/ggml.h +2198 -2194
  49. package/cpp/gguf.cpp +1329 -1329
  50. package/cpp/gguf.h +202 -202
  51. package/cpp/json-schema-to-grammar.cpp +1024 -1045
  52. package/cpp/json-schema-to-grammar.h +21 -8
  53. package/cpp/json.hpp +24766 -24766
  54. package/cpp/llama-adapter.cpp +347 -347
  55. package/cpp/llama-adapter.h +74 -74
  56. package/cpp/llama-arch.cpp +1513 -1487
  57. package/cpp/llama-arch.h +403 -400
  58. package/cpp/llama-batch.cpp +368 -368
  59. package/cpp/llama-batch.h +88 -88
  60. package/cpp/llama-chat.cpp +588 -578
  61. package/cpp/llama-chat.h +53 -52
  62. package/cpp/llama-context.cpp +1775 -1775
  63. package/cpp/llama-context.h +128 -128
  64. package/cpp/llama-cparams.cpp +1 -1
  65. package/cpp/llama-cparams.h +37 -37
  66. package/cpp/llama-cpp.h +30 -30
  67. package/cpp/llama-grammar.cpp +1219 -1139
  68. package/cpp/llama-grammar.h +173 -143
  69. package/cpp/llama-hparams.cpp +71 -71
  70. package/cpp/llama-hparams.h +139 -139
  71. package/cpp/llama-impl.cpp +167 -167
  72. package/cpp/llama-impl.h +61 -61
  73. package/cpp/llama-kv-cache.cpp +718 -718
  74. package/cpp/llama-kv-cache.h +219 -218
  75. package/cpp/llama-mmap.cpp +600 -590
  76. package/cpp/llama-mmap.h +68 -67
  77. package/cpp/llama-model-loader.cpp +1124 -1124
  78. package/cpp/llama-model-loader.h +167 -167
  79. package/cpp/llama-model.cpp +4087 -3997
  80. package/cpp/llama-model.h +370 -370
  81. package/cpp/llama-sampling.cpp +2558 -2408
  82. package/cpp/llama-sampling.h +32 -32
  83. package/cpp/llama-vocab.cpp +3264 -3247
  84. package/cpp/llama-vocab.h +125 -125
  85. package/cpp/llama.cpp +10284 -10077
  86. package/cpp/llama.h +1354 -1323
  87. package/cpp/log.cpp +393 -401
  88. package/cpp/log.h +132 -121
  89. package/cpp/minja/chat-template.hpp +529 -0
  90. package/cpp/minja/minja.hpp +2915 -0
  91. package/cpp/minja.hpp +2915 -0
  92. package/cpp/rn-llama.cpp +66 -6
  93. package/cpp/rn-llama.h +26 -1
  94. package/cpp/sampling.cpp +570 -505
  95. package/cpp/sampling.h +3 -0
  96. package/cpp/sgemm.cpp +2598 -2597
  97. package/cpp/sgemm.h +14 -14
  98. package/cpp/speculative.cpp +278 -277
  99. package/cpp/speculative.h +28 -28
  100. package/cpp/unicode.cpp +9 -2
  101. package/ios/CMakeLists.txt +6 -0
  102. package/ios/RNLlama.h +0 -8
  103. package/ios/RNLlama.mm +27 -3
  104. package/ios/RNLlamaContext.h +10 -1
  105. package/ios/RNLlamaContext.mm +269 -57
  106. package/jest/mock.js +21 -2
  107. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  108. package/lib/commonjs/grammar.js +3 -0
  109. package/lib/commonjs/grammar.js.map +1 -1
  110. package/lib/commonjs/index.js +87 -13
  111. package/lib/commonjs/index.js.map +1 -1
  112. package/lib/module/NativeRNLlama.js.map +1 -1
  113. package/lib/module/grammar.js +3 -0
  114. package/lib/module/grammar.js.map +1 -1
  115. package/lib/module/index.js +86 -13
  116. package/lib/module/index.js.map +1 -1
  117. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  118. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  119. package/lib/typescript/grammar.d.ts.map +1 -1
  120. package/lib/typescript/index.d.ts +32 -7
  121. package/lib/typescript/index.d.ts.map +1 -1
  122. package/llama-rn.podspec +1 -1
  123. package/package.json +3 -2
  124. package/src/NativeRNLlama.ts +115 -3
  125. package/src/grammar.ts +3 -0
  126. package/src/index.ts +138 -21
  127. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  128. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  129. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  130. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  132. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
  134. package/cpp/rn-llama.hpp +0 -913
package/cpp/llama-arch.cpp
@@ -1,1487 +1,1513 @@
- #include "llama-arch.h"
-
- #include "llama-impl.h"
-
- #include <map>
-
- static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
-     { LLM_ARCH_LLAMA, "llama" },
-     { LLM_ARCH_DECI, "deci" },
-     { LLM_ARCH_FALCON, "falcon" },
-     { LLM_ARCH_GROK, "grok" },
-     { LLM_ARCH_GPT2, "gpt2" },
-     { LLM_ARCH_GPTJ, "gptj" },
-     { LLM_ARCH_GPTNEOX, "gptneox" },
-     { LLM_ARCH_MPT, "mpt" },
-     { LLM_ARCH_BAICHUAN, "baichuan" },
-     { LLM_ARCH_STARCODER, "starcoder" },
-     { LLM_ARCH_REFACT, "refact" },
-     { LLM_ARCH_BERT, "bert" },
-     { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
-     { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
-     { LLM_ARCH_BLOOM, "bloom" },
-     { LLM_ARCH_STABLELM, "stablelm" },
-     { LLM_ARCH_QWEN, "qwen" },
-     { LLM_ARCH_QWEN2, "qwen2" },
-     { LLM_ARCH_QWEN2MOE, "qwen2moe" },
-     { LLM_ARCH_QWEN2VL, "qwen2vl" },
-     { LLM_ARCH_PHI2, "phi2" },
-     { LLM_ARCH_PHI3, "phi3" },
-     { LLM_ARCH_PHIMOE, "phimoe" },
-     { LLM_ARCH_PLAMO, "plamo" },
-     { LLM_ARCH_CODESHELL, "codeshell" },
-     { LLM_ARCH_ORION, "orion" },
-     { LLM_ARCH_INTERNLM2, "internlm2" },
-     { LLM_ARCH_MINICPM, "minicpm" },
-     { LLM_ARCH_MINICPM3, "minicpm3" },
-     { LLM_ARCH_GEMMA, "gemma" },
-     { LLM_ARCH_GEMMA2, "gemma2" },
-     { LLM_ARCH_STARCODER2, "starcoder2" },
-     { LLM_ARCH_MAMBA, "mamba" },
-     { LLM_ARCH_XVERSE, "xverse" },
-     { LLM_ARCH_COMMAND_R, "command-r" },
-     { LLM_ARCH_COHERE2, "cohere2" },
-     { LLM_ARCH_DBRX, "dbrx" },
-     { LLM_ARCH_OLMO, "olmo" },
-     { LLM_ARCH_OLMO2, "olmo2" },
-     { LLM_ARCH_OLMOE, "olmoe" },
-     { LLM_ARCH_OPENELM, "openelm" },
-     { LLM_ARCH_ARCTIC, "arctic" },
-     { LLM_ARCH_DEEPSEEK, "deepseek" },
-     { LLM_ARCH_DEEPSEEK2, "deepseek2" },
-     { LLM_ARCH_CHATGLM, "chatglm" },
-     { LLM_ARCH_BITNET, "bitnet" },
-     { LLM_ARCH_T5, "t5" },
-     { LLM_ARCH_T5ENCODER, "t5encoder" },
-     { LLM_ARCH_JAIS, "jais" },
-     { LLM_ARCH_NEMOTRON, "nemotron" },
-     { LLM_ARCH_EXAONE, "exaone" },
-     { LLM_ARCH_RWKV6, "rwkv6" },
-     { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
-     { LLM_ARCH_GRANITE, "granite" },
-     { LLM_ARCH_GRANITE_MOE, "granitemoe" },
-     { LLM_ARCH_CHAMELEON, "chameleon" },
-     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
-     { LLM_ARCH_UNKNOWN, "(unknown)" },
- };
-
- static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
-     { LLM_KV_GENERAL_TYPE, "general.type" },
-     { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
-     { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
-     { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
-     { LLM_KV_GENERAL_NAME, "general.name" },
-     { LLM_KV_GENERAL_AUTHOR, "general.author" },
-     { LLM_KV_GENERAL_VERSION, "general.version" },
-     { LLM_KV_GENERAL_URL, "general.url" },
-     { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
-     { LLM_KV_GENERAL_LICENSE, "general.license" },
-     { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" },
-     { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" },
-
-     { LLM_KV_VOCAB_SIZE, "%s.vocab_size" },
-     { LLM_KV_CONTEXT_LENGTH, "%s.context_length" },
-     { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" },
-     { LLM_KV_FEATURES_LENGTH, "%s.features_length" },
-     { LLM_KV_BLOCK_COUNT, "%s.block_count" },
-     { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
-     { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
-     { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, "%s.expert_feed_forward_length" },
-     { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
-     { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
-     { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
-     { LLM_KV_EXPERT_COUNT, "%s.expert_count" },
-     { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
-     { LLM_KV_EXPERT_SHARED_COUNT, "%s.expert_shared_count" },
-     { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
-     { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
-     { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
-     { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
-     { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
-     { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
-     { LLM_KV_ATTN_LOGIT_SOFTCAPPING, "%s.attn_logit_softcapping" },
-     { LLM_KV_FINAL_LOGIT_SOFTCAPPING, "%s.final_logit_softcapping" },
-     { LLM_KV_SWIN_NORM, "%s.swin_norm" },
-     { LLM_KV_RESCALE_EVERY_N_LAYERS, "%s.rescale_every_n_layers" },
-     { LLM_KV_TIME_MIX_EXTRA_DIM, "%s.time_mix_extra_dim" },
-     { LLM_KV_TIME_DECAY_EXTRA_DIM, "%s.time_decay_extra_dim" },
-     { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
-     { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
-     { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
-
-     { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
-     { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
-     { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
-     { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
-     { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
-     { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
-     { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
-     { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
-     { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
-     { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
-     { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
-     { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
-     { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
-     { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
-     { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
-     { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
-
-     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
-     { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
-     { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
-     { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
-     { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
-     { LLM_KV_ROPE_SCALING_FACTOR, "%s.rope.scaling.factor" },
-     { LLM_KV_ROPE_SCALING_ATTN_FACTOR, "%s.rope.scaling.attn_factor" },
-     { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
-     { LLM_KV_ROPE_SCALING_FINETUNED, "%s.rope.scaling.finetuned" },
-     { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
-
-     { LLM_KV_SPLIT_NO, "split.no" },
-     { LLM_KV_SPLIT_COUNT, "split.count" },
-     { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
-
-     { LLM_KV_SSM_CONV_KERNEL, "%s.ssm.conv_kernel" },
-     { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
-     { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
-     { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
-     { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
-
-     { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
-
-     { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
-     { LLM_KV_POSNET_BLOCK_COUNT, "%s.posnet.block_count" },
-
-     { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
-     { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
-
-     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
-     { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
-     { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
-     { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
-     { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
-     { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" },
-     { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" },
-     { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" },
-     { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" },
-     { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
-     { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
-     { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" },
-     { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" },
-     { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
-     { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" },
-     { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
-     { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
-     { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
-     { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
-     { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
-     { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
-     { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
-     { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
-     { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
-     { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
-     { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
-     { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
-     { LLM_KV_TOKENIZER_FIM_PAD_ID, "tokenizer.ggml.fim_pad_token_id" },
-     { LLM_KV_TOKENIZER_FIM_REP_ID, "tokenizer.ggml.fim_rep_token_id" },
-     { LLM_KV_TOKENIZER_FIM_SEP_ID, "tokenizer.ggml.fim_sep_token_id" },
-
-     { LLM_KV_ADAPTER_TYPE, "adapter.type" },
-     { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
-
-     // deprecated
-     { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
-     { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
-     { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
- };
-
- static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
-     {
-         LLM_ARCH_LLAMA,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
-             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
-             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-         },
-     },
-     {
-         LLM_ARCH_DECI,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
-             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
-             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-         },
-     },
-     {
-         LLM_ARCH_BAICHUAN,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_FALCON,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_GROK,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
-             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
-             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
-             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
-         },
-     },
-     {
-         LLM_ARCH_GPT2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_POS_EMBD, "position_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-         },
-     },
-     {
-         LLM_ARCH_GPTJ,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-         },
-     },
-     {
-         LLM_ARCH_GPTNEOX,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_MPT,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output"},
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
-             { LLM_TENSOR_POS_EMBD, "position_embd" },
-             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm"},
-             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm"},
-         },
-     },
-     {
-         LLM_ARCH_STARCODER,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_POS_EMBD, "position_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-         },
-     },
-     {
-         LLM_ARCH_REFACT,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_BERT,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
-             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
-             { LLM_TENSOR_POS_EMBD, "position_embd" },
-             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_CLS, "cls" },
-             { LLM_TENSOR_CLS_OUT, "cls.output" },
-         },
-     },
-     {
-         LLM_ARCH_NOMIC_BERT,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
-             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
-             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_JINA_BERT_V2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
-             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
-             { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
-             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_CLS, "cls" },
-         },
-     },
-     {
-         LLM_ARCH_BLOOM,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-         },
-     },
-     {
-         LLM_ARCH_STABLELM,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
-             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
-         },
-     },
-     {
-         LLM_ARCH_QWEN,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_QWEN2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_QWEN2VL,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_QWEN2MOE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
-             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
-             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
-             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
-         },
-     },
-     {
-         LLM_ARCH_PHI2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_PHI3,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
-             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_PHIMOE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
-             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-         },
-     },
-     {
-         LLM_ARCH_PLAMO,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_CODESHELL,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_ORION,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_INTERNLM2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_MINICPM,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
-             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
-             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
-             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
-         },
-     },
-     {
-         LLM_ARCH_MINICPM3,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
-             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
-             { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
-             { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
-             { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
-             { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-         },
-     },
-     {
-         LLM_ARCH_GEMMA,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_GEMMA2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
-         },
-     },
-     {
-         LLM_ARCH_STARCODER2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_MAMBA,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
-             { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
-             { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
-             { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
-             { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
-             { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
-             { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
-         },
-     },
-     {
-         LLM_ARCH_XVERSE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_COMMAND_R,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
-             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
-         },
-     },
-     {
-         LLM_ARCH_COHERE2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_DBRX,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-         },
-     },
-     {
-         LLM_ARCH_OLMO,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_OLMO2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
-             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
-             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
-             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_OLMOE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
-             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-         },
-     },
-     {
-         LLM_ARCH_OPENELM,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
-             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_ARCTIC,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_NORM_EXPS, "blk.%d.ffn_norm_exps" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-         },
-     },
-     {
-         LLM_ARCH_DEEPSEEK,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
-             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
-             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
-             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
-         },
-     },
-     {
-         LLM_ARCH_DEEPSEEK2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
-             { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
-             { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
-             { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
-             { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
-             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
-             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
-             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
-             { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
-         },
-     },
-     {
-         LLM_ARCH_CHATGLM,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-         },
-     },
-     {
-         LLM_ARCH_BITNET,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_SUB_NORM, "blk.%d.attn_sub_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" },
-         },
-     },
-     {
-         LLM_ARCH_T5,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" },
-             { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" },
-             { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" },
-             { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" },
-             { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" },
-             { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" },
-             { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" },
-             { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" },
-             { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" },
-             { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" },
-             { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" },
-             { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" },
-             { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
-             { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" },
-             { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" },
-             { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" },
-             { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" },
-             { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
-             { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
-             { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
-             { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
-             { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
-             { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
-             { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
-             { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
-             { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
-             { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
-             { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_T5ENCODER,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
-             { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
-             { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
-             { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
-             { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
-             { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
-             { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
-             { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
-             { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
-             { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
-             { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_JAIS,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-         },
-     },
-     {
-         LLM_ARCH_NEMOTRON,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_EXAONE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_RWKV6,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
-             { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
-             { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
-             { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
-             { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
-             { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
-             { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
-             { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
-             { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
-             { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
-             { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
-             { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
-             { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
-             { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
-             { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
-             { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
-             { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
-             { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
-             { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
-             { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
-             { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
-             { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
-             { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
-             { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
-             { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
-         },
-     },
-     {
-         LLM_ARCH_RWKV6QWEN2,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
-             { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
-             { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
-             { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
-             { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
-             { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
-             { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
-             { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
-             { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
-             { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
-             { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
-             { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
-             { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_GRANITE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
-             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
-             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
-         },
-     },
-     {
-         LLM_ARCH_GRANITE_MOE,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
-             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
-             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
-             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
-             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
-             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
-             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
-         },
-     },
-     {
-         LLM_ARCH_CHAMELEON,
-         {
-             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
-             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-             { LLM_TENSOR_OUTPUT, "output" },
-             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
-             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
-             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
-             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1261
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1262
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1263
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1264
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1265
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1266
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1267
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1268
- },
1269
- },
1270
- {
1271
- LLM_ARCH_WAVTOKENIZER_DEC,
1272
- {
1273
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1274
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1275
- { LLM_TENSOR_CONV1D, "conv1d" },
1276
- { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
1277
- { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
1278
- { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
1279
- { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
1280
- { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
1281
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1282
- { LLM_TENSOR_OUTPUT, "output" },
1283
- { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
1284
- { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
1285
- { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
1286
- { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
1287
- { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
1288
- { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
1289
- { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
1290
- { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
1291
- { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
1292
- { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
1293
- },
1294
- },
1295
- {
1296
- LLM_ARCH_UNKNOWN,
1297
- {
1298
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1299
- },
1300
- },
1301
- };
1302
-
1303
- static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1304
- {LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1305
- {LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1306
- {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1307
- {LLM_TENSOR_TOKEN_TYPES, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1308
- {LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1309
- {LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1310
- {LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1311
- {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1312
- {LLM_TENSOR_DEC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1313
- {LLM_TENSOR_ENC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1314
- {LLM_TENSOR_ROPE_FREQS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1315
- {LLM_TENSOR_ROPE_FACTORS_LONG, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1316
- {LLM_TENSOR_ROPE_FACTORS_SHORT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1317
- {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1318
- {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1319
- {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1320
- {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1321
- {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1322
- {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1323
- {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1324
- {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1325
- {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1326
- {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1327
- {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1328
- {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1329
- {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1330
- {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1331
- {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1332
- {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1333
- {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1334
- {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1335
- {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1336
- {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1337
- {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1338
- {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1339
- {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1340
- {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1341
- {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1342
- {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1343
- {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1344
- {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1345
- {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1346
- {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1347
- {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1348
- {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1349
- {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1350
- {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1351
- {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1352
- {LLM_TENSOR_DEC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1353
- {LLM_TENSOR_DEC_CROSS_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1354
- {LLM_TENSOR_DEC_CROSS_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1355
- {LLM_TENSOR_DEC_CROSS_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1356
- {LLM_TENSOR_DEC_CROSS_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1357
- {LLM_TENSOR_DEC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1358
- {LLM_TENSOR_DEC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1359
- {LLM_TENSOR_DEC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1360
- {LLM_TENSOR_ENC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1361
- {LLM_TENSOR_ENC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1362
- {LLM_TENSOR_ENC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1363
- {LLM_TENSOR_ENC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1364
- {LLM_TENSOR_ENC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1365
- {LLM_TENSOR_ENC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1366
- {LLM_TENSOR_ENC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1367
- {LLM_TENSOR_FFN_GATE_INP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1368
- {LLM_TENSOR_FFN_GATE_INP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1369
- {LLM_TENSOR_SSM_IN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1370
- {LLM_TENSOR_SSM_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1371
- {LLM_TENSOR_SSM_DT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1372
- {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1373
- {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1374
- {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1375
- {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1376
- {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1377
- {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1378
- {LLM_TENSOR_TIME_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1379
- {LLM_TENSOR_TIME_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1380
- {LLM_TENSOR_TIME_MIX_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1381
- {LLM_TENSOR_TIME_MIX_OUTPUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1382
- {LLM_TENSOR_CHANNEL_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1383
- {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1384
- {LLM_TENSOR_CHANNEL_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1385
- {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_DIV}},
1386
- {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_CONV}},
1387
- {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_SCAN}},
1388
- {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1389
- {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1390
- {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1391
- {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1392
- {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1393
- {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1394
- {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1395
- {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1396
- {LLM_TENSOR_TIME_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1397
- {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1398
- {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1399
- {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1400
- {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
1401
- {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1402
- {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1403
- {LLM_TENSOR_ATTN_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1404
- {LLM_TENSOR_ATTN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1405
- {LLM_TENSOR_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1406
- {LLM_TENSOR_FFN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1407
- {LLM_TENSOR_FFN_NORM_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1408
- {LLM_TENSOR_ATTN_Q_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1409
- {LLM_TENSOR_ATTN_K_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1410
- {LLM_TENSOR_LAYER_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1411
- {LLM_TENSOR_ATTN_Q_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1412
- {LLM_TENSOR_ATTN_KV_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1413
- {LLM_TENSOR_ATTN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1414
- {LLM_TENSOR_FFN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1415
- {LLM_TENSOR_DEC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1416
- {LLM_TENSOR_DEC_CROSS_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1417
- {LLM_TENSOR_DEC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1418
- {LLM_TENSOR_ENC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1419
- {LLM_TENSOR_ENC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1420
- {LLM_TENSOR_DEC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
1421
- {LLM_TENSOR_ENC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
1422
- {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1423
- {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1424
- {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1425
- {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1426
- // this tensor is loaded for T5, but never used
1427
- {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
1428
- {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
1429
- {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1430
- {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1431
- {LLM_TENSOR_POS_NET_NORM2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1432
- {LLM_TENSOR_POS_NET_CONV1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1433
- {LLM_TENSOR_POS_NET_CONV2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1434
- {LLM_TENSOR_POS_NET_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1435
- {LLM_TENSOR_POS_NET_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1436
- {LLM_TENSOR_POS_NET_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1437
- {LLM_TENSOR_POS_NET_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1438
- {LLM_TENSOR_POS_NET_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1439
- {LLM_TENSOR_CONVNEXT_DW, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1440
- {LLM_TENSOR_CONVNEXT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1441
- {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1442
- {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1443
- {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1444
- };
1445
-
1446
- LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
1447
-
1448
- std::string LLM_KV::operator()(llm_kv kv) const {
1449
- return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
1450
- }
1451
-
1452
- std::string LLM_TN_IMPL::str() const {
1453
- if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
1454
- return "__missing__";
1455
- }
1456
-
1457
- std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
1458
-
1459
- if (suffix != nullptr) {
1460
- name += ".";
1461
- name += suffix;
1462
- }
1463
-
1464
- return name;
1465
- }
1466
-
1467
- const char * llm_arch_name(llm_arch arch) {
1468
- auto it = LLM_ARCH_NAMES.find(arch);
1469
- if (it == LLM_ARCH_NAMES.end()) {
1470
- return "unknown";
1471
- }
1472
- return it->second;
1473
- }
1474
-
1475
- llm_arch llm_arch_from_string(const std::string & name) {
1476
- for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
1477
- if (kv.second == name) {
1478
- return kv.first;
1479
- }
1480
- }
1481
-
1482
- return LLM_ARCH_UNKNOWN;
1483
- }
1484
-
1485
- const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
1486
- return LLM_TENSOR_INFOS.at(tensor);
1487
- }
1
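(End of the removed llama-arch.cpp content; the diff viewer records the rewrite as a whole-file removal followed by a whole-file re-add, which continues below.) As a quick orientation aid, here is a minimal standalone C++ sketch of the lookup behavior implemented by the llm_arch_name() / llm_arch_from_string() helpers removed above. It uses a toy subset map and toy names, not the package's own types: unmapped enum values fall back to "unknown", and the string-to-enum direction is a linear scan that falls back to the unknown value.

#include <cstdio>
#include <map>
#include <string>

// Toy stand-ins (hypothetical) for the real llm_arch enum and LLM_ARCH_NAMES table.
enum llm_arch_toy { ARCH_LLAMA, ARCH_FALCON, ARCH_UNKNOWN };

static const std::map<llm_arch_toy, const char *> ARCH_NAMES = {
    { ARCH_LLAMA,  "llama"  },
    { ARCH_FALCON, "falcon" },
};

// Mirrors llm_arch_name(): unmapped enum values fall back to "unknown".
static const char * arch_name(llm_arch_toy arch) {
    auto it = ARCH_NAMES.find(arch);
    return it == ARCH_NAMES.end() ? "unknown" : it->second;
}

// Mirrors llm_arch_from_string(): linear reverse scan over the same map.
static llm_arch_toy arch_from_string(const std::string & name) {
    for (const auto & kv : ARCH_NAMES) {
        if (kv.second == name) {
            return kv.first;
        }
    }
    return ARCH_UNKNOWN;
}

int main() {
    std::printf("%s\n", arch_name(arch_from_string("falcon"))); // prints: falcon
}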
+ #include "llama-arch.h"
+
+ #include "llama-impl.h"
+
+ #include <map>
+
+ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
+     { LLM_ARCH_LLAMA, "llama" },
+     { LLM_ARCH_DECI, "deci" },
+     { LLM_ARCH_FALCON, "falcon" },
+     { LLM_ARCH_GROK, "grok" },
+     { LLM_ARCH_GPT2, "gpt2" },
+     { LLM_ARCH_GPTJ, "gptj" },
+     { LLM_ARCH_GPTNEOX, "gptneox" },
+     { LLM_ARCH_MPT, "mpt" },
+     { LLM_ARCH_BAICHUAN, "baichuan" },
+     { LLM_ARCH_STARCODER, "starcoder" },
+     { LLM_ARCH_REFACT, "refact" },
+     { LLM_ARCH_BERT, "bert" },
+     { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
+     { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
+     { LLM_ARCH_BLOOM, "bloom" },
+     { LLM_ARCH_STABLELM, "stablelm" },
+     { LLM_ARCH_QWEN, "qwen" },
+     { LLM_ARCH_QWEN2, "qwen2" },
+     { LLM_ARCH_QWEN2MOE, "qwen2moe" },
+     { LLM_ARCH_QWEN2VL, "qwen2vl" },
+     { LLM_ARCH_PHI2, "phi2" },
+     { LLM_ARCH_PHI3, "phi3" },
+     { LLM_ARCH_PHIMOE, "phimoe" },
+     { LLM_ARCH_PLAMO, "plamo" },
+     { LLM_ARCH_CODESHELL, "codeshell" },
+     { LLM_ARCH_ORION, "orion" },
+     { LLM_ARCH_INTERNLM2, "internlm2" },
+     { LLM_ARCH_MINICPM, "minicpm" },
+     { LLM_ARCH_MINICPM3, "minicpm3" },
+     { LLM_ARCH_GEMMA, "gemma" },
+     { LLM_ARCH_GEMMA2, "gemma2" },
+     { LLM_ARCH_GEMMA3, "gemma3" },
+     { LLM_ARCH_STARCODER2, "starcoder2" },
+     { LLM_ARCH_MAMBA, "mamba" },
+     { LLM_ARCH_XVERSE, "xverse" },
+     { LLM_ARCH_COMMAND_R, "command-r" },
+     { LLM_ARCH_COHERE2, "cohere2" },
+     { LLM_ARCH_DBRX, "dbrx" },
+     { LLM_ARCH_OLMO, "olmo" },
+     { LLM_ARCH_OLMO2, "olmo2" },
+     { LLM_ARCH_OLMOE, "olmoe" },
+     { LLM_ARCH_OPENELM, "openelm" },
+     { LLM_ARCH_ARCTIC, "arctic" },
+     { LLM_ARCH_DEEPSEEK, "deepseek" },
+     { LLM_ARCH_DEEPSEEK2, "deepseek2" },
+     { LLM_ARCH_CHATGLM, "chatglm" },
+     { LLM_ARCH_BITNET, "bitnet" },
+     { LLM_ARCH_T5, "t5" },
+     { LLM_ARCH_T5ENCODER, "t5encoder" },
+     { LLM_ARCH_JAIS, "jais" },
+     { LLM_ARCH_NEMOTRON, "nemotron" },
+     { LLM_ARCH_EXAONE, "exaone" },
+     { LLM_ARCH_RWKV6, "rwkv6" },
+     { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
+     { LLM_ARCH_GRANITE, "granite" },
+     { LLM_ARCH_GRANITE_MOE, "granitemoe" },
+     { LLM_ARCH_CHAMELEON, "chameleon" },
+     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
+     { LLM_ARCH_UNKNOWN, "(unknown)" },
+ };
+
+ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
+     { LLM_KV_GENERAL_TYPE, "general.type" },
+     { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
+     { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
+     { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
+     { LLM_KV_GENERAL_NAME, "general.name" },
+     { LLM_KV_GENERAL_AUTHOR, "general.author" },
+     { LLM_KV_GENERAL_VERSION, "general.version" },
+     { LLM_KV_GENERAL_URL, "general.url" },
+     { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
+     { LLM_KV_GENERAL_LICENSE, "general.license" },
+     { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" },
+     { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" },
+
+     { LLM_KV_VOCAB_SIZE, "%s.vocab_size" },
+     { LLM_KV_CONTEXT_LENGTH, "%s.context_length" },
+     { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" },
+     { LLM_KV_FEATURES_LENGTH, "%s.features_length" },
+     { LLM_KV_BLOCK_COUNT, "%s.block_count" },
+     { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
+     { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
+     { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, "%s.expert_feed_forward_length" },
+     { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
+     { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
+     { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
+     { LLM_KV_EXPERT_COUNT, "%s.expert_count" },
+     { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
+     { LLM_KV_EXPERT_SHARED_COUNT, "%s.expert_shared_count" },
+     { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
+     { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
+     { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
+     { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
+     { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
+     { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
+     { LLM_KV_ATTN_LOGIT_SOFTCAPPING, "%s.attn_logit_softcapping" },
+     { LLM_KV_FINAL_LOGIT_SOFTCAPPING, "%s.final_logit_softcapping" },
+     { LLM_KV_SWIN_NORM, "%s.swin_norm" },
+     { LLM_KV_RESCALE_EVERY_N_LAYERS, "%s.rescale_every_n_layers" },
+     { LLM_KV_TIME_MIX_EXTRA_DIM, "%s.time_mix_extra_dim" },
+     { LLM_KV_TIME_DECAY_EXTRA_DIM, "%s.time_decay_extra_dim" },
+     { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
+     { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
+     { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
+
+     { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
+     { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
+     { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
+     { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
+     { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
+     { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
+     { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
+     { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
+     { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
+     { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
+     { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
+     { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
+     { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
+     { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
+     { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
+     { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
+
+     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
+     { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
+     { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
+     { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
+     { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
+     { LLM_KV_ROPE_SCALING_FACTOR, "%s.rope.scaling.factor" },
+     { LLM_KV_ROPE_SCALING_ATTN_FACTOR, "%s.rope.scaling.attn_factor" },
+     { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
+     { LLM_KV_ROPE_SCALING_FINETUNED, "%s.rope.scaling.finetuned" },
+     { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
+
+     { LLM_KV_SPLIT_NO, "split.no" },
+     { LLM_KV_SPLIT_COUNT, "split.count" },
+     { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
+
+     { LLM_KV_SSM_CONV_KERNEL, "%s.ssm.conv_kernel" },
+     { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
+     { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
+     { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
+     { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
+
+     { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
+
+     { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
+     { LLM_KV_POSNET_BLOCK_COUNT, "%s.posnet.block_count" },
+
+     { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
+     { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
+
+     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
+     { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
+     { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
+     { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
+     { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
+     { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" },
+     { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" },
+     { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" },
+     { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" },
+     { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
+     { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
+     { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" },
+     { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" },
+     { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
+     { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" },
+     { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
+     { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
+     { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
+     { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
+     { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
+     { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
+     { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
+     { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
+     { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
+     { LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
+     { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
+     { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
+     { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
+     { LLM_KV_TOKENIZER_FIM_PAD_ID, "tokenizer.ggml.fim_pad_token_id" },
+     { LLM_KV_TOKENIZER_FIM_REP_ID, "tokenizer.ggml.fim_rep_token_id" },
+     { LLM_KV_TOKENIZER_FIM_SEP_ID, "tokenizer.ggml.fim_sep_token_id" },
+
+     { LLM_KV_ADAPTER_TYPE, "adapter.type" },
+     { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
+
+     // deprecated
+     { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
+     { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
+     { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
+ };
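The "%s" placeholders in the keys above are per-architecture patterns: at lookup time the architecture name from LLM_ARCH_NAMES is substituted in (the removed LLM_KV::operator() earlier in this diff does exactly that via ::format). A minimal sketch of the expansion, using a hypothetical expand_kv() helper rather than the package's own formatter:

#include <cstdio>
#include <string>

// Hypothetical helper: fills a "%s"-keyed pattern with the architecture name,
// e.g. expand_kv("%s.context_length", "llama") -> "llama.context_length".
static std::string expand_kv(const char * pattern, const char * arch_name) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), pattern, arch_name);
    return buf;
}

int main() {
    std::printf("%s\n", expand_kv("%s.context_length", "llama").c_str());
}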
+
+ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
+     {
+         LLM_ARCH_LLAMA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_DECI,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_BAICHUAN,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_FALCON,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_GROK,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+         },
+     },
+     {
+         LLM_ARCH_GPT2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_POS_EMBD, "position_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_GPTJ,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+         },
+     },
+     {
+         LLM_ARCH_GPTNEOX,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MPT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
+             { LLM_TENSOR_POS_EMBD, "position_embd" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_STARCODER,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_POS_EMBD, "position_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_REFACT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_BERT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
+             { LLM_TENSOR_POS_EMBD, "position_embd" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_CLS, "cls" },
+             { LLM_TENSOR_CLS_OUT, "cls.output" },
+         },
+     },
+     {
+         LLM_ARCH_NOMIC_BERT,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_JINA_BERT_V2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
+             { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_CLS, "cls" },
+         },
+     },
+     {
+         LLM_ARCH_BLOOM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_STABLELM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2VL,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_QWEN2MOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+         },
+     },
+     {
+         LLM_ARCH_PHI2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_PHI3,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_PHIMOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_PLAMO,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_CODESHELL,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_ORION,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_INTERNLM2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MINICPM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+             { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+             { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+         },
+     },
+     {
+         LLM_ARCH_MINICPM3,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
+             { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
+             { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
+             { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
+             { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
+             { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+         },
+     },
+     {
+         LLM_ARCH_GEMMA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_GEMMA2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+         },
+     },
+     {
+         LLM_ARCH_GEMMA3,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+         },
+     },
+     {
+         LLM_ARCH_STARCODER2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_MAMBA,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+             { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+             { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
+             { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+             { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+             { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+             { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+         },
+     },
+     {
+         LLM_ARCH_XVERSE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_COMMAND_R,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+         },
+     },
+     {
+         LLM_ARCH_COHERE2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_DBRX,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_OLMO,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_OLMO2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_OLMOE,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_OPENELM,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+             { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+         },
+     },
+     {
+         LLM_ARCH_ARCTIC,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_NORM_EXPS, "blk.%d.ffn_norm_exps" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+         },
+     },
+     {
+         LLM_ARCH_DEEPSEEK,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+             { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+             { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+             { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+             { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+             { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+             { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+             { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+         },
+     },
+     {
+         LLM_ARCH_DEEPSEEK2,
+         {
+             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+             { LLM_TENSOR_OUTPUT, "output" },
+             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+             { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
1017
+ { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
1018
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1019
+ { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
1020
+ { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
1021
+ { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1022
+ { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1023
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1024
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1025
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1026
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1027
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1028
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1029
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1030
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1031
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1032
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1033
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1034
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1035
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1036
+ { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1037
+ },
1038
+ },
1039
+ {
1040
+ LLM_ARCH_CHATGLM,
1041
+ {
1042
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1043
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1044
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1045
+ { LLM_TENSOR_OUTPUT, "output" },
1046
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1047
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1048
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1049
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1050
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1051
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1052
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1053
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1054
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1055
+ },
1056
+ },
1057
+ {
1058
+ LLM_ARCH_BITNET,
1059
+ {
1060
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1061
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1062
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1063
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1064
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1065
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1066
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1067
+ { LLM_TENSOR_ATTN_SUB_NORM, "blk.%d.attn_sub_norm" },
1068
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1069
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1070
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1071
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1072
+ { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" },
1073
+ },
1074
+ },
1075
+ {
1076
+ LLM_ARCH_T5,
1077
+ {
1078
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1079
+ { LLM_TENSOR_OUTPUT, "output" },
1080
+ { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" },
1081
+ { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" },
1082
+ { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" },
1083
+ { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" },
1084
+ { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" },
1085
+ { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" },
1086
+ { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" },
1087
+ { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" },
1088
+ { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" },
1089
+ { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" },
1090
+ { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" },
1091
+ { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" },
1092
+ { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
1093
+ { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" },
1094
+ { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" },
1095
+ { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" },
1096
+ { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" },
1097
+ { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1098
+ { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1099
+ { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1100
+ { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1101
+ { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1102
+ { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1103
+ { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1104
+ { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1105
+ { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1106
+ { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1107
+ { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1108
+ },
1109
+ },
1110
+ {
1111
+ LLM_ARCH_T5ENCODER,
1112
+ {
1113
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1114
+ { LLM_TENSOR_OUTPUT, "output" },
1115
+ { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1116
+ { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1117
+ { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1118
+ { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1119
+ { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1120
+ { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1121
+ { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1122
+ { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1123
+ { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1124
+ { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1125
+ { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1126
+ },
1127
+ },
1128
+ {
1129
+ LLM_ARCH_JAIS,
1130
+ {
1131
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1132
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1133
+ { LLM_TENSOR_OUTPUT, "output" },
1134
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1135
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1136
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1137
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1138
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1139
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1140
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1141
+ },
1142
+ },
1143
+ {
1144
+ LLM_ARCH_NEMOTRON,
1145
+ {
1146
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1147
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1148
+ { LLM_TENSOR_OUTPUT, "output" },
1149
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1150
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1151
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1152
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1153
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1154
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1155
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1156
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1157
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1158
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1159
+ },
1160
+ },
1161
+ {
1162
+ LLM_ARCH_EXAONE,
1163
+ {
1164
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1165
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1166
+ { LLM_TENSOR_OUTPUT, "output" },
1167
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1168
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1169
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1170
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1171
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1172
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1173
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1174
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1175
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1176
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1177
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1178
+ },
1179
+ },
1180
+ {
1181
+ LLM_ARCH_RWKV6,
1182
+ {
1183
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1184
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1185
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1186
+ { LLM_TENSOR_OUTPUT, "output" },
1187
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1188
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1189
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1190
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1191
+ { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1192
+ { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
1193
+ { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
1194
+ { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
1195
+ { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
1196
+ { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
1197
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1198
+ { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1199
+ { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1200
+ { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1201
+ { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1202
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1203
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1204
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1205
+ { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1206
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1207
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1208
+ { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1209
+ { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
1210
+ { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1211
+ { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1212
+ { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
1213
+ },
1214
+ },
1215
+ {
1216
+ LLM_ARCH_RWKV6QWEN2,
1217
+ {
1218
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1219
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1220
+ { LLM_TENSOR_OUTPUT, "output" },
1221
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1222
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1223
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1224
+ { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1225
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1226
+ { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1227
+ { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1228
+ { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1229
+ { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1230
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1231
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1232
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1233
+ { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1234
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1235
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1236
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1237
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1238
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1239
+ },
1240
+ },
1241
+ {
1242
+ LLM_ARCH_GRANITE,
1243
+ {
1244
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1245
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1246
+ { LLM_TENSOR_OUTPUT, "output" },
1247
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1248
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1249
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1250
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1251
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1252
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1253
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1254
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1255
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1256
+ },
1257
+ },
1258
+ {
1259
+ LLM_ARCH_GRANITE_MOE,
1260
+ {
1261
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1262
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1263
+ { LLM_TENSOR_OUTPUT, "output" },
1264
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1265
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1266
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1267
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1268
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1269
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1270
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1271
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1272
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1273
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1274
+ },
1275
+ },
1276
+ {
1277
+ LLM_ARCH_CHAMELEON,
1278
+ {
1279
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1280
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1281
+ { LLM_TENSOR_OUTPUT, "output" },
1282
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1283
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1284
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1285
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1286
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1287
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1288
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1289
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1290
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1291
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1292
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1293
+ },
1294
+ },
1295
+ {
1296
+ LLM_ARCH_WAVTOKENIZER_DEC,
1297
+ {
1298
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1299
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1300
+ { LLM_TENSOR_CONV1D, "conv1d" },
1301
+ { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
1302
+ { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
1303
+ { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
1304
+ { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
1305
+ { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
1306
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1307
+ { LLM_TENSOR_OUTPUT, "output" },
1308
+ { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
1309
+ { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
1310
+ { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
1311
+ { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
1312
+ { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
1313
+ { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
1314
+ { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
1315
+ { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
1316
+ { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
1317
+ { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
1318
+ },
1319
+ },
1320
+ {
1321
+ LLM_ARCH_UNKNOWN,
1322
+ {
1323
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1324
+ },
1325
+ },
1326
+ };
1327
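The table above stores printf-style patterns rather than finished names; the block index is substituted when a tensor is looked up. A minimal sketch of that expansion step (illustrative only; `expand_tensor_name` is a hypothetical helper, not a symbol from this package, which performs the same step inside `LLM_TN_IMPL::str()` shown further below):

```cpp
#include <cstdio>
#include <string>

// Hypothetical helper: expand a pattern such as "blk.%d.attn_q" for block
// `bid` and append an optional suffix such as "weight" or "bias".
static std::string expand_tensor_name(const char * pattern, int bid, const char * suffix) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), pattern, bid); // substitute the block index
    std::string name = buf;
    if (suffix != nullptr) {
        name += ".";
        name += suffix;
    }
    return name;
}

// expand_tensor_name("blk.%d.attn_q", 0, "weight") -> "blk.0.attn_q.weight"
// expand_tensor_name("token_embd", 0, "weight")    -> "token_embd.weight"
//   (extra printf arguments are ignored for patterns without "%d")
```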
+
+ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
+ {LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_TOKEN_TYPES, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ENC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ROPE_FREQS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+ {LLM_TENSOR_ROPE_FACTORS_LONG, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+ {LLM_TENSOR_ROPE_FACTORS_SHORT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
+ {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_GATE_INP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_GATE_INP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SSM_IN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SSM_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SSM_DT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_OUTPUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CHANNEL_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CHANNEL_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_DIV}},
+ {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_CONV}},
+ {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_SCAN}},
+ {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
+ {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_FFN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_FFN_NORM_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_Q_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_K_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_LAYER_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_Q_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_KV_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_FFN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ENC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_ENC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_ENC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+ {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+ {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
+ {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
+ // this tensor is loaded for T5, but never used
+ {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
+ {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
+ {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_POS_NET_NORM2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_POS_NET_CONV1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+ {LLM_TENSOR_POS_NET_CONV2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+ {LLM_TENSOR_POS_NET_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_POS_NET_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_POS_NET_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_POS_NET_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_POS_NET_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CONVNEXT_DW, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
+ {LLM_TENSOR_CONVNEXT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
+ };
+
+ LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
+
+ std::string LLM_KV::operator()(llm_kv kv) const {
+ return suffix ? ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch), suffix)
+ : ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
+ }
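LLM_KV splices the architecture name into a metadata-key pattern to produce a concrete GGUF key. A usage sketch; LLM_KV_CONTEXT_LENGTH and its "%s.context_length" pattern are assumed from the GGUF metadata-key convention, as LLM_KV_NAMES is not part of this hunk:

```cpp
// Illustrative usage; assumes LLM_KV_CONTEXT_LENGTH maps to the pattern
// "%s.context_length" in LLM_KV_NAMES.
LLM_KV kv(LLM_ARCH_LLAMA, nullptr);           // no suffix
std::string key = kv(LLM_KV_CONTEXT_LENGTH);  // -> "llama.context_length"
```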
+
+ std::string LLM_TN_IMPL::str() const {
+ if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
+ return "__missing__";
+ }
+
+ std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
+
+ if (suffix != nullptr) {
+ name += ".";
+ name += suffix;
+ }
+
+ return name;
+ }
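str() resolves the per-architecture pattern, falls back to "__missing__" when an architecture does not define the tensor, and appends an optional suffix such as "weight" or "bias". A usage sketch; the LLM_TN wrapper that fills LLM_TN_IMPL's fields is declared in the header rather than in this hunk, so its call shape here is an assumption:

```cpp
// Illustrative; assumes LLM_TN(arch) yields a callable that populates
// LLM_TN_IMPL's {arch, tensor, suffix, bid, xid} fields.
LLM_TN tn(LLM_ARCH_LLAMA);
tn(LLM_TENSOR_ATTN_Q, "weight", 0).str();   // -> "blk.0.attn_q.weight"
tn(LLM_TENSOR_TOKEN_EMBD, "weight").str();  // -> "token_embd.weight"
```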
+
+ const char * llm_arch_name(llm_arch arch) {
+ auto it = LLM_ARCH_NAMES.find(arch);
+ if (it == LLM_ARCH_NAMES.end()) {
+ return "unknown";
+ }
+ return it->second;
+ }
+
+ llm_arch llm_arch_from_string(const std::string & name) {
+ for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
+ if (kv.second == name) {
+ return kv.first;
+ }
+ }
+
+ return LLM_ARCH_UNKNOWN;
+ }
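These two helpers round-trip for every registered architecture, and both degrade gracefully on unknown input:

```cpp
// Follows directly from the two functions above; the "llama" string
// assumes the corresponding LLM_ARCH_NAMES entry.
llm_arch_from_string("llama");        // -> LLM_ARCH_LLAMA
llm_arch_name(LLM_ARCH_LLAMA);        // -> "llama"
llm_arch_from_string("no-such-arch"); // -> LLM_ARCH_UNKNOWN
llm_arch_name((llm_arch) -1);         // -> "unknown"
```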
+
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
+ return LLM_TENSOR_INFOS.at(tensor);
+ }
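This lookup is a thin wrapper over std::map::at, so it throws std::out_of_range for a tensor missing from LLM_TENSOR_INFOS rather than returning a sentinel. A sketch of what a loader can do with the result; the field names layer and op are assumptions inferred from the brace-initializers above, which pair a layer class with a ggml op:

```cpp
// Illustrative: classify a tensor before choosing a backend buffer type.
// Field names `layer` and `op` are assumed, not confirmed by this hunk.
const llm_tensor_info & info = llm_tensor_info_for(LLM_TENSOR_ATTN_Q);
// info.layer == LLM_TENSOR_LAYER_REPEATING -> present in every block
// info.op    == LM_GGML_OP_MUL_MAT         -> op to probe for backend support
```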