cui-llama.rn 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
@@ -1,1492 +1,1714 @@
1
- #include "llama-arch.h"
2
-
3
- #include "llama-impl.h"
4
-
5
- #include <map>
6
-
7
- static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
8
- { LLM_ARCH_LLAMA, "llama" },
9
- { LLM_ARCH_DECI, "deci" },
10
- { LLM_ARCH_FALCON, "falcon" },
11
- { LLM_ARCH_GROK, "grok" },
12
- { LLM_ARCH_GPT2, "gpt2" },
13
- { LLM_ARCH_GPTJ, "gptj" },
14
- { LLM_ARCH_GPTNEOX, "gptneox" },
15
- { LLM_ARCH_MPT, "mpt" },
16
- { LLM_ARCH_BAICHUAN, "baichuan" },
17
- { LLM_ARCH_STARCODER, "starcoder" },
18
- { LLM_ARCH_REFACT, "refact" },
19
- { LLM_ARCH_BERT, "bert" },
20
- { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
21
- { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
22
- { LLM_ARCH_BLOOM, "bloom" },
23
- { LLM_ARCH_STABLELM, "stablelm" },
24
- { LLM_ARCH_QWEN, "qwen" },
25
- { LLM_ARCH_QWEN2, "qwen2" },
26
- { LLM_ARCH_QWEN2MOE, "qwen2moe" },
27
- { LLM_ARCH_QWEN2VL, "qwen2vl" },
28
- { LLM_ARCH_PHI2, "phi2" },
29
- { LLM_ARCH_PHI3, "phi3" },
30
- { LLM_ARCH_PHIMOE, "phimoe" },
31
- { LLM_ARCH_PLAMO, "plamo" },
32
- { LLM_ARCH_CODESHELL, "codeshell" },
33
- { LLM_ARCH_ORION, "orion" },
34
- { LLM_ARCH_INTERNLM2, "internlm2" },
35
- { LLM_ARCH_MINICPM, "minicpm" },
36
- { LLM_ARCH_MINICPM3, "minicpm3" },
37
- { LLM_ARCH_GEMMA, "gemma" },
38
- { LLM_ARCH_GEMMA2, "gemma2" },
39
- { LLM_ARCH_STARCODER2, "starcoder2" },
40
- { LLM_ARCH_MAMBA, "mamba" },
41
- { LLM_ARCH_XVERSE, "xverse" },
42
- { LLM_ARCH_COMMAND_R, "command-r" },
43
- { LLM_ARCH_COHERE2, "cohere2" },
44
- { LLM_ARCH_DBRX, "dbrx" },
45
- { LLM_ARCH_OLMO, "olmo" },
46
- { LLM_ARCH_OLMO2, "olmo2" },
47
- { LLM_ARCH_OLMOE, "olmoe" },
48
- { LLM_ARCH_OPENELM, "openelm" },
49
- { LLM_ARCH_ARCTIC, "arctic" },
50
- { LLM_ARCH_DEEPSEEK, "deepseek" },
51
- { LLM_ARCH_DEEPSEEK2, "deepseek2" },
52
- { LLM_ARCH_CHATGLM, "chatglm" },
53
- { LLM_ARCH_BITNET, "bitnet" },
54
- { LLM_ARCH_T5, "t5" },
55
- { LLM_ARCH_T5ENCODER, "t5encoder" },
56
- { LLM_ARCH_JAIS, "jais" },
57
- { LLM_ARCH_NEMOTRON, "nemotron" },
58
- { LLM_ARCH_EXAONE, "exaone" },
59
- { LLM_ARCH_RWKV6, "rwkv6" },
60
- { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
61
- { LLM_ARCH_GRANITE, "granite" },
62
- { LLM_ARCH_GRANITE_MOE, "granitemoe" },
63
- { LLM_ARCH_CHAMELEON, "chameleon" },
64
- { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
65
- { LLM_ARCH_UNKNOWN, "(unknown)" },
66
- };
67
-
68
- static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
69
- { LLM_KV_GENERAL_TYPE, "general.type" },
70
- { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
71
- { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
72
- { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
73
- { LLM_KV_GENERAL_NAME, "general.name" },
74
- { LLM_KV_GENERAL_AUTHOR, "general.author" },
75
- { LLM_KV_GENERAL_VERSION, "general.version" },
76
- { LLM_KV_GENERAL_URL, "general.url" },
77
- { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
78
- { LLM_KV_GENERAL_LICENSE, "general.license" },
79
- { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" },
80
- { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" },
81
-
82
- { LLM_KV_VOCAB_SIZE, "%s.vocab_size" },
83
- { LLM_KV_CONTEXT_LENGTH, "%s.context_length" },
84
- { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" },
85
- { LLM_KV_FEATURES_LENGTH, "%s.features_length" },
86
- { LLM_KV_BLOCK_COUNT, "%s.block_count" },
87
- { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
88
- { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
89
- { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, "%s.expert_feed_forward_length" },
90
- { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
91
- { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
92
- { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
93
- { LLM_KV_EXPERT_COUNT, "%s.expert_count" },
94
- { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
95
- { LLM_KV_EXPERT_SHARED_COUNT, "%s.expert_shared_count" },
96
- { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
97
- { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
98
- { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
99
- { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
100
- { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
101
- { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
102
- { LLM_KV_ATTN_LOGIT_SOFTCAPPING, "%s.attn_logit_softcapping" },
103
- { LLM_KV_FINAL_LOGIT_SOFTCAPPING, "%s.final_logit_softcapping" },
104
- { LLM_KV_SWIN_NORM, "%s.swin_norm" },
105
- { LLM_KV_RESCALE_EVERY_N_LAYERS, "%s.rescale_every_n_layers" },
106
- { LLM_KV_TIME_MIX_EXTRA_DIM, "%s.time_mix_extra_dim" },
107
- { LLM_KV_TIME_DECAY_EXTRA_DIM, "%s.time_decay_extra_dim" },
108
- { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
109
- { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
110
- { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
111
-
112
- { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
113
- { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
114
- { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
115
- { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
116
- { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
117
- { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
118
- { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
119
- { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
120
- { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
121
- { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
122
- { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
123
- { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
124
- { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
125
- { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
126
- { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
127
- { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
128
-
129
- { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
130
- { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
131
- { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
132
- { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
133
- { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
134
- { LLM_KV_ROPE_SCALING_FACTOR, "%s.rope.scaling.factor" },
135
- { LLM_KV_ROPE_SCALING_ATTN_FACTOR, "%s.rope.scaling.attn_factor" },
136
- { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
137
- { LLM_KV_ROPE_SCALING_FINETUNED, "%s.rope.scaling.finetuned" },
138
- { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
139
-
140
- { LLM_KV_SPLIT_NO, "split.no" },
141
- { LLM_KV_SPLIT_COUNT, "split.count" },
142
- { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
143
-
144
- { LLM_KV_SSM_CONV_KERNEL, "%s.ssm.conv_kernel" },
145
- { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
146
- { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
147
- { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
148
- { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
149
-
150
- { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
151
-
152
- { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
153
- { LLM_KV_POSNET_BLOCK_COUNT, "%s.posnet.block_count" },
154
-
155
- { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
156
- { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
157
-
158
- { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
159
- { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
160
- { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
161
- { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
162
- { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
163
- { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" },
164
- { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" },
165
- { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" },
166
- { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" },
167
- { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
168
- { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
169
- { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" },
170
- { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" },
171
- { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
172
- { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" },
173
- { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
174
- { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
175
- { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
176
- { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
177
- { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
178
- { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
179
- { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
180
- { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
181
- { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
182
- { LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
183
- { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
184
- { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
185
- { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
186
- { LLM_KV_TOKENIZER_FIM_PAD_ID, "tokenizer.ggml.fim_pad_token_id" },
187
- { LLM_KV_TOKENIZER_FIM_REP_ID, "tokenizer.ggml.fim_rep_token_id" },
188
- { LLM_KV_TOKENIZER_FIM_SEP_ID, "tokenizer.ggml.fim_sep_token_id" },
189
-
190
- { LLM_KV_ADAPTER_TYPE, "adapter.type" },
191
- { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
192
-
193
- // deprecated
194
- { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
195
- { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
196
- { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
197
- };
198
-
199
- static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
200
- {
201
- LLM_ARCH_LLAMA,
202
- {
203
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
204
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
205
- { LLM_TENSOR_OUTPUT, "output" },
206
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
207
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
208
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
209
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
210
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
211
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
212
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
213
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
214
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
215
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
216
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
217
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
218
- { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
219
- { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
220
- { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
221
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
222
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
223
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
224
- },
225
- },
226
- {
227
- LLM_ARCH_DECI,
228
- {
229
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
230
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
231
- { LLM_TENSOR_OUTPUT, "output" },
232
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
233
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
234
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
235
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
236
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
237
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
238
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
239
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
240
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
241
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
242
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
243
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
244
- { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
245
- { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
246
- { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
247
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
248
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
249
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
250
- },
251
- },
252
- {
253
- LLM_ARCH_BAICHUAN,
254
- {
255
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
256
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
257
- { LLM_TENSOR_OUTPUT, "output" },
258
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
259
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
260
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
261
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
262
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
263
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
264
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
265
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
266
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
267
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
268
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
269
- },
270
- },
271
- {
272
- LLM_ARCH_FALCON,
273
- {
274
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
275
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
276
- { LLM_TENSOR_OUTPUT, "output" },
277
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
278
- { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
279
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
280
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
281
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
282
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
283
- },
284
- },
285
- {
286
- LLM_ARCH_GROK,
287
- {
288
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
289
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
290
- { LLM_TENSOR_OUTPUT, "output" },
291
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
292
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
293
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
294
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
295
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
296
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
297
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
298
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
299
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
300
- { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
301
- { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
302
- { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
303
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
304
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
305
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
306
- { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
307
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
308
- },
309
- },
310
- {
311
- LLM_ARCH_GPT2,
312
- {
313
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
314
- { LLM_TENSOR_POS_EMBD, "position_embd" },
315
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
316
- { LLM_TENSOR_OUTPUT, "output" },
317
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
318
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
319
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
320
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
321
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
322
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
323
- },
324
- },
325
- {
326
- LLM_ARCH_GPTJ,
327
- {
328
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
329
- },
330
- },
331
- {
332
- LLM_ARCH_GPTNEOX,
333
- {
334
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
335
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
336
- { LLM_TENSOR_OUTPUT, "output" },
337
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
338
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
339
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
340
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
341
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
342
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
343
- },
344
- },
345
- {
346
- LLM_ARCH_MPT,
347
- {
348
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
349
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
350
- { LLM_TENSOR_OUTPUT, "output"},
351
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
352
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
353
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
354
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
355
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
356
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
357
- { LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
358
- { LLM_TENSOR_POS_EMBD, "position_embd" },
359
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm"},
360
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm"},
361
- },
362
- },
363
- {
364
- LLM_ARCH_STARCODER,
365
- {
366
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
367
- { LLM_TENSOR_POS_EMBD, "position_embd" },
368
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
369
- { LLM_TENSOR_OUTPUT, "output" },
370
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
371
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
372
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
373
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
374
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
375
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
376
- },
377
- },
378
- {
379
- LLM_ARCH_REFACT,
380
- {
381
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
382
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
383
- { LLM_TENSOR_OUTPUT, "output" },
384
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
385
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
386
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
387
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
388
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
389
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
390
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
391
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
392
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
393
- },
394
- },
395
- {
396
- LLM_ARCH_BERT,
397
- {
398
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
399
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
400
- { LLM_TENSOR_TOKEN_TYPES, "token_types" },
401
- { LLM_TENSOR_POS_EMBD, "position_embd" },
402
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
403
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
404
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
405
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
406
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
407
- { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
408
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
409
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
410
- { LLM_TENSOR_CLS, "cls" },
411
- { LLM_TENSOR_CLS_OUT, "cls.output" },
412
- },
413
- },
414
- {
415
- LLM_ARCH_NOMIC_BERT,
416
- {
417
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
418
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
419
- { LLM_TENSOR_TOKEN_TYPES, "token_types" },
420
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
421
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
422
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
423
- { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
424
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
425
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
426
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
427
- },
428
- },
429
- {
430
- LLM_ARCH_JINA_BERT_V2,
431
- {
432
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
433
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
434
- { LLM_TENSOR_TOKEN_TYPES, "token_types" },
435
- { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
436
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
437
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
438
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
439
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
440
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
441
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
442
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
443
- { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
444
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
445
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
446
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
447
- { LLM_TENSOR_CLS, "cls" },
448
- },
449
- },
450
- {
451
- LLM_ARCH_BLOOM,
452
- {
453
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
454
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
455
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
456
- { LLM_TENSOR_OUTPUT, "output" },
457
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
458
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
459
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
460
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
461
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
462
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
463
- },
464
- },
465
- {
466
- LLM_ARCH_STABLELM,
467
- {
468
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
469
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
470
- { LLM_TENSOR_OUTPUT, "output" },
471
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
472
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
473
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
474
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
475
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
476
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
477
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
478
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
479
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
480
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
481
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
482
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
483
- },
484
- },
485
- {
486
- LLM_ARCH_QWEN,
487
- {
488
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
489
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
490
- { LLM_TENSOR_OUTPUT, "output" },
491
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
492
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
493
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
494
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
495
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
496
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
497
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
498
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
499
- },
500
- },
501
- {
502
- LLM_ARCH_QWEN2,
503
- {
504
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
505
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
506
- { LLM_TENSOR_OUTPUT, "output" },
507
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
508
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
509
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
510
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
511
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
512
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
513
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
514
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
515
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
516
- },
517
- },
518
- {
519
- LLM_ARCH_QWEN2VL,
520
- {
521
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
522
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
523
- { LLM_TENSOR_OUTPUT, "output" },
524
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
525
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
526
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
527
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
528
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
529
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
530
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
531
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
532
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
533
- },
534
- },
535
- {
536
- LLM_ARCH_QWEN2MOE,
537
- {
538
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
539
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
540
- { LLM_TENSOR_OUTPUT, "output" },
541
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
542
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
543
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
544
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
545
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
546
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
547
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
548
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
549
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
550
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
551
- { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
552
- { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
553
- { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
554
- { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
555
- },
556
- },
557
- {
558
- LLM_ARCH_PHI2,
559
- {
560
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
561
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
562
- { LLM_TENSOR_OUTPUT, "output" },
563
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
564
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
565
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
566
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
567
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
568
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
569
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
570
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
571
- },
572
- },
573
- {
574
- LLM_ARCH_PHI3,
575
- {
576
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
577
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
578
- { LLM_TENSOR_OUTPUT, "output" },
579
- { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
580
- { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
581
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
582
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
583
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
584
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
585
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
586
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
587
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
588
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
589
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
590
- },
591
- },
592
- {
593
- LLM_ARCH_PHIMOE,
594
- {
595
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
596
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
597
- { LLM_TENSOR_OUTPUT, "output" },
598
- { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
599
- { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
600
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
601
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
602
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
603
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
604
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
605
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
606
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
607
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
608
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
609
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
610
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
611
- },
612
- },
613
- {
614
- LLM_ARCH_PLAMO,
615
- {
616
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
617
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
618
- { LLM_TENSOR_OUTPUT, "output" },
619
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
620
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
621
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
622
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
623
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
624
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
625
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
626
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
627
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
628
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
629
- },
630
- },
631
- {
632
- LLM_ARCH_CODESHELL,
633
- {
634
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
635
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
636
- { LLM_TENSOR_OUTPUT, "output" },
637
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
638
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
639
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
640
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
641
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
642
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
643
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
644
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
645
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
646
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
647
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
648
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
649
- },
650
- },
651
- {
652
- LLM_ARCH_ORION,
653
- {
654
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
655
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
656
- { LLM_TENSOR_OUTPUT, "output" },
657
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
658
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
659
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
660
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
661
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
662
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
663
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
664
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
665
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
666
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
667
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
668
- },
669
- },
670
- {
671
- LLM_ARCH_INTERNLM2,
672
- {
673
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
674
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
675
- { LLM_TENSOR_OUTPUT, "output" },
676
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
677
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
678
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
679
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
680
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
681
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
682
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
683
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
684
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
685
- },
686
- },
687
- {
688
- LLM_ARCH_MINICPM,
689
- {
690
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
691
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
692
- { LLM_TENSOR_OUTPUT, "output" },
693
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
694
- { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
695
- { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
696
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
697
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
698
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
699
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
700
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
701
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
702
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
703
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
704
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
705
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
706
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
707
- { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
708
- { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
709
- { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
710
- },
711
- },
712
- {
713
- LLM_ARCH_MINICPM3,
714
- {
715
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
716
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
717
- { LLM_TENSOR_OUTPUT, "output" },
718
- { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
719
- { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
720
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
721
- { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
722
- { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
723
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
724
- { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
725
- { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
726
- { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
727
- { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
728
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
729
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
730
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
731
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
732
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
733
- },
734
- },
735
- {
736
- LLM_ARCH_GEMMA,
737
- {
738
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
739
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
740
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
741
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
742
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
743
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
744
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
745
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
746
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
747
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
748
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
749
- },
750
- },
751
- {
752
- LLM_ARCH_GEMMA2,
753
- {
754
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
755
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
756
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
757
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
758
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
759
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
760
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
761
- { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
762
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
763
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
764
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
765
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
766
- { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
767
- },
768
- },
769
- {
770
- LLM_ARCH_STARCODER2,
771
- {
772
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
773
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
774
- { LLM_TENSOR_OUTPUT, "output" },
775
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
776
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
777
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
778
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
779
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
780
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
781
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
782
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
783
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
784
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
785
- },
786
- },
787
- {
788
- LLM_ARCH_MAMBA,
789
- {
790
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
791
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
792
- { LLM_TENSOR_OUTPUT, "output" },
793
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
794
- { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
795
- { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
796
- { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
797
- { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
798
- { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
799
- { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
800
- { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
801
- },
802
- },
803
- {
804
- LLM_ARCH_XVERSE,
805
- {
806
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
807
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
808
- { LLM_TENSOR_OUTPUT, "output" },
809
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
810
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
811
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
812
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
813
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
814
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
815
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
816
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
817
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
818
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
819
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
820
- },
821
- },
822
- {
823
- LLM_ARCH_COMMAND_R,
824
- {
825
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
826
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
827
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
828
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
829
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
830
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
831
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
832
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
833
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
834
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
835
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
836
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
837
- },
838
- },
839
- {
840
- LLM_ARCH_COHERE2,
841
- {
842
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
843
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
844
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
845
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
846
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
847
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
848
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
849
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
850
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
851
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
852
- },
853
- },
854
- {
855
- LLM_ARCH_DBRX,
856
- {
857
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
858
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
859
- { LLM_TENSOR_OUTPUT, "output" },
860
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
861
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
862
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
863
- { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
864
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
865
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
866
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
867
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
868
- },
869
- },
870
- {
871
- LLM_ARCH_OLMO,
872
- {
873
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
874
- { LLM_TENSOR_OUTPUT, "output" },
875
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
876
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
877
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
878
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
879
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
880
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
881
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
882
- },
883
- },
884
- {
885
- LLM_ARCH_OLMO2,
886
- {
887
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
888
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
889
- { LLM_TENSOR_OUTPUT, "output" },
890
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
891
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
892
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
893
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
894
- { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
895
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
896
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
897
- { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
898
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
899
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
900
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
901
- },
902
- },
903
- {
904
- LLM_ARCH_OLMOE,
905
- {
906
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
907
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
908
- { LLM_TENSOR_OUTPUT, "output" },
909
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
910
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
911
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
912
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
913
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
914
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
915
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
916
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
917
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
918
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
919
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
920
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
921
- },
922
- },
923
- {
924
- LLM_ARCH_OPENELM,
925
- {
926
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
927
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
928
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
929
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
930
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
931
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
932
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
933
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
934
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
935
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
936
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
937
- },
938
- },
939
- {
940
- LLM_ARCH_ARCTIC,
941
- {
942
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
943
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
944
- { LLM_TENSOR_OUTPUT, "output" },
945
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
946
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
947
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
948
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
949
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
950
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
951
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
952
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
953
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
954
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
955
- { LLM_TENSOR_FFN_NORM_EXPS, "blk.%d.ffn_norm_exps" },
956
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
957
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
958
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
959
- },
960
- },
961
- {
962
- LLM_ARCH_DEEPSEEK,
963
- {
964
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
965
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
966
- { LLM_TENSOR_OUTPUT, "output" },
967
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
968
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
969
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
970
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
971
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
972
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
973
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
974
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
975
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
976
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
977
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
978
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
979
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
980
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
981
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
982
- { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
983
- { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
984
- { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
985
- { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
986
- },
987
- },
988
- {
989
- LLM_ARCH_DEEPSEEK2,
990
- {
991
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
992
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
993
- { LLM_TENSOR_OUTPUT, "output" },
994
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
995
- { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
996
- { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
997
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
998
- { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
999
- { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
1000
- { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1001
- { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1002
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1003
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1004
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1005
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1006
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1007
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1008
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1009
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1010
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1011
- { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1012
- { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1013
- { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1014
- { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1015
- { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1016
- },
1017
- },
1018
- {
1019
- LLM_ARCH_CHATGLM,
1020
- {
1021
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1022
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1023
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1024
- { LLM_TENSOR_OUTPUT, "output" },
1025
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1026
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1027
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1028
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1029
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1030
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1031
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1032
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1033
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1034
- },
1035
- },
1036
- {
1037
- LLM_ARCH_BITNET,
1038
- {
1039
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1040
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1041
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1042
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1043
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1044
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1045
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1046
- { LLM_TENSOR_ATTN_SUB_NORM, "blk.%d.attn_sub_norm" },
1047
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1048
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1049
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1050
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1051
- { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" },
1052
- },
1053
- },
1054
- {
1055
- LLM_ARCH_T5,
1056
- {
1057
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1058
- { LLM_TENSOR_OUTPUT, "output" },
1059
- { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" },
1060
- { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" },
1061
- { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" },
1062
- { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" },
1063
- { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" },
1064
- { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" },
1065
- { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" },
1066
- { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" },
1067
- { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" },
1068
- { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" },
1069
- { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" },
1070
- { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" },
1071
- { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
1072
- { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" },
1073
- { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" },
1074
- { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" },
1075
- { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" },
1076
- { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1077
- { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1078
- { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1079
- { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1080
- { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1081
- { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1082
- { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1083
- { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1084
- { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1085
- { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1086
- { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1087
- },
1088
- },
1089
- {
1090
- LLM_ARCH_T5ENCODER,
1091
- {
1092
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1093
- { LLM_TENSOR_OUTPUT, "output" },
1094
- { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1095
- { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1096
- { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1097
- { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1098
- { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1099
- { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1100
- { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1101
- { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1102
- { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1103
- { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1104
- { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1105
- },
1106
- },
1107
- {
1108
- LLM_ARCH_JAIS,
1109
- {
1110
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1111
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1112
- { LLM_TENSOR_OUTPUT, "output" },
1113
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1114
- { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1115
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1116
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1117
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1118
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1119
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1120
- },
1121
- },
1122
- {
1123
- LLM_ARCH_NEMOTRON,
1124
- {
1125
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1126
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1127
- { LLM_TENSOR_OUTPUT, "output" },
1128
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1129
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1130
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1131
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1132
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1133
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1134
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1135
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1136
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1137
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1138
- },
1139
- },
1140
- {
1141
- LLM_ARCH_EXAONE,
1142
- {
1143
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1144
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1145
- { LLM_TENSOR_OUTPUT, "output" },
1146
- { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1147
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1148
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1149
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1150
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1151
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1152
- { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1153
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1154
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1155
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1156
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1157
- },
1158
- },
1159
- {
1160
- LLM_ARCH_RWKV6,
1161
- {
1162
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1163
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1164
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1165
- { LLM_TENSOR_OUTPUT, "output" },
1166
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1167
- { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1168
- { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1169
- { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1170
- { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1171
- { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
1172
- { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
1173
- { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
1174
- { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
1175
- { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
1176
- { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1177
- { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1178
- { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1179
- { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1180
- { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1181
- { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1182
- { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1183
- { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1184
- { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1185
- { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1186
- { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1187
- { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1188
- { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
1189
- { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1190
- { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1191
- { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
1192
- },
1193
- },
1194
- {
1195
- LLM_ARCH_RWKV6QWEN2,
1196
- {
1197
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1198
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1199
- { LLM_TENSOR_OUTPUT, "output" },
1200
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1201
- { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1202
- { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1203
- { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1204
- { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1205
- { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1206
- { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1207
- { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1208
- { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1209
- { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1210
- { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1211
- { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1212
- { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1213
- { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1214
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1215
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1216
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1217
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1218
- },
1219
- },
1220
- {
1221
- LLM_ARCH_GRANITE,
1222
- {
1223
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1224
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1225
- { LLM_TENSOR_OUTPUT, "output" },
1226
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1227
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1228
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1229
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1230
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1231
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1232
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1233
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1234
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1235
- },
1236
- },
1237
- {
1238
- LLM_ARCH_GRANITE_MOE,
1239
- {
1240
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1241
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1242
- { LLM_TENSOR_OUTPUT, "output" },
1243
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1244
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1245
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1246
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1247
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1248
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1249
- { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1250
- { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1251
- { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1252
- { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1253
- },
1254
- },
1255
- {
1256
- LLM_ARCH_CHAMELEON,
1257
- {
1258
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1259
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1260
- { LLM_TENSOR_OUTPUT, "output" },
1261
- { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1262
- { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1263
- { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1264
- { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1265
- { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1266
- { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1267
- { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1268
- { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1269
- { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1270
- { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1271
- { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1272
- },
1273
- },
1274
- {
1275
- LLM_ARCH_WAVTOKENIZER_DEC,
1276
- {
1277
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1278
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1279
- { LLM_TENSOR_CONV1D, "conv1d" },
1280
- { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
1281
- { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
1282
- { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
1283
- { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
1284
- { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
1285
- { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1286
- { LLM_TENSOR_OUTPUT, "output" },
1287
- { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
1288
- { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
1289
- { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
1290
- { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
1291
- { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
1292
- { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
1293
- { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
1294
- { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
1295
- { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
1296
- { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
1297
- },
1298
- },
1299
- {
1300
- LLM_ARCH_UNKNOWN,
1301
- {
1302
- { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1303
- },
1304
- },
1305
- };
1306
-
1307
- static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1308
- {LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1309
- {LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1310
- {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1311
- {LLM_TENSOR_TOKEN_TYPES, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1312
- {LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1313
- {LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1314
- {LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1315
- {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1316
- {LLM_TENSOR_DEC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1317
- {LLM_TENSOR_ENC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1318
- {LLM_TENSOR_ROPE_FREQS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1319
- {LLM_TENSOR_ROPE_FACTORS_LONG, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1320
- {LLM_TENSOR_ROPE_FACTORS_SHORT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1321
- {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1322
- {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1323
- {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1324
- {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1325
- {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1326
- {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1327
- {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1328
- {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1329
- {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1330
- {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1331
- {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1332
- {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1333
- {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1334
- {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1335
- {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1336
- {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1337
- {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1338
- {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1339
- {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1340
- {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1341
- {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1342
- {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1343
- {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1344
- {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1345
- {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1346
- {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1347
- {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1348
- {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1349
- {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1350
- {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1351
- {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1352
- {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1353
- {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1354
- {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1355
- {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1356
- {LLM_TENSOR_DEC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1357
- {LLM_TENSOR_DEC_CROSS_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1358
- {LLM_TENSOR_DEC_CROSS_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1359
- {LLM_TENSOR_DEC_CROSS_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1360
- {LLM_TENSOR_DEC_CROSS_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1361
- {LLM_TENSOR_DEC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1362
- {LLM_TENSOR_DEC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1363
- {LLM_TENSOR_DEC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1364
- {LLM_TENSOR_ENC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1365
- {LLM_TENSOR_ENC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1366
- {LLM_TENSOR_ENC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1367
- {LLM_TENSOR_ENC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1368
- {LLM_TENSOR_ENC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1369
- {LLM_TENSOR_ENC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1370
- {LLM_TENSOR_ENC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1371
- {LLM_TENSOR_FFN_GATE_INP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1372
- {LLM_TENSOR_FFN_GATE_INP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1373
- {LLM_TENSOR_SSM_IN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1374
- {LLM_TENSOR_SSM_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1375
- {LLM_TENSOR_SSM_DT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1376
- {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1377
- {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1378
- {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1379
- {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1380
- {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1381
- {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1382
- {LLM_TENSOR_TIME_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1383
- {LLM_TENSOR_TIME_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1384
- {LLM_TENSOR_TIME_MIX_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1385
- {LLM_TENSOR_TIME_MIX_OUTPUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1386
- {LLM_TENSOR_CHANNEL_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1387
- {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1388
- {LLM_TENSOR_CHANNEL_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1389
- {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_DIV}},
1390
- {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_CONV}},
1391
- {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_SCAN}},
1392
- {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1393
- {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1394
- {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1395
- {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1396
- {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1397
- {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1398
- {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1399
- {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1400
- {LLM_TENSOR_TIME_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1401
- {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1402
- {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1403
- {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1404
- {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
1405
- {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1406
- {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1407
- {LLM_TENSOR_ATTN_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1408
- {LLM_TENSOR_ATTN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1409
- {LLM_TENSOR_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1410
- {LLM_TENSOR_FFN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1411
- {LLM_TENSOR_FFN_NORM_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1412
- {LLM_TENSOR_ATTN_Q_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1413
- {LLM_TENSOR_ATTN_K_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1414
- {LLM_TENSOR_LAYER_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1415
- {LLM_TENSOR_ATTN_Q_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1416
- {LLM_TENSOR_ATTN_KV_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1417
- {LLM_TENSOR_ATTN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1418
- {LLM_TENSOR_FFN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1419
- {LLM_TENSOR_DEC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1420
- {LLM_TENSOR_DEC_CROSS_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1421
- {LLM_TENSOR_DEC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1422
- {LLM_TENSOR_ENC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1423
- {LLM_TENSOR_ENC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1424
- {LLM_TENSOR_DEC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
1425
- {LLM_TENSOR_ENC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
1426
- {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1427
- {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1428
- {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1429
- {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1430
- // this tensor is loaded for T5, but never used
1431
- {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
1432
- {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
1433
- {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1434
- {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1435
- {LLM_TENSOR_POS_NET_NORM2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1436
- {LLM_TENSOR_POS_NET_CONV1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1437
- {LLM_TENSOR_POS_NET_CONV2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1438
- {LLM_TENSOR_POS_NET_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1439
- {LLM_TENSOR_POS_NET_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1440
- {LLM_TENSOR_POS_NET_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1441
- {LLM_TENSOR_POS_NET_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1442
- {LLM_TENSOR_POS_NET_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1443
- {LLM_TENSOR_CONVNEXT_DW, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1444
- {LLM_TENSOR_CONVNEXT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1445
- {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1446
- {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1447
- {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1448
- };
1449
-
1450
- LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
1451
-
1452
- std::string LLM_KV::operator()(llm_kv kv) const {
1453
- return suffix ? ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch), suffix)
1454
- : ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
1455
- }
1456
-
1457
- std::string LLM_TN_IMPL::str() const {
1458
- if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
1459
- return "__missing__";
1460
- }
1461
-
1462
- std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
1463
-
1464
- if (suffix != nullptr) {
1465
- name += ".";
1466
- name += suffix;
1467
- }
1468
-
1469
- return name;
1470
- }
1471
-
1472
- const char * llm_arch_name(llm_arch arch) {
1473
- auto it = LLM_ARCH_NAMES.find(arch);
1474
- if (it == LLM_ARCH_NAMES.end()) {
1475
- return "unknown";
1476
- }
1477
- return it->second;
1478
- }
1479
-
1480
- llm_arch llm_arch_from_string(const std::string & name) {
1481
- for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
1482
- if (kv.second == name) {
1483
- return kv.first;
1484
- }
1485
- }
1486
-
1487
- return LLM_ARCH_UNKNOWN;
1488
- }
1489
-
1490
- const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
1491
- return LLM_TENSOR_INFOS.at(tensor);
1492
- }
1
+ #include "llama-arch.h"
2
+
3
+ #include "llama-impl.h"
4
+
5
+ #include <map>
6
+
7
+ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
8
+ { LLM_ARCH_LLAMA, "llama" },
9
+ { LLM_ARCH_LLAMA4, "llama4" },
10
+ { LLM_ARCH_DECI, "deci" },
11
+ { LLM_ARCH_FALCON, "falcon" },
12
+ { LLM_ARCH_GROK, "grok" },
13
+ { LLM_ARCH_GPT2, "gpt2" },
14
+ { LLM_ARCH_GPTJ, "gptj" },
15
+ { LLM_ARCH_GPTNEOX, "gptneox" },
16
+ { LLM_ARCH_MPT, "mpt" },
17
+ { LLM_ARCH_BAICHUAN, "baichuan" },
18
+ { LLM_ARCH_STARCODER, "starcoder" },
19
+ { LLM_ARCH_REFACT, "refact" },
20
+ { LLM_ARCH_BERT, "bert" },
21
+ { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
22
+ { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
23
+ { LLM_ARCH_BLOOM, "bloom" },
24
+ { LLM_ARCH_STABLELM, "stablelm" },
25
+ { LLM_ARCH_QWEN, "qwen" },
26
+ { LLM_ARCH_QWEN2, "qwen2" },
27
+ { LLM_ARCH_QWEN2MOE, "qwen2moe" },
28
+ { LLM_ARCH_QWEN2VL, "qwen2vl" },
29
+ { LLM_ARCH_QWEN3, "qwen3" },
30
+ { LLM_ARCH_QWEN3MOE, "qwen3moe" },
31
+ { LLM_ARCH_PHI2, "phi2" },
32
+ { LLM_ARCH_PHI3, "phi3" },
33
+ { LLM_ARCH_PHIMOE, "phimoe" },
34
+ { LLM_ARCH_PLAMO, "plamo" },
35
+ { LLM_ARCH_CODESHELL, "codeshell" },
36
+ { LLM_ARCH_ORION, "orion" },
37
+ { LLM_ARCH_INTERNLM2, "internlm2" },
38
+ { LLM_ARCH_MINICPM, "minicpm" },
39
+ { LLM_ARCH_MINICPM3, "minicpm3" },
40
+ { LLM_ARCH_GEMMA, "gemma" },
41
+ { LLM_ARCH_GEMMA2, "gemma2" },
42
+ { LLM_ARCH_GEMMA3, "gemma3" },
43
+ { LLM_ARCH_STARCODER2, "starcoder2" },
44
+ { LLM_ARCH_MAMBA, "mamba" },
45
+ { LLM_ARCH_XVERSE, "xverse" },
46
+ { LLM_ARCH_COMMAND_R, "command-r" },
47
+ { LLM_ARCH_COHERE2, "cohere2" },
48
+ { LLM_ARCH_DBRX, "dbrx" },
49
+ { LLM_ARCH_OLMO, "olmo" },
50
+ { LLM_ARCH_OLMO2, "olmo2" },
51
+ { LLM_ARCH_OLMOE, "olmoe" },
52
+ { LLM_ARCH_OPENELM, "openelm" },
53
+ { LLM_ARCH_ARCTIC, "arctic" },
54
+ { LLM_ARCH_DEEPSEEK, "deepseek" },
55
+ { LLM_ARCH_DEEPSEEK2, "deepseek2" },
56
+ { LLM_ARCH_CHATGLM, "chatglm" },
57
+ { LLM_ARCH_BITNET, "bitnet" },
58
+ { LLM_ARCH_T5, "t5" },
59
+ { LLM_ARCH_T5ENCODER, "t5encoder" },
60
+ { LLM_ARCH_JAIS, "jais" },
61
+ { LLM_ARCH_NEMOTRON, "nemotron" },
62
+ { LLM_ARCH_EXAONE, "exaone" },
63
+ { LLM_ARCH_RWKV6, "rwkv6" },
64
+ { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
65
+ { LLM_ARCH_RWKV7, "rwkv7" },
66
+ { LLM_ARCH_ARWKV7, "arwkv7" },
67
+ { LLM_ARCH_GRANITE, "granite" },
68
+ { LLM_ARCH_GRANITE_MOE, "granitemoe" },
69
+ { LLM_ARCH_CHAMELEON, "chameleon" },
70
+ { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
71
+ { LLM_ARCH_PLM, "plm" },
72
+ { LLM_ARCH_BAILINGMOE, "bailingmoe" },
73
+ { LLM_ARCH_UNKNOWN, "(unknown)" },
74
+ };
75
+
76
+ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
77
+ { LLM_KV_GENERAL_TYPE, "general.type" },
78
+ { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
79
+ { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
80
+ { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
81
+ { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" },
82
+ { LLM_KV_GENERAL_NAME, "general.name" },
83
+ { LLM_KV_GENERAL_AUTHOR, "general.author" },
84
+ { LLM_KV_GENERAL_VERSION, "general.version" },
85
+ { LLM_KV_GENERAL_URL, "general.url" },
86
+ { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
87
+ { LLM_KV_GENERAL_LICENSE, "general.license" },
88
+ { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" },
89
+ { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" },
90
+
91
+ { LLM_KV_VOCAB_SIZE, "%s.vocab_size" },
92
+ { LLM_KV_CONTEXT_LENGTH, "%s.context_length" },
93
+ { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" },
94
+ { LLM_KV_FEATURES_LENGTH, "%s.features_length" },
95
+ { LLM_KV_BLOCK_COUNT, "%s.block_count" },
96
+ { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
97
+ { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
98
+ { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, "%s.expert_feed_forward_length" },
99
+ { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
100
+ { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
101
+ { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
102
+ { LLM_KV_EXPERT_COUNT, "%s.expert_count" },
103
+ { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
104
+ { LLM_KV_EXPERT_SHARED_COUNT, "%s.expert_shared_count" },
105
+ { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
106
+ { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
107
+ { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
108
+ { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
109
+ { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
110
+ { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
111
+ { LLM_KV_ATTN_LOGIT_SOFTCAPPING, "%s.attn_logit_softcapping" },
112
+ { LLM_KV_FINAL_LOGIT_SOFTCAPPING, "%s.final_logit_softcapping" },
113
+ { LLM_KV_SWIN_NORM, "%s.swin_norm" },
114
+ { LLM_KV_RESCALE_EVERY_N_LAYERS, "%s.rescale_every_n_layers" },
115
+ { LLM_KV_TIME_MIX_EXTRA_DIM, "%s.time_mix_extra_dim" },
116
+ { LLM_KV_TIME_DECAY_EXTRA_DIM, "%s.time_decay_extra_dim" },
117
+ { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
118
+ { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
119
+ { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
120
+ { LLM_KV_INTERLEAVE_MOE_LAYER_STEP, "%s.interleave_moe_layer_step" },
121
+
122
+ { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
123
+ { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
124
+ { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
125
+ { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
126
+ { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
127
+ { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
128
+ { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
129
+ { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
130
+ { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
131
+ { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
132
+ { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
133
+ { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
134
+ { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
135
+ { LLM_KV_ATTENTION_DECAY_LORA_RANK, "%s.attention.decay_lora_rank" },
136
+ { LLM_KV_ATTENTION_ICLR_LORA_RANK, "%s.attention.iclr_lora_rank" },
137
+ { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
138
+ { LLM_KV_ATTENTION_GATE_LORA_RANK, "%s.attention.gate_lora_rank" },
139
+ { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
140
+ { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
141
+ { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
142
+
143
+ { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
144
+ { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
145
+ { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
146
+ { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
147
+ { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
148
+ { LLM_KV_ROPE_SCALING_FACTOR, "%s.rope.scaling.factor" },
149
+ { LLM_KV_ROPE_SCALING_ATTN_FACTOR, "%s.rope.scaling.attn_factor" },
150
+ { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
151
+ { LLM_KV_ROPE_SCALING_FINETUNED, "%s.rope.scaling.finetuned" },
152
+ { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
153
+
154
+ { LLM_KV_SPLIT_NO, "split.no" },
155
+ { LLM_KV_SPLIT_COUNT, "split.count" },
156
+ { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
157
+
158
+ { LLM_KV_SSM_CONV_KERNEL, "%s.ssm.conv_kernel" },
159
+ { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
160
+ { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
161
+ { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
162
+ { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
163
+
164
+ { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
165
+
166
+ { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
167
+ { LLM_KV_POSNET_BLOCK_COUNT, "%s.posnet.block_count" },
168
+
169
+ { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
170
+ { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
171
+
172
+ { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
173
+ { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
174
+ { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
175
+ { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
176
+ { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
177
+ { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" },
178
+ { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" },
179
+ { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" },
180
+ { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" },
181
+ { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
182
+ { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
183
+ { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" },
184
+ { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" },
185
+ { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
186
+ { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" },
187
+ { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
188
+ { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
189
+ { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
190
+ { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
191
+ { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
192
+ { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
193
+ { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
194
+ { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
195
+ { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
196
+ { LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
197
+ { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
198
+ { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
199
+ { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
200
+ { LLM_KV_TOKENIZER_FIM_PAD_ID, "tokenizer.ggml.fim_pad_token_id" },
201
+ { LLM_KV_TOKENIZER_FIM_REP_ID, "tokenizer.ggml.fim_rep_token_id" },
202
+ { LLM_KV_TOKENIZER_FIM_SEP_ID, "tokenizer.ggml.fim_sep_token_id" },
203
+
204
+ { LLM_KV_ADAPTER_TYPE, "adapter.type" },
205
+ { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
206
+
207
+ // deprecated
208
+ { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
209
+ { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
210
+ { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
211
+ };
212
+
213
+ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
214
+ {
215
+ LLM_ARCH_LLAMA,
216
+ {
217
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
218
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
219
+ { LLM_TENSOR_OUTPUT, "output" },
220
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
221
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
222
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
223
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
224
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
225
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
226
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
227
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
228
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
229
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
230
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
231
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
232
+ { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
233
+ { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
234
+ { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
235
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
236
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
237
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
238
+ },
239
+ },
240
+ {
241
+ LLM_ARCH_LLAMA4,
242
+ {
243
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
244
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
245
+ { LLM_TENSOR_OUTPUT, "output" },
246
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
247
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
248
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
249
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
250
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
251
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
252
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
253
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
254
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
255
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
256
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
257
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
258
+ { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
259
+ { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
260
+ { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
261
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
262
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
263
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
264
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
265
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
266
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
267
+ },
268
+ },
269
+ {
270
+ LLM_ARCH_DECI,
271
+ {
272
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
273
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
274
+ { LLM_TENSOR_OUTPUT, "output" },
275
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
276
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
277
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
278
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
279
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
280
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
281
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
282
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
283
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
284
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
285
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
286
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
287
+ { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
288
+ { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
289
+ { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
290
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
291
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
292
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
293
+ },
294
+ },
295
+ {
296
+ LLM_ARCH_BAICHUAN,
297
+ {
298
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
299
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
300
+ { LLM_TENSOR_OUTPUT, "output" },
301
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
302
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
303
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
304
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
305
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
306
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
307
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
308
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
309
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
310
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
311
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
312
+ },
313
+ },
314
+ {
315
+ LLM_ARCH_FALCON,
316
+ {
317
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
318
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
319
+ { LLM_TENSOR_OUTPUT, "output" },
320
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
321
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
322
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
323
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
324
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
325
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
326
+ },
327
+ },
328
+ {
329
+ LLM_ARCH_GROK,
330
+ {
331
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
332
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
333
+ { LLM_TENSOR_OUTPUT, "output" },
334
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
335
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
336
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
337
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
338
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
339
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
340
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
341
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
342
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
343
+ { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
344
+ { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
345
+ { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
346
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
347
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
348
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
349
+ { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
350
+ { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
351
+ },
352
+ },
353
+ {
354
+ LLM_ARCH_GPT2,
355
+ {
356
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
357
+ { LLM_TENSOR_POS_EMBD, "position_embd" },
358
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
359
+ { LLM_TENSOR_OUTPUT, "output" },
360
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
361
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
362
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
363
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
364
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
365
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
366
+ },
367
+ },
368
+ {
369
+ LLM_ARCH_GPTJ,
370
+ {
371
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
372
+ },
373
+ },
374
+ {
375
+ LLM_ARCH_GPTNEOX,
376
+ {
377
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
378
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
379
+ { LLM_TENSOR_OUTPUT, "output" },
380
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
381
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
382
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
383
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
384
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
385
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
386
+ },
387
+ },
388
+ {
389
+ LLM_ARCH_MPT,
390
+ {
391
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
392
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
393
+ { LLM_TENSOR_OUTPUT, "output"},
394
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
395
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
396
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
397
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
398
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
399
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
400
+ { LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
401
+ { LLM_TENSOR_POS_EMBD, "position_embd" },
402
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm"},
403
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm"},
404
+ },
405
+ },
406
+ {
407
+ LLM_ARCH_STARCODER,
408
+ {
409
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
410
+ { LLM_TENSOR_POS_EMBD, "position_embd" },
411
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
412
+ { LLM_TENSOR_OUTPUT, "output" },
413
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
414
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
415
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
416
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
417
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
418
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
419
+ },
420
+ },
421
+ {
422
+ LLM_ARCH_REFACT,
423
+ {
424
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
425
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
426
+ { LLM_TENSOR_OUTPUT, "output" },
427
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
428
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
429
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
430
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
431
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
432
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
433
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
434
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
435
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
436
+ },
437
+ },
438
+ {
439
+ LLM_ARCH_BERT,
440
+ {
441
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
442
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
443
+ { LLM_TENSOR_TOKEN_TYPES, "token_types" },
444
+ { LLM_TENSOR_POS_EMBD, "position_embd" },
445
+ { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
446
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
447
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
448
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
449
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
450
+ { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
451
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
452
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
453
+ { LLM_TENSOR_CLS, "cls" },
454
+ { LLM_TENSOR_CLS_OUT, "cls.output" },
455
+ },
456
+ },
457
+ {
458
+ LLM_ARCH_NOMIC_BERT,
459
+ {
460
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
461
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
462
+ { LLM_TENSOR_TOKEN_TYPES, "token_types" },
463
+ { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
464
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
465
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
466
+ { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
467
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
468
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
469
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
470
+ },
471
+ },
472
+ {
473
+ LLM_ARCH_JINA_BERT_V2,
474
+ {
475
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
476
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
477
+ { LLM_TENSOR_TOKEN_TYPES, "token_types" },
478
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
479
+ { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
480
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
481
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
482
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
483
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
484
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
485
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
486
+ { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
487
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
488
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
489
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
490
+ { LLM_TENSOR_CLS, "cls" },
491
+ },
492
+ },
493
+ {
494
+ LLM_ARCH_BLOOM,
495
+ {
496
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
497
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
498
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
499
+ { LLM_TENSOR_OUTPUT, "output" },
500
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
501
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
502
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
503
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
504
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
505
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
506
+ },
507
+ },
508
+ {
509
+ LLM_ARCH_STABLELM,
510
+ {
511
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
512
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
513
+ { LLM_TENSOR_OUTPUT, "output" },
514
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
515
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
516
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
517
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
518
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
519
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
520
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
521
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
522
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
523
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
524
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
525
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
526
+ },
527
+ },
528
+ {
529
+ LLM_ARCH_QWEN,
530
+ {
531
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
532
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
533
+ { LLM_TENSOR_OUTPUT, "output" },
534
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
535
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
536
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
537
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
538
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
539
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
540
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
541
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
542
+ },
543
+ },
544
+ {
545
+ LLM_ARCH_QWEN2,
546
+ {
547
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
548
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
549
+ { LLM_TENSOR_OUTPUT, "output" },
550
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
551
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
552
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
553
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
554
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
555
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
556
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
557
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
558
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
559
+ },
560
+ },
561
+ {
562
+ LLM_ARCH_QWEN2VL,
563
+ {
564
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
565
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
566
+ { LLM_TENSOR_OUTPUT, "output" },
567
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
568
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
569
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
570
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
571
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
572
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
573
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
574
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
575
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
576
+ },
577
+ },
578
+ {
579
+ LLM_ARCH_QWEN2MOE,
580
+ {
581
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
582
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
583
+ { LLM_TENSOR_OUTPUT, "output" },
584
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
585
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
586
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
587
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
588
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
589
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
590
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
591
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
592
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
593
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
594
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
595
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
596
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
597
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
598
+ },
599
+ },
600
+ {
601
+ LLM_ARCH_QWEN3,
602
+ {
603
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
604
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
605
+ { LLM_TENSOR_OUTPUT, "output" },
606
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
607
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
608
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
609
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
610
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
611
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
612
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
613
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
614
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
615
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
616
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
617
+ },
618
+ },
619
+ {
620
+ LLM_ARCH_QWEN3MOE,
621
+ {
622
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
623
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
624
+ { LLM_TENSOR_OUTPUT, "output" },
625
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
626
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
627
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
628
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
629
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
630
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
631
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
632
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
633
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
634
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
635
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
636
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
637
+ },
638
+ },
639
+ {
640
+ LLM_ARCH_PHI2,
641
+ {
642
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
643
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
644
+ { LLM_TENSOR_OUTPUT, "output" },
645
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
646
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
647
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
648
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
649
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
650
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
651
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
652
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
653
+ },
654
+ },
655
+ {
656
+ LLM_ARCH_PHI3,
657
+ {
658
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
659
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
660
+ { LLM_TENSOR_OUTPUT, "output" },
661
+ { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
662
+ { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
663
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
664
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
665
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
666
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
667
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
668
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
669
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
670
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
671
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
672
+ },
673
+ },
674
+ {
675
+ LLM_ARCH_PHIMOE,
676
+ {
677
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
678
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
679
+ { LLM_TENSOR_OUTPUT, "output" },
680
+ { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
681
+ { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
682
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
683
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
684
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
685
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
686
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
687
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
688
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
689
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
690
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
691
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
692
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
693
+ },
694
+ },
695
+ {
696
+ LLM_ARCH_PLAMO,
697
+ {
698
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
699
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
700
+ { LLM_TENSOR_OUTPUT, "output" },
701
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
702
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
703
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
704
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
705
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
706
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
707
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
708
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
709
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
710
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
711
+ },
712
+ },
713
+ {
714
+ LLM_ARCH_CODESHELL,
715
+ {
716
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
717
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
718
+ { LLM_TENSOR_OUTPUT, "output" },
719
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
720
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
721
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
722
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
723
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
724
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
725
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
726
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
727
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
728
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
729
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
730
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
731
+ },
732
+ },
733
+ {
734
+ LLM_ARCH_ORION,
735
+ {
736
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
737
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
738
+ { LLM_TENSOR_OUTPUT, "output" },
739
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
740
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
741
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
742
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
743
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
744
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
745
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
746
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
747
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
748
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
749
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
750
+ },
751
+ },
752
+ {
753
+ LLM_ARCH_INTERNLM2,
754
+ {
755
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
756
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
757
+ { LLM_TENSOR_OUTPUT, "output" },
758
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
759
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
760
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
761
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
762
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
763
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
764
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
765
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
766
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
767
+ },
768
+ },
769
+ {
770
+ LLM_ARCH_MINICPM,
771
+ {
772
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
773
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
774
+ { LLM_TENSOR_OUTPUT, "output" },
775
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
776
+ { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
777
+ { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
778
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
779
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
780
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
781
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
782
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
783
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
784
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
785
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
786
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
787
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
788
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
789
+ { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
790
+ { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
791
+ { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
792
+ },
793
+ },
794
+ {
795
+ LLM_ARCH_MINICPM3,
796
+ {
797
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
798
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
799
+ { LLM_TENSOR_OUTPUT, "output" },
800
+ { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
801
+ { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
802
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
803
+ { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
804
+ { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
805
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
806
+ { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
807
+ { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
808
+ { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
809
+ { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
810
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
811
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
812
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
813
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
814
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
815
+ },
816
+ },
817
+ {
818
+ LLM_ARCH_GEMMA,
819
+ {
820
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
821
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
822
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
823
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
824
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
825
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
826
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
827
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
828
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
829
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
830
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
831
+ },
832
+ },
833
+ {
834
+ LLM_ARCH_GEMMA2,
835
+ {
836
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
837
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
838
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
839
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
840
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
841
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
842
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
843
+ { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
844
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
845
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
846
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
847
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
848
+ { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
849
+ },
850
+ },
851
+ {
852
+ LLM_ARCH_GEMMA3,
853
+ {
854
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
855
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
856
+ { LLM_TENSOR_OUTPUT, "output" },
857
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
858
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
859
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
860
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
861
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
862
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
863
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
864
+ { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
865
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
866
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
867
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
868
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
869
+ { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
870
+ },
871
+ },
872
+ {
873
+ LLM_ARCH_STARCODER2,
874
+ {
875
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
876
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
877
+ { LLM_TENSOR_OUTPUT, "output" },
878
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
879
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
880
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
881
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
882
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
883
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
884
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
885
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
886
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
887
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
888
+ },
889
+ },
890
+ {
891
+ LLM_ARCH_MAMBA,
892
+ {
893
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
894
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
895
+ { LLM_TENSOR_OUTPUT, "output" },
896
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
897
+ { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
898
+ { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
899
+ { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
900
+ { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
901
+ { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
902
+ { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
903
+ { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
904
+ },
905
+ },
906
+ {
907
+ LLM_ARCH_XVERSE,
908
+ {
909
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
910
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
911
+ { LLM_TENSOR_OUTPUT, "output" },
912
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
913
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
914
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
915
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
916
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
917
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
918
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
919
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
920
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
921
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
922
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
923
+ },
924
+ },
925
+ {
926
+ LLM_ARCH_COMMAND_R,
927
+ {
928
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
929
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
930
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
931
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
932
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
933
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
934
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
935
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
936
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
937
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
938
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
939
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
940
+ },
941
+ },
942
+ {
943
+ LLM_ARCH_COHERE2,
944
+ {
945
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
946
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
947
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
948
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
949
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
950
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
951
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
952
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
953
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
954
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
955
+ },
956
+ },
957
+ {
958
+ LLM_ARCH_DBRX,
959
+ {
960
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
961
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
962
+ { LLM_TENSOR_OUTPUT, "output" },
963
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
964
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
965
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
966
+ { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
967
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
968
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
969
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
970
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
971
+ },
972
+ },
973
+ {
974
+ LLM_ARCH_OLMO,
975
+ {
976
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
977
+ { LLM_TENSOR_OUTPUT, "output" },
978
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
979
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
980
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
981
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
982
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
983
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
984
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
985
+ },
986
+ },
987
+ {
988
+ LLM_ARCH_OLMO2,
989
+ {
990
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
991
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
992
+ { LLM_TENSOR_OUTPUT, "output" },
993
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
994
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
995
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
996
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
997
+ { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
998
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
999
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1000
+ { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1001
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1002
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1003
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1004
+ },
1005
+ },
1006
+ {
1007
+ LLM_ARCH_OLMOE,
1008
+ {
1009
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1010
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1011
+ { LLM_TENSOR_OUTPUT, "output" },
1012
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1013
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1014
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1015
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1016
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1017
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1018
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1019
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1020
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1021
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1022
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1023
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1024
+ },
1025
+ },
1026
+ {
1027
+ LLM_ARCH_OPENELM,
1028
+ {
1029
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1030
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1031
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1032
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1033
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1034
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1035
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1036
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1037
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1038
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1039
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1040
+ },
1041
+ },
1042
+ {
1043
+ LLM_ARCH_ARCTIC,
1044
+ {
1045
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1046
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1047
+ { LLM_TENSOR_OUTPUT, "output" },
1048
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1049
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1050
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1051
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1052
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1053
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1054
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1055
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1056
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1057
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1058
+ { LLM_TENSOR_FFN_NORM_EXPS, "blk.%d.ffn_norm_exps" },
1059
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1060
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1061
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1062
+ },
1063
+ },
1064
+ {
1065
+ LLM_ARCH_DEEPSEEK,
1066
+ {
1067
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1068
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1069
+ { LLM_TENSOR_OUTPUT, "output" },
1070
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1071
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1072
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1073
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1074
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1075
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1076
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1077
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1078
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1079
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1080
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1081
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1082
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1083
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1084
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1085
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1086
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1087
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1088
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1089
+ },
1090
+ },
1091
+ {
1092
+ LLM_ARCH_DEEPSEEK2,
1093
+ {
1094
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1095
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1096
+ { LLM_TENSOR_OUTPUT, "output" },
1097
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1098
+ { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
1099
+ { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
1100
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1101
+ { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
1102
+ { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
1103
+ { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1104
+ { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1105
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1106
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1107
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1108
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1109
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1110
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1111
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1112
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1113
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1114
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1115
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1116
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1117
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1118
+ { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1119
+ },
1120
+ },
1121
+ {
1122
+ LLM_ARCH_PLM,
1123
+ {
1124
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1125
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1126
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1127
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1128
+ { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1129
+ { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
1130
+ { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1131
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1132
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1133
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1134
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1135
+ },
1136
+ },
1137
+ {
1138
+ LLM_ARCH_CHATGLM,
1139
+ {
1140
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1141
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1142
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1143
+ { LLM_TENSOR_OUTPUT, "output" },
1144
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1145
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1146
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1147
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1148
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1149
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1150
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1151
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1152
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1153
+ },
1154
+ },
1155
+ {
1156
+ LLM_ARCH_BITNET,
1157
+ {
1158
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1159
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1160
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1161
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1162
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1163
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1164
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1165
+ { LLM_TENSOR_ATTN_SUB_NORM, "blk.%d.attn_sub_norm" },
1166
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1167
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1168
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1169
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1170
+ { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" },
1171
+ },
1172
+ },
1173
+ {
1174
+ LLM_ARCH_T5,
1175
+ {
1176
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1177
+ { LLM_TENSOR_OUTPUT, "output" },
1178
+ { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" },
1179
+ { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" },
1180
+ { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" },
1181
+ { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" },
1182
+ { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" },
1183
+ { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" },
1184
+ { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" },
1185
+ { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" },
1186
+ { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" },
1187
+ { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" },
1188
+ { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" },
1189
+ { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" },
1190
+ { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
1191
+ { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" },
1192
+ { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" },
1193
+ { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" },
1194
+ { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" },
1195
+ { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1196
+ { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1197
+ { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1198
+ { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1199
+ { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1200
+ { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1201
+ { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1202
+ { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1203
+ { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1204
+ { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1205
+ { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1206
+ },
1207
+ },
1208
+ {
1209
+ LLM_ARCH_T5ENCODER,
1210
+ {
1211
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1212
+ { LLM_TENSOR_OUTPUT, "output" },
1213
+ { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1214
+ { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1215
+ { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1216
+ { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1217
+ { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1218
+ { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1219
+ { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1220
+ { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1221
+ { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1222
+ { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1223
+ { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1224
+ },
1225
+ },
1226
+ {
1227
+ LLM_ARCH_JAIS,
1228
+ {
1229
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1230
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1231
+ { LLM_TENSOR_OUTPUT, "output" },
1232
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1233
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1234
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1235
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1236
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1237
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1238
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1239
+ },
1240
+ },
1241
+ {
1242
+ LLM_ARCH_NEMOTRON,
1243
+ {
1244
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1245
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1246
+ { LLM_TENSOR_OUTPUT, "output" },
1247
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1248
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1249
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1250
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1251
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1252
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1253
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1254
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1255
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1256
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1257
+ },
1258
+ },
1259
+ {
1260
+ LLM_ARCH_EXAONE,
1261
+ {
1262
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1263
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1264
+ { LLM_TENSOR_OUTPUT, "output" },
1265
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1266
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1267
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1268
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1269
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1270
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1271
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1272
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1273
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1274
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1275
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1276
+ },
1277
+ },
1278
+ {
1279
+ LLM_ARCH_RWKV6,
1280
+ {
1281
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1282
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1283
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1284
+ { LLM_TENSOR_OUTPUT, "output" },
1285
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1286
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1287
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1288
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1289
+ { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1290
+ { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
1291
+ { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
1292
+ { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
1293
+ { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
1294
+ { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
1295
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1296
+ { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1297
+ { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1298
+ { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1299
+ { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1300
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1301
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1302
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1303
+ { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1304
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1305
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1306
+ { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1307
+ { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
1308
+ { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1309
+ { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1310
+ { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
1311
+ },
1312
+ },
1313
+ {
1314
+ LLM_ARCH_RWKV6QWEN2,
1315
+ {
1316
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1317
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1318
+ { LLM_TENSOR_OUTPUT, "output" },
1319
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1320
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1321
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1322
+ { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1323
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1324
+ { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1325
+ { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1326
+ { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1327
+ { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1328
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1329
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1330
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1331
+ { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1332
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1333
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1334
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1335
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1336
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1337
+ },
1338
+ },
1339
+ {
1340
+ LLM_ARCH_RWKV7,
1341
+ {
1342
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1343
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1344
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1345
+ { LLM_TENSOR_OUTPUT, "output" },
1346
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1347
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1348
+ { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
1349
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1350
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1351
+ { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
1352
+ { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
1353
+ { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
1354
+ { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
1355
+ { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
1356
+ { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
1357
+ { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
1358
+ { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
1359
+ { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
1360
+ { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
1361
+ { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
1362
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1363
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1364
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1365
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1366
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1367
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1368
+ { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1369
+ { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1370
+ { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1371
+ },
1372
+ },
1373
+ {
1374
+ LLM_ARCH_ARWKV7,
1375
+ {
1376
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1377
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1378
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1379
+ { LLM_TENSOR_OUTPUT, "output" },
1380
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1381
+ { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
1382
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1383
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1384
+ { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
1385
+ { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
1386
+ { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
1387
+ { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
1388
+ { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
1389
+ { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
1390
+ { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
1391
+ { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
1392
+ { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
1393
+ { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
1394
+ { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
1395
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1396
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1397
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1398
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1399
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1400
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1401
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1402
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1403
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1404
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1405
+ },
1406
+ },
1407
+ {
1408
+ LLM_ARCH_GRANITE,
1409
+ {
1410
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1411
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1412
+ { LLM_TENSOR_OUTPUT, "output" },
1413
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1414
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1415
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1416
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1417
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1418
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1419
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1420
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1421
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1422
+ },
1423
+ },
1424
+ {
1425
+ LLM_ARCH_GRANITE_MOE,
1426
+ {
1427
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1428
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1429
+ { LLM_TENSOR_OUTPUT, "output" },
1430
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1431
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1432
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1433
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1434
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1435
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1436
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1437
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1438
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1439
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1440
+ },
1441
+ },
1442
+ {
1443
+ LLM_ARCH_CHAMELEON,
1444
+ {
1445
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1446
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1447
+ { LLM_TENSOR_OUTPUT, "output" },
1448
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1449
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1450
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1451
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1452
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1453
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1454
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1455
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1456
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1457
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1458
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1459
+ },
1460
+ },
1461
+ {
1462
+ LLM_ARCH_WAVTOKENIZER_DEC,
1463
+ {
1464
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1465
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1466
+ { LLM_TENSOR_CONV1D, "conv1d" },
1467
+ { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
1468
+ { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
1469
+ { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
1470
+ { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
1471
+ { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
1472
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1473
+ { LLM_TENSOR_OUTPUT, "output" },
1474
+ { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
1475
+ { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
1476
+ { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
1477
+ { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
1478
+ { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
1479
+ { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
1480
+ { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
1481
+ { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
1482
+ { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
1483
+ { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
1484
+ },
1485
+ },
1486
+ {
1487
+ LLM_ARCH_BAILINGMOE,
1488
+ {
1489
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1490
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1491
+ { LLM_TENSOR_OUTPUT, "output" },
1492
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1493
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1494
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1495
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1496
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1497
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1498
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1499
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1500
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1501
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1502
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1503
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1504
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1505
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1506
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1507
+ },
1508
+ },
1509
+ {
1510
+ LLM_ARCH_UNKNOWN,
1511
+ {
1512
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1513
+ },
1514
+ },
1515
+ };
1516
+
1517
+ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1518
+ {LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1519
+ {LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1520
+ {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1521
+ {LLM_TENSOR_TOKEN_TYPES, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_GET_ROWS}},
1522
+ {LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1523
+ {LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1524
+ {LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL_MAT}},
1525
+ {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1526
+ {LLM_TENSOR_DEC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1527
+ {LLM_TENSOR_ENC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, LM_GGML_OP_MUL}},
1528
+ {LLM_TENSOR_ROPE_FREQS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1529
+ {LLM_TENSOR_ROPE_FACTORS_LONG, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1530
+ {LLM_TENSOR_ROPE_FACTORS_SHORT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ROPE}},
1531
+ {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1532
+ {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1533
+ {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1534
+ {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1535
+ {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1536
+ {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1537
+ {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1538
+ {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1539
+ {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1540
+ {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1541
+ {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1542
+ {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1543
+ {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1544
+ {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1545
+ {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1546
+ {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1547
+ {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1548
+ {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1549
+ {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1550
+ {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1551
+ {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1552
+ {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1553
+ {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1554
+ {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1555
+ {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1556
+ {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1557
+ {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1558
+ {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1559
+ {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1560
+ {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1561
+ {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1562
+ {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1563
+ {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1564
+ {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1565
+ {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1566
+ {LLM_TENSOR_DEC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1567
+ {LLM_TENSOR_DEC_CROSS_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1568
+ {LLM_TENSOR_DEC_CROSS_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1569
+ {LLM_TENSOR_DEC_CROSS_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1570
+ {LLM_TENSOR_DEC_CROSS_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1571
+ {LLM_TENSOR_DEC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1572
+ {LLM_TENSOR_DEC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1573
+ {LLM_TENSOR_DEC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1574
+ {LLM_TENSOR_ENC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1575
+ {LLM_TENSOR_ENC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1576
+ {LLM_TENSOR_ENC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1577
+ {LLM_TENSOR_ENC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1578
+ {LLM_TENSOR_ENC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1579
+ {LLM_TENSOR_ENC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1580
+ {LLM_TENSOR_ENC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1581
+ {LLM_TENSOR_FFN_GATE_INP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1582
+ {LLM_TENSOR_FFN_GATE_INP, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1583
+ {LLM_TENSOR_SSM_IN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1584
+ {LLM_TENSOR_SSM_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1585
+ {LLM_TENSOR_SSM_DT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1586
+ {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1587
+ {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1588
+ {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1589
+ {LLM_TENSOR_TIME_MIX_A1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1590
+ {LLM_TENSOR_TIME_MIX_A2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1591
+ {LLM_TENSOR_TIME_MIX_V1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1592
+ {LLM_TENSOR_TIME_MIX_V2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1593
+ {LLM_TENSOR_TIME_MIX_G1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1594
+ {LLM_TENSOR_TIME_MIX_G2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1595
+ {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1596
+ {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1597
+ {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1598
+ {LLM_TENSOR_TIME_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1599
+ {LLM_TENSOR_TIME_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1600
+ {LLM_TENSOR_TIME_MIX_GATE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1601
+ {LLM_TENSOR_TIME_MIX_OUTPUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1602
+ {LLM_TENSOR_CHANNEL_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1603
+ {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1604
+ {LLM_TENSOR_CHANNEL_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1605
+ {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_DIV}},
1606
+ {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_CONV}},
1607
+ {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_SSM_SCAN}},
1608
+ {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1609
+ {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1610
+ {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1611
+ {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1612
+ {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1613
+ {LLM_TENSOR_TIME_MIX_K_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1614
+ {LLM_TENSOR_TIME_MIX_K_A, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1615
+ {LLM_TENSOR_TIME_MIX_R_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1616
+ {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1617
+ {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1618
+ {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1619
+ {LLM_TENSOR_TIME_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1620
+ {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1621
+ {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1622
+ {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1623
+ {LLM_TENSOR_TIME_MIX_W0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1624
+ {LLM_TENSOR_TIME_MIX_A0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1625
+ {LLM_TENSOR_TIME_MIX_V0, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1626
+ {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_RWKV_WKV6}},
1627
+ {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1628
+ {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1629
+ {LLM_TENSOR_ATTN_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1630
+ {LLM_TENSOR_ATTN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1631
+ {LLM_TENSOR_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1632
+ {LLM_TENSOR_FFN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1633
+ {LLM_TENSOR_FFN_NORM_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1634
+ {LLM_TENSOR_ATTN_Q_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1635
+ {LLM_TENSOR_ATTN_K_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1636
+ {LLM_TENSOR_LAYER_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1637
+ {LLM_TENSOR_ATTN_Q_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1638
+ {LLM_TENSOR_ATTN_KV_A_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1639
+ {LLM_TENSOR_ATTN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1640
+ {LLM_TENSOR_FFN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1641
+ {LLM_TENSOR_DEC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1642
+ {LLM_TENSOR_DEC_CROSS_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1643
+ {LLM_TENSOR_DEC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1644
+ {LLM_TENSOR_ENC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1645
+ {LLM_TENSOR_ENC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1646
+ {LLM_TENSOR_DEC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
1647
+ {LLM_TENSOR_ENC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_GET_ROWS}},
1648
+ {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1649
+ {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1650
+ {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT_ID}},
1651
+ {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_ADD}},
1652
+ // this tensor is loaded for T5, but never used
1653
+ {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_NONE}},
1654
+ {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, LM_GGML_OP_IM2COL}},
1655
+ {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1656
+ {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1657
+ {LLM_TENSOR_POS_NET_NORM2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1658
+ {LLM_TENSOR_POS_NET_CONV1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1659
+ {LLM_TENSOR_POS_NET_CONV2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1660
+ {LLM_TENSOR_POS_NET_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1661
+ {LLM_TENSOR_POS_NET_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1662
+ {LLM_TENSOR_POS_NET_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1663
+ {LLM_TENSOR_POS_NET_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1664
+ {LLM_TENSOR_POS_NET_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1665
+ {LLM_TENSOR_CONVNEXT_DW, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_IM2COL}},
1666
+ {LLM_TENSOR_CONVNEXT_NORM, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1667
+ {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1668
+ {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL_MAT}},
1669
+ {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, LM_GGML_OP_MUL}},
1670
+ };
1671
+
1672
// Construct a metadata-key formatter bound to one architecture.
// `suffix`, when non-null, is appended as an extra format argument by
// operator() when rendering each key.
// NOTE(review): `suffix` is stored as a raw `const char *` — presumably
// callers pass string literals with static lifetime; confirm no dangling.
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
1673
+
1674
+ std::string LLM_KV::operator()(llm_kv kv) const {
1675
+ return suffix ? ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch), suffix)
1676
+ : ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
1677
+ }
1678
+
1679
+ std::string LLM_TN_IMPL::str() const {
1680
+ if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
1681
+ return "__missing__";
1682
+ }
1683
+
1684
+ std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
1685
+
1686
+ if (suffix != nullptr) {
1687
+ name += ".";
1688
+ name += suffix;
1689
+ }
1690
+
1691
+ return name;
1692
+ }
1693
+
1694
+ const char * llm_arch_name(llm_arch arch) {
1695
+ auto it = LLM_ARCH_NAMES.find(arch);
1696
+ if (it == LLM_ARCH_NAMES.end()) {
1697
+ return "unknown";
1698
+ }
1699
+ return it->second;
1700
+ }
1701
+
1702
+ llm_arch llm_arch_from_string(const std::string & name) {
1703
+ for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
1704
+ if (kv.second == name) {
1705
+ return kv.first;
1706
+ }
1707
+ }
1708
+
1709
+ return LLM_ARCH_UNKNOWN;
1710
+ }
1711
+
1712
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
1713
+ return LLM_TENSOR_INFOS.at(tensor);
1714
+ }