@fugood/llama.node 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. package/README.md +17 -2
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +3 -1
  19. package/lib/index.js +16 -1
  20. package/lib/index.ts +16 -0
  21. package/package.json +1 -1
  22. package/src/EmbeddingWorker.cpp +4 -3
  23. package/src/LlamaCompletionWorker.cpp +4 -2
  24. package/src/LlamaContext.cpp +61 -6
  25. package/src/LlamaContext.h +1 -0
  26. package/src/common.hpp +6 -11
  27. package/src/llama.cpp/.github/workflows/build.yml +19 -17
  28. package/src/llama.cpp/.github/workflows/docker.yml +77 -30
  29. package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +22 -3
  31. package/src/llama.cpp/CMakeLists.txt +49 -24
  32. package/src/llama.cpp/common/arg.cpp +82 -26
  33. package/src/llama.cpp/common/arg.h +3 -0
  34. package/src/llama.cpp/common/common.cpp +192 -72
  35. package/src/llama.cpp/common/common.h +51 -18
  36. package/src/llama.cpp/common/ngram-cache.cpp +12 -12
  37. package/src/llama.cpp/common/ngram-cache.h +2 -2
  38. package/src/llama.cpp/common/sampling.cpp +11 -6
  39. package/src/llama.cpp/common/speculative.cpp +18 -15
  40. package/src/llama.cpp/docs/build.md +2 -0
  41. package/src/llama.cpp/examples/batched/batched.cpp +9 -7
  42. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
  43. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
  44. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
  45. package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
  46. package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
  47. package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
  48. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
  49. package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
  50. package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
  51. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
  52. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
  53. package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
  54. package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
  55. package/src/llama.cpp/examples/infill/infill.cpp +23 -24
  56. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
  57. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
  58. package/src/llama.cpp/examples/llava/clip.cpp +4 -2
  59. package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
  60. package/src/llama.cpp/examples/llava/llava.cpp +2 -2
  61. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
  62. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
  63. package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
  64. package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
  65. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
  66. package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
  67. package/src/llama.cpp/examples/main/main.cpp +51 -29
  68. package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
  69. package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
  70. package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
  71. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
  72. package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
  73. package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
  74. package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
  76. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
  77. package/src/llama.cpp/examples/run/run.cpp +175 -61
  78. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
  79. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
  80. package/src/llama.cpp/examples/server/httplib.h +1295 -409
  81. package/src/llama.cpp/examples/server/server.cpp +387 -181
  82. package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
  83. package/src/llama.cpp/examples/server/utils.hpp +170 -58
  84. package/src/llama.cpp/examples/simple/simple.cpp +9 -8
  85. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
  86. package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
  87. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
  88. package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
  89. package/src/llama.cpp/examples/tts/tts.cpp +64 -23
  90. package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
  91. package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
  92. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
  93. package/src/llama.cpp/ggml/include/ggml.h +36 -145
  94. package/src/llama.cpp/ggml/include/gguf.h +202 -0
  95. package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
  96. package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
  97. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
  98. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
  99. package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
  100. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
  101. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
  102. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
  103. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
  104. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
  105. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
  106. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
  107. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
  108. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
  109. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
  110. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
  111. package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
  112. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
  113. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
  115. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  116. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
  117. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  119. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
  120. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
  121. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
  122. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
  123. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
  124. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
  125. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
  126. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  127. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  128. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
  129. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
  130. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
  131. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
  132. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
  133. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
  134. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
  135. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
  136. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
  137. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
  138. package/src/llama.cpp/ggml/src/ggml.c +117 -1327
  139. package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
  140. package/src/llama.cpp/include/llama-cpp.h +6 -1
  141. package/src/llama.cpp/include/llama.h +138 -75
  142. package/src/llama.cpp/src/CMakeLists.txt +13 -1
  143. package/src/llama.cpp/src/llama-adapter.cpp +347 -0
  144. package/src/llama.cpp/src/llama-adapter.h +74 -0
  145. package/src/llama.cpp/src/llama-arch.cpp +1487 -0
  146. package/src/llama.cpp/src/llama-arch.h +400 -0
  147. package/src/llama.cpp/src/llama-batch.cpp +368 -0
  148. package/src/llama.cpp/src/llama-batch.h +88 -0
  149. package/src/llama.cpp/src/llama-chat.cpp +578 -0
  150. package/src/llama.cpp/src/llama-chat.h +52 -0
  151. package/src/llama.cpp/src/llama-context.cpp +1775 -0
  152. package/src/llama.cpp/src/llama-context.h +128 -0
  153. package/src/llama.cpp/src/llama-cparams.cpp +1 -0
  154. package/src/llama.cpp/src/llama-cparams.h +37 -0
  155. package/src/llama.cpp/src/llama-grammar.cpp +5 -4
  156. package/src/llama.cpp/src/llama-grammar.h +3 -1
  157. package/src/llama.cpp/src/llama-hparams.cpp +71 -0
  158. package/src/llama.cpp/src/llama-hparams.h +139 -0
  159. package/src/llama.cpp/src/llama-impl.cpp +167 -0
  160. package/src/llama.cpp/src/llama-impl.h +16 -136
  161. package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
  162. package/src/llama.cpp/src/llama-kv-cache.h +218 -0
  163. package/src/llama.cpp/src/llama-mmap.cpp +589 -0
  164. package/src/llama.cpp/src/llama-mmap.h +67 -0
  165. package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
  166. package/src/llama.cpp/src/llama-model-loader.h +167 -0
  167. package/src/llama.cpp/src/llama-model.cpp +3953 -0
  168. package/src/llama.cpp/src/llama-model.h +370 -0
  169. package/src/llama.cpp/src/llama-quant.cpp +934 -0
  170. package/src/llama.cpp/src/llama-quant.h +1 -0
  171. package/src/llama.cpp/src/llama-sampling.cpp +147 -32
  172. package/src/llama.cpp/src/llama-sampling.h +3 -19
  173. package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
  174. package/src/llama.cpp/src/llama-vocab.h +97 -142
  175. package/src/llama.cpp/src/llama.cpp +7160 -20314
  176. package/src/llama.cpp/src/unicode.cpp +8 -3
  177. package/src/llama.cpp/tests/CMakeLists.txt +2 -0
  178. package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
  179. package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
  180. package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
  181. package/src/llama.cpp/tests/test-gguf.cpp +222 -187
  182. package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
  183. package/src/llama.cpp/tests/test-sampling.cpp +0 -1
  184. package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
  185. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
  186. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
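The hunk below is the new file package/src/llama.cpp/src/llama-arch.cpp (entry 145 above, +1487 -0); the diff is truncated at the end of this excerpt.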
@@ -0,0 +1,1487 @@
+#include "llama-arch.h"
+
+#include "llama-impl.h"
+
+#include <map>
+
+static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
+    { LLM_ARCH_LLAMA,            "llama"            },
+    { LLM_ARCH_DECI,             "deci"             },
+    { LLM_ARCH_FALCON,           "falcon"           },
+    { LLM_ARCH_GROK,             "grok"             },
+    { LLM_ARCH_GPT2,             "gpt2"             },
+    { LLM_ARCH_GPTJ,             "gptj"             },
+    { LLM_ARCH_GPTNEOX,          "gptneox"          },
+    { LLM_ARCH_MPT,              "mpt"              },
+    { LLM_ARCH_BAICHUAN,         "baichuan"         },
+    { LLM_ARCH_STARCODER,        "starcoder"        },
+    { LLM_ARCH_REFACT,           "refact"           },
+    { LLM_ARCH_BERT,             "bert"             },
+    { LLM_ARCH_NOMIC_BERT,       "nomic-bert"       },
+    { LLM_ARCH_JINA_BERT_V2,     "jina-bert-v2"     },
+    { LLM_ARCH_BLOOM,            "bloom"            },
+    { LLM_ARCH_STABLELM,         "stablelm"         },
+    { LLM_ARCH_QWEN,             "qwen"             },
+    { LLM_ARCH_QWEN2,            "qwen2"            },
+    { LLM_ARCH_QWEN2MOE,         "qwen2moe"         },
+    { LLM_ARCH_QWEN2VL,          "qwen2vl"          },
+    { LLM_ARCH_PHI2,             "phi2"             },
+    { LLM_ARCH_PHI3,             "phi3"             },
+    { LLM_ARCH_PHIMOE,           "phimoe"           },
+    { LLM_ARCH_PLAMO,            "plamo"            },
+    { LLM_ARCH_CODESHELL,        "codeshell"        },
+    { LLM_ARCH_ORION,            "orion"            },
+    { LLM_ARCH_INTERNLM2,        "internlm2"        },
+    { LLM_ARCH_MINICPM,          "minicpm"          },
+    { LLM_ARCH_MINICPM3,         "minicpm3"         },
+    { LLM_ARCH_GEMMA,            "gemma"            },
+    { LLM_ARCH_GEMMA2,           "gemma2"           },
+    { LLM_ARCH_STARCODER2,       "starcoder2"       },
+    { LLM_ARCH_MAMBA,            "mamba"            },
+    { LLM_ARCH_XVERSE,           "xverse"           },
+    { LLM_ARCH_COMMAND_R,        "command-r"        },
+    { LLM_ARCH_COHERE2,          "cohere2"          },
+    { LLM_ARCH_DBRX,             "dbrx"             },
+    { LLM_ARCH_OLMO,             "olmo"             },
+    { LLM_ARCH_OLMO2,            "olmo2"            },
+    { LLM_ARCH_OLMOE,            "olmoe"            },
+    { LLM_ARCH_OPENELM,          "openelm"          },
+    { LLM_ARCH_ARCTIC,           "arctic"           },
+    { LLM_ARCH_DEEPSEEK,         "deepseek"         },
+    { LLM_ARCH_DEEPSEEK2,        "deepseek2"        },
+    { LLM_ARCH_CHATGLM,          "chatglm"          },
+    { LLM_ARCH_BITNET,           "bitnet"           },
+    { LLM_ARCH_T5,               "t5"               },
+    { LLM_ARCH_T5ENCODER,        "t5encoder"        },
+    { LLM_ARCH_JAIS,             "jais"             },
+    { LLM_ARCH_NEMOTRON,         "nemotron"         },
+    { LLM_ARCH_EXAONE,           "exaone"           },
+    { LLM_ARCH_RWKV6,            "rwkv6"            },
+    { LLM_ARCH_RWKV6QWEN2,       "rwkv6qwen2"       },
+    { LLM_ARCH_GRANITE,          "granite"          },
+    { LLM_ARCH_GRANITE_MOE,      "granitemoe"       },
+    { LLM_ARCH_CHAMELEON,        "chameleon"        },
+    { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
+    { LLM_ARCH_UNKNOWN,          "(unknown)"        },
+};
+
+static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
+    { LLM_KV_GENERAL_TYPE,                     "general.type" },
+    { LLM_KV_GENERAL_ARCHITECTURE,             "general.architecture" },
+    { LLM_KV_GENERAL_QUANTIZATION_VERSION,     "general.quantization_version" },
+    { LLM_KV_GENERAL_ALIGNMENT,                "general.alignment" },
+    { LLM_KV_GENERAL_NAME,                     "general.name" },
+    { LLM_KV_GENERAL_AUTHOR,                   "general.author" },
+    { LLM_KV_GENERAL_VERSION,                  "general.version" },
+    { LLM_KV_GENERAL_URL,                      "general.url" },
+    { LLM_KV_GENERAL_DESCRIPTION,              "general.description" },
+    { LLM_KV_GENERAL_LICENSE,                  "general.license" },
+    { LLM_KV_GENERAL_SOURCE_URL,               "general.source.url" },
+    { LLM_KV_GENERAL_SOURCE_HF_REPO,           "general.source.huggingface.repository" },
+
+    { LLM_KV_VOCAB_SIZE,                       "%s.vocab_size" },
+    { LLM_KV_CONTEXT_LENGTH,                   "%s.context_length" },
+    { LLM_KV_EMBEDDING_LENGTH,                 "%s.embedding_length" },
+    { LLM_KV_FEATURES_LENGTH,                  "%s.features_length" },
+    { LLM_KV_BLOCK_COUNT,                      "%s.block_count" },
+    { LLM_KV_LEADING_DENSE_BLOCK_COUNT,        "%s.leading_dense_block_count" },
+    { LLM_KV_FEED_FORWARD_LENGTH,              "%s.feed_forward_length" },
+    { LLM_KV_EXPERT_FEED_FORWARD_LENGTH,       "%s.expert_feed_forward_length" },
+    { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
+    { LLM_KV_USE_PARALLEL_RESIDUAL,            "%s.use_parallel_residual" },
+    { LLM_KV_TENSOR_DATA_LAYOUT,               "%s.tensor_data_layout" },
+    { LLM_KV_EXPERT_COUNT,                     "%s.expert_count" },
+    { LLM_KV_EXPERT_USED_COUNT,                "%s.expert_used_count" },
+    { LLM_KV_EXPERT_SHARED_COUNT,              "%s.expert_shared_count" },
+    { LLM_KV_EXPERT_WEIGHTS_SCALE,             "%s.expert_weights_scale" },
+    { LLM_KV_EXPERT_WEIGHTS_NORM,              "%s.expert_weights_norm" },
+    { LLM_KV_EXPERT_GATING_FUNC,               "%s.expert_gating_func" },
+    { LLM_KV_POOLING_TYPE,                     "%s.pooling_type" },
+    { LLM_KV_LOGIT_SCALE,                      "%s.logit_scale" },
+    { LLM_KV_DECODER_START_TOKEN_ID,           "%s.decoder_start_token_id" },
+    { LLM_KV_ATTN_LOGIT_SOFTCAPPING,           "%s.attn_logit_softcapping" },
+    { LLM_KV_FINAL_LOGIT_SOFTCAPPING,          "%s.final_logit_softcapping" },
+    { LLM_KV_SWIN_NORM,                        "%s.swin_norm" },
+    { LLM_KV_RESCALE_EVERY_N_LAYERS,           "%s.rescale_every_n_layers" },
+    { LLM_KV_TIME_MIX_EXTRA_DIM,               "%s.time_mix_extra_dim" },
+    { LLM_KV_TIME_DECAY_EXTRA_DIM,             "%s.time_decay_extra_dim" },
+    { LLM_KV_RESIDUAL_SCALE,                   "%s.residual_scale" },
+    { LLM_KV_EMBEDDING_SCALE,                  "%s.embedding_scale" },
+    { LLM_KV_TOKEN_SHIFT_COUNT,                "%s.token_shift_count" },
+
+    { LLM_KV_ATTENTION_HEAD_COUNT,             "%s.attention.head_count" },
+    { LLM_KV_ATTENTION_HEAD_COUNT_KV,          "%s.attention.head_count_kv" },
+    { LLM_KV_ATTENTION_MAX_ALIBI_BIAS,         "%s.attention.max_alibi_bias" },
+    { LLM_KV_ATTENTION_CLAMP_KQV,              "%s.attention.clamp_kqv" },
+    { LLM_KV_ATTENTION_KEY_LENGTH,             "%s.attention.key_length" },
+    { LLM_KV_ATTENTION_VALUE_LENGTH,           "%s.attention.value_length" },
+    { LLM_KV_ATTENTION_LAYERNORM_EPS,          "%s.attention.layer_norm_epsilon" },
+    { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,      "%s.attention.layer_norm_rms_epsilon" },
+    { LLM_KV_ATTENTION_GROUPNORM_EPS,          "%s.attention.group_norm_epsilon" },
+    { LLM_KV_ATTENTION_GROUPNORM_GROUPS,       "%s.attention.group_norm_groups" },
+    { LLM_KV_ATTENTION_CAUSAL,                 "%s.attention.causal" },
+    { LLM_KV_ATTENTION_Q_LORA_RANK,            "%s.attention.q_lora_rank" },
+    { LLM_KV_ATTENTION_KV_LORA_RANK,           "%s.attention.kv_lora_rank" },
+    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
+    { LLM_KV_ATTENTION_SLIDING_WINDOW,         "%s.attention.sliding_window" },
+    { LLM_KV_ATTENTION_SCALE,                  "%s.attention.scale" },
+
+    { LLM_KV_ROPE_DIMENSION_COUNT,             "%s.rope.dimension_count" },
+    { LLM_KV_ROPE_DIMENSION_SECTIONS,          "%s.rope.dimension_sections" },
+    { LLM_KV_ROPE_FREQ_BASE,                   "%s.rope.freq_base" },
+    { LLM_KV_ROPE_SCALE_LINEAR,                "%s.rope.scale_linear" },
+    { LLM_KV_ROPE_SCALING_TYPE,                "%s.rope.scaling.type" },
+    { LLM_KV_ROPE_SCALING_FACTOR,              "%s.rope.scaling.factor" },
+    { LLM_KV_ROPE_SCALING_ATTN_FACTOR,         "%s.rope.scaling.attn_factor" },
+    { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,        "%s.rope.scaling.original_context_length" },
+    { LLM_KV_ROPE_SCALING_FINETUNED,           "%s.rope.scaling.finetuned" },
+    { LLM_KV_ROPE_SCALING_YARN_LOG_MUL,        "%s.rope.scaling.yarn_log_multiplier" },
+
+    { LLM_KV_SPLIT_NO,                         "split.no" },
+    { LLM_KV_SPLIT_COUNT,                      "split.count" },
+    { LLM_KV_SPLIT_TENSORS_COUNT,              "split.tensors.count" },
+
+    { LLM_KV_SSM_CONV_KERNEL,                  "%s.ssm.conv_kernel" },
+    { LLM_KV_SSM_INNER_SIZE,                   "%s.ssm.inner_size" },
+    { LLM_KV_SSM_STATE_SIZE,                   "%s.ssm.state_size" },
+    { LLM_KV_SSM_TIME_STEP_RANK,               "%s.ssm.time_step_rank" },
+    { LLM_KV_SSM_DT_B_C_RMS,                   "%s.ssm.dt_b_c_rms" },
+
+    { LLM_KV_WKV_HEAD_SIZE,                    "%s.wkv.head_size" },
+
+    { LLM_KV_POSNET_EMBEDDING_LENGTH,          "%s.posnet.embedding_length" },
+    { LLM_KV_POSNET_BLOCK_COUNT,               "%s.posnet.block_count" },
+
+    { LLM_KV_CONVNEXT_EMBEDDING_LENGTH,        "%s.convnext.embedding_length" },
+    { LLM_KV_CONVNEXT_BLOCK_COUNT,             "%s.convnext.block_count" },
+
+    { LLM_KV_TOKENIZER_MODEL,                  "tokenizer.ggml.model" },
+    { LLM_KV_TOKENIZER_PRE,                    "tokenizer.ggml.pre" },
+    { LLM_KV_TOKENIZER_LIST,                   "tokenizer.ggml.tokens" },
+    { LLM_KV_TOKENIZER_TOKEN_TYPE,             "tokenizer.ggml.token_type" },
+    { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,       "tokenizer.ggml.token_type_count" },
+    { LLM_KV_TOKENIZER_SCORES,                 "tokenizer.ggml.scores" },
+    { LLM_KV_TOKENIZER_MERGES,                 "tokenizer.ggml.merges" },
+    { LLM_KV_TOKENIZER_BOS_ID,                 "tokenizer.ggml.bos_token_id" },
+    { LLM_KV_TOKENIZER_EOS_ID,                 "tokenizer.ggml.eos_token_id" },
+    { LLM_KV_TOKENIZER_EOT_ID,                 "tokenizer.ggml.eot_token_id" },
+    { LLM_KV_TOKENIZER_EOM_ID,                 "tokenizer.ggml.eom_token_id" },
+    { LLM_KV_TOKENIZER_UNK_ID,                 "tokenizer.ggml.unknown_token_id" },
+    { LLM_KV_TOKENIZER_SEP_ID,                 "tokenizer.ggml.seperator_token_id" },
+    { LLM_KV_TOKENIZER_PAD_ID,                 "tokenizer.ggml.padding_token_id" },
+    { LLM_KV_TOKENIZER_CLS_ID,                 "tokenizer.ggml.cls_token_id" },
+    { LLM_KV_TOKENIZER_MASK_ID,                "tokenizer.ggml.mask_token_id" },
+    { LLM_KV_TOKENIZER_ADD_BOS,                "tokenizer.ggml.add_bos_token" },
+    { LLM_KV_TOKENIZER_ADD_EOS,                "tokenizer.ggml.add_eos_token" },
+    { LLM_KV_TOKENIZER_ADD_PREFIX,             "tokenizer.ggml.add_space_prefix" },
+    { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,        "tokenizer.ggml.remove_extra_whitespaces" },
+    { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,   "tokenizer.ggml.precompiled_charsmap" },
+    { LLM_KV_TOKENIZER_HF_JSON,                "tokenizer.huggingface.json" },
+    { LLM_KV_TOKENIZER_RWKV,                   "tokenizer.rwkv.world" },
+    { LLM_KV_TOKENIZER_CHAT_TEMPLATE,          "tokenizer.chat_template" },
+    { LLM_KV_TOKENIZER_FIM_PRE_ID,             "tokenizer.ggml.fim_pre_token_id" },
+    { LLM_KV_TOKENIZER_FIM_SUF_ID,             "tokenizer.ggml.fim_suf_token_id" },
+    { LLM_KV_TOKENIZER_FIM_MID_ID,             "tokenizer.ggml.fim_mid_token_id" },
+    { LLM_KV_TOKENIZER_FIM_PAD_ID,             "tokenizer.ggml.fim_pad_token_id" },
+    { LLM_KV_TOKENIZER_FIM_REP_ID,             "tokenizer.ggml.fim_rep_token_id" },
+    { LLM_KV_TOKENIZER_FIM_SEP_ID,             "tokenizer.ggml.fim_sep_token_id" },
+
+    { LLM_KV_ADAPTER_TYPE,                     "adapter.type" },
+    { LLM_KV_ADAPTER_LORA_ALPHA,               "adapter.lora.alpha" },
+
+    // deprecated
+    { LLM_KV_TOKENIZER_PREFIX_ID,              "tokenizer.ggml.prefix_token_id" },
+    { LLM_KV_TOKENIZER_SUFFIX_ID,              "tokenizer.ggml.suffix_token_id" },
+    { LLM_KV_TOKENIZER_MIDDLE_ID,              "tokenizer.ggml.middle_token_id" },
+};
+
+static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
+    {
+        LLM_ARCH_LLAMA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_DECI,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_BAICHUAN,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_FALCON,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_NORM_2,     "blk.%d.attn_norm_2" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_GROK,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+        },
+    },
+    {
+        LLM_ARCH_GPT2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_POS_EMBD,        "position_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+        },
+    },
+    {
+        LLM_ARCH_GPTJ,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+        },
+    },
+    {
+        LLM_ARCH_GPTNEOX,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_MPT,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output"},
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_ACT,         "blk.%d.ffn.act" },
+            { LLM_TENSOR_POS_EMBD,        "position_embd" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm"},
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm"},
+        },
+    },
+    {
+        LLM_ARCH_STARCODER,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_POS_EMBD,        "position_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+        },
+    },
+    {
+        LLM_ARCH_REFACT,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_BERT,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
+            { LLM_TENSOR_POS_EMBD,        "position_embd" },
+            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_CLS,             "cls" },
+            { LLM_TENSOR_CLS_OUT,         "cls.output" },
+        },
+    },
+    {
+        LLM_ARCH_NOMIC_BERT,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
+            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_JINA_BERT_V2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
+            { LLM_TENSOR_ATTN_NORM_2,     "blk.%d.attn_norm_2" },
+            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_CLS,             "cls" },
+        },
+    },
+    {
+        LLM_ARCH_BLOOM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+        },
+    },
+    {
+        LLM_ARCH_STABLELM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+        },
+    },
+    {
+        LLM_ARCH_QWEN,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_QWEN2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_QWEN2VL,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_QWEN2MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
+        },
+    },
+    {
+        LLM_ARCH_PHI2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_PHI3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
+            { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,           "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_PHIMOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
+            { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,           "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_PLAMO,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_CODESHELL,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_ORION,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_INTERNLM2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_MINICPM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ROPE_FREQS,         "rope_freqs" },
+            { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
+            { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,      "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP,       "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP,       "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP,         "blk.%d.ffn_up.%d" },
+        },
+    },
+    {
+        LLM_ARCH_MINICPM3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
+            { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q_A_NORM,      "blk.%d.attn_q_a_norm" },
+            { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_A,           "blk.%d.attn_q_a" },
+            { LLM_TENSOR_ATTN_Q_B,           "blk.%d.attn_q_b" },
+            { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
+            { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+        },
+    },
+    {
+        LLM_ARCH_GEMMA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_GEMMA2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
+        },
+    },
+    {
+        LLM_ARCH_STARCODER2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_MAMBA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_X,           "blk.%d.ssm_x" },
+            { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A,           "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D,           "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
+        },
+    },
+    {
+        LLM_ARCH_XVERSE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_COMMAND_R,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+        },
+    },
+    {
+        LLM_ARCH_COHERE2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_DBRX,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_OLMO,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_OLMO2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_OLMOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_OPENELM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_ARCTIC,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_NORM_EXPS,   "blk.%d.ffn_norm_exps" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_DEEPSEEK,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ROPE_FREQS,         "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,      "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
+        },
+    },
+    {
+        LLM_ARCH_DEEPSEEK2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q_A_NORM,      "blk.%d.attn_q_a_norm" },
+            { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_A,           "blk.%d.attn_q_a" },
+            { LLM_TENSOR_ATTN_Q_B,           "blk.%d.attn_q_b" },
+            { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
+            { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
+        },
+    },
+    {
+        LLM_ARCH_CHATGLM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+        },
+    },
+    {
+        LLM_ARCH_BITNET,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_SUB_NORM,   "blk.%d.attn_sub_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_SUB_NORM,    "blk.%d.ffn_sub_norm" },
+        },
+    },
+    {
+        LLM_ARCH_T5,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,           "token_embd" },
+            { LLM_TENSOR_OUTPUT,               "output" },
+            { LLM_TENSOR_DEC_OUTPUT_NORM,      "dec.output_norm" },
+            { LLM_TENSOR_DEC_ATTN_NORM,        "dec.blk.%d.attn_norm" },
+            { LLM_TENSOR_DEC_ATTN_Q,           "dec.blk.%d.attn_q" },
+            { LLM_TENSOR_DEC_ATTN_K,           "dec.blk.%d.attn_k" },
+            { LLM_TENSOR_DEC_ATTN_V,           "dec.blk.%d.attn_v" },
+            { LLM_TENSOR_DEC_ATTN_OUT,         "dec.blk.%d.attn_o" },
+            { LLM_TENSOR_DEC_ATTN_REL_B,       "dec.blk.%d.attn_rel_b" },
+            { LLM_TENSOR_DEC_CROSS_ATTN_NORM,  "dec.blk.%d.cross_attn_norm" },
+            { LLM_TENSOR_DEC_CROSS_ATTN_Q,     "dec.blk.%d.cross_attn_q" },
+            { LLM_TENSOR_DEC_CROSS_ATTN_K,     "dec.blk.%d.cross_attn_k" },
+            { LLM_TENSOR_DEC_CROSS_ATTN_V,     "dec.blk.%d.cross_attn_v" },
+            { LLM_TENSOR_DEC_CROSS_ATTN_OUT,   "dec.blk.%d.cross_attn_o" },
+            { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
+            { LLM_TENSOR_DEC_FFN_NORM,         "dec.blk.%d.ffn_norm" },
+            { LLM_TENSOR_DEC_FFN_GATE,         "dec.blk.%d.ffn_gate" },
+            { LLM_TENSOR_DEC_FFN_DOWN,         "dec.blk.%d.ffn_down" },
+            { LLM_TENSOR_DEC_FFN_UP,           "dec.blk.%d.ffn_up" },
+            { LLM_TENSOR_ENC_OUTPUT_NORM,      "enc.output_norm" },
+            { LLM_TENSOR_ENC_ATTN_NORM,        "enc.blk.%d.attn_norm" },
+            { LLM_TENSOR_ENC_ATTN_Q,           "enc.blk.%d.attn_q" },
+            { LLM_TENSOR_ENC_ATTN_K,           "enc.blk.%d.attn_k" },
+            { LLM_TENSOR_ENC_ATTN_V,           "enc.blk.%d.attn_v" },
+            { LLM_TENSOR_ENC_ATTN_OUT,         "enc.blk.%d.attn_o" },
+            { LLM_TENSOR_ENC_ATTN_REL_B,       "enc.blk.%d.attn_rel_b" },
+            { LLM_TENSOR_ENC_FFN_NORM,         "enc.blk.%d.ffn_norm" },
+            { LLM_TENSOR_ENC_FFN_GATE,         "enc.blk.%d.ffn_gate" },
+            { LLM_TENSOR_ENC_FFN_DOWN,         "enc.blk.%d.ffn_down" },
+            { LLM_TENSOR_ENC_FFN_UP,           "enc.blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_T5ENCODER,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
+            { LLM_TENSOR_ENC_ATTN_NORM,   "enc.blk.%d.attn_norm" },
+            { LLM_TENSOR_ENC_ATTN_Q,      "enc.blk.%d.attn_q" },
+            { LLM_TENSOR_ENC_ATTN_K,      "enc.blk.%d.attn_k" },
+            { LLM_TENSOR_ENC_ATTN_V,      "enc.blk.%d.attn_v" },
+            { LLM_TENSOR_ENC_ATTN_OUT,    "enc.blk.%d.attn_o" },
+            { LLM_TENSOR_ENC_ATTN_REL_B,  "enc.blk.%d.attn_rel_b" },
+            { LLM_TENSOR_ENC_FFN_NORM,    "enc.blk.%d.ffn_norm" },
+            { LLM_TENSOR_ENC_FFN_GATE,    "enc.blk.%d.ffn_gate" },
+            { LLM_TENSOR_ENC_FFN_DOWN,    "enc.blk.%d.ffn_down" },
+            { LLM_TENSOR_ENC_FFN_UP,      "enc.blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_JAIS,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+        },
+    },
+    {
+        LLM_ARCH_NEMOTRON,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_EXAONE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_RWKV6,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1159
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1160
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1161
+ { LLM_TENSOR_OUTPUT, "output" },
1162
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1163
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1164
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1165
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1166
+ { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1167
+ { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
1168
+ { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
1169
+ { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
1170
+ { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
1171
+ { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
1172
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1173
+ { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1174
+ { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1175
+ { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1176
+ { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1177
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1178
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1179
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1180
+ { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1181
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1182
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1183
+ { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1184
+ { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
1185
+ { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1186
+ { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1187
+ { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
1188
+ },
1189
+ },
1190
+ {
1191
+ LLM_ARCH_RWKV6QWEN2,
1192
+ {
1193
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1194
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1195
+ { LLM_TENSOR_OUTPUT, "output" },
1196
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1197
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1198
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1199
+ { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1200
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1201
+ { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1202
+ { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1203
+ { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1204
+ { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1205
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1206
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1207
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1208
+ { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1209
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1210
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1211
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1212
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1213
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1214
+ },
1215
+ },
1216
+ {
1217
+ LLM_ARCH_GRANITE,
1218
+ {
1219
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1220
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1221
+ { LLM_TENSOR_OUTPUT, "output" },
1222
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1223
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1224
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1225
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1226
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1227
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1228
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1229
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1230
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1231
+ },
1232
+ },
1233
+ {
1234
+ LLM_ARCH_GRANITE_MOE,
1235
+ {
1236
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1237
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1238
+ { LLM_TENSOR_OUTPUT, "output" },
1239
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1240
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1241
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1242
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1243
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1244
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1245
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1246
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1247
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1248
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1249
+ },
1250
+ },
1251
+ {
1252
+ LLM_ARCH_CHAMELEON,
1253
+ {
1254
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1255
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1256
+ { LLM_TENSOR_OUTPUT, "output" },
1257
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1258
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1259
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1260
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1261
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1262
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1263
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1264
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1265
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1266
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1267
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1268
+ },
1269
+ },
1270
+ {
1271
+ LLM_ARCH_WAVTOKENIZER_DEC,
1272
+ {
1273
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1274
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1275
+ { LLM_TENSOR_CONV1D, "conv1d" },
1276
+ { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
1277
+ { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
1278
+ { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
1279
+ { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
1280
+ { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
1281
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1282
+ { LLM_TENSOR_OUTPUT, "output" },
1283
+ { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
1284
+ { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
1285
+ { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
1286
+ { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
1287
+ { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
1288
+ { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
1289
+ { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
1290
+ { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
1291
+ { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
1292
+ { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
1293
+ },
1294
+ },
1295
+ {
1296
+ LLM_ARCH_UNKNOWN,
1297
+ {
1298
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1299
+ },
1300
+ },
1301
+ };
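
Each `%d` in the templates above is substituted with the zero-based block (layer) index when a concrete GGUF tensor name is built, so a single table entry covers every repeating layer. A minimal sketch of that substitution, assuming a plain snprintf-based helper rather than the project's internal `format` utility (the name `expand_tensor_name` is hypothetical):

#include <cstdio>
#include <string>

// Hypothetical stand-in for the formatting done by LLM_TN_IMPL::str() below:
// expand a template such as "blk.%d.attn_q" with a block index.
static std::string expand_tensor_name(const char * templ, int bid) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), templ, bid);
    return std::string(buf);
}

// expand_tensor_name("blk.%d.attn_q", 7) -> "blk.7.attn_q"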
+
+ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
+ {LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_TOKEN_TYPES, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
+ {LLM_TENSOR_ENC_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
+ {LLM_TENSOR_ROPE_FREQS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
+ {LLM_TENSOR_ROPE_FACTORS_LONG, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
+ {LLM_TENSOR_ROPE_FACTORS_SHORT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
+ {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_DEC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_ENC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_GATE_INP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_GATE_INP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SSM_IN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SSM_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SSM_DT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_OUTPUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CHANNEL_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CHANNEL_MIX_VALUE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
+ {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
+ {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
+ {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_RWKV_WKV6}},
+ {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_FFN_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_FFN_NORM_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_Q_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_K_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_LAYER_OUT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_Q_A_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_KV_A_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ATTN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_FFN_SUB_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_CROSS_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ENC_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_ENC_FFN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_DEC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_ENC_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
+ {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
+ {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
+ {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ // this tensor is loaded for T5, but never used
+ {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
+ {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}},
+ {LLM_TENSOR_POS_NET_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_POS_NET_NORM1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_POS_NET_NORM2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_POS_NET_CONV1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
+ {LLM_TENSOR_POS_NET_CONV2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
+ {LLM_TENSOR_POS_NET_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_POS_NET_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_POS_NET_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_POS_NET_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_POS_NET_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CONVNEXT_DW, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
+ {LLM_TENSOR_CONVNEXT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ };
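
LLM_TENSOR_INFOS classifies each tensor by where it lives (input, output, or per-block repeating layer) and by the ggml op that consumes it, so the loader can make placement decisions without per-architecture logic. A hedged lookup sketch, assuming `llm_tensor_info` exposes `layer` and `op` fields as the initializers above suggest:

// Look up placement info for the per-block FFN up-projection.
const llm_tensor_info & info = LLM_TENSOR_INFOS.at(LLM_TENSOR_FFN_UP);
// info.layer == LLM_TENSOR_LAYER_REPEATING -> allocated once per block
// info.op    == GGML_OP_MUL_MAT            -> consumed by a matrix multiply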
+
+ LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
+
+ std::string LLM_KV::operator()(llm_kv kv) const {
+ return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
+ }
+
+ std::string LLM_TN_IMPL::str() const {
+ if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
+ return "__missing__";
+ }
+
+ std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
+
+ if (suffix != nullptr) {
+ name += ".";
+ name += suffix;
+ }
+
+ return name;
+ }
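
LLM_TN_IMPL::str() is what turns an (architecture, tensor, suffix, block id) tuple into the final GGUF name, falling back to "__missing__" when the architecture does not define that tensor. Assuming the usual LLM_TN wrapper from the header is what constructs the LLM_TN_IMPL, usage looks roughly like:

LLM_TN tn(LLM_ARCH_LLAMA);
// template "blk.%d.attn_q" + bid 0 + suffix "weight" -> "blk.0.attn_q.weight"
std::string name = tn(LLM_TENSOR_ATTN_Q, "weight", 0).str();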
+
+ const char * llm_arch_name(llm_arch arch) {
+ auto it = LLM_ARCH_NAMES.find(arch);
+ if (it == LLM_ARCH_NAMES.end()) {
+ return "unknown";
+ }
+ return it->second;
+ }
+
+ llm_arch llm_arch_from_string(const std::string & name) {
+ for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
+ if (kv.second == name) {
+ return kv.first;
+ }
+ }
+
+ return LLM_ARCH_UNKNOWN;
+ }
+
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
+ return LLM_TENSOR_INFOS.at(tensor);
+ }
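
Together these helpers map the architecture string stored in a GGUF header to the enum and back, with safe fallbacks on both sides. A small sketch of the expected round trip (return values inferred from the definitions above; "llama" is used as an illustrative architecture string):

llm_arch arch = llm_arch_from_string("llama");   // LLM_ARCH_UNKNOWN if unrecognized
const char * name = llm_arch_name(arch);         // "unknown" if the enum has no entry
const llm_tensor_info & info = llm_tensor_info_for(LLM_TENSOR_ATTN_Q);
// note: llm_tensor_info_for() throws std::out_of_range for tensors absent from the map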