bigdl-core-cpp 2.5.0rc1__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (92)
  1. bigdl/cpp/{convert-hf-to-gguf.py → convert_hf_to_gguf.py} +1673 -278
  2. bigdl/cpp/convert_hf_to_gguf_update.py +381 -0
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +450 -0
  4. bigdl/cpp/convert_lora_to_gguf.py +461 -0
  5. bigdl/cpp/gguf-py/gguf/__init__.py +1 -1
  6. bigdl/cpp/gguf-py/gguf/constants.py +698 -171
  7. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  8. bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
  9. bigdl/cpp/gguf-py/gguf/gguf_writer.py +108 -17
  10. bigdl/cpp/gguf-py/gguf/lazy.py +3 -1
  11. bigdl/cpp/gguf-py/gguf/metadata.py +195 -76
  12. bigdl/cpp/gguf-py/gguf/quants.py +1210 -64
  13. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +262 -43
  14. bigdl/cpp/gguf-py/gguf/utility.py +2 -2
  15. bigdl/cpp/gguf-py/gguf/vocab.py +325 -3
  16. bigdl/cpp/libs/common.lib +0 -0
  17. bigdl/cpp/libs/ggml-base.dll +0 -0
  18. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  19. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  20. bigdl/cpp/libs/ggml.dll +0 -0
  21. bigdl/cpp/libs/libc++.dll +0 -0
  22. bigdl/cpp/libs/llama-batched.exe +0 -0
  23. bigdl/cpp/libs/llama-bench.exe +0 -0
  24. bigdl/cpp/libs/llama-cli.exe +0 -0
  25. bigdl/cpp/libs/llama-embedding.exe +0 -0
  26. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  27. bigdl/cpp/libs/llama-gguf.exe +0 -0
  28. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  29. bigdl/cpp/libs/llama-lookup.exe +0 -0
  30. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  31. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  32. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  33. bigdl/cpp/libs/llama-quantize.exe +0 -0
  34. bigdl/cpp/libs/llama-server.exe +0 -0
  35. bigdl/cpp/libs/llama-simple.exe +0 -0
  36. bigdl/cpp/libs/llama-speculative.exe +0 -0
  37. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  38. bigdl/cpp/libs/llama.dll +0 -0
  39. bigdl/cpp/libs/llava_shared.dll +0 -0
  40. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  41. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  42. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  43. bigdl/cpp/libs/ollama-lib.exe +0 -0
  44. bigdl/cpp/libs/ollama.exe +0 -0
  45. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  46. bigdl/cpp/libs/ollama_llama.dll +0 -0
  47. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  48. {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +7 -2
  49. bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
  50. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
  51. bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
  52. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
  53. bigdl/cpp/convert.py +0 -1714
  54. bigdl/cpp/libs/baby-llama.exe +0 -0
  55. bigdl/cpp/libs/batched-bench.exe +0 -0
  56. bigdl/cpp/libs/batched.exe +0 -0
  57. bigdl/cpp/libs/beam-search.exe +0 -0
  58. bigdl/cpp/libs/benchmark.exe +0 -0
  59. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  60. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  61. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  62. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  63. bigdl/cpp/libs/embedding.exe +0 -0
  64. bigdl/cpp/libs/export-lora.exe +0 -0
  65. bigdl/cpp/libs/finetune.exe +0 -0
  66. bigdl/cpp/libs/ggml_shared.dll +0 -0
  67. bigdl/cpp/libs/gguf.exe +0 -0
  68. bigdl/cpp/libs/gritlm.exe +0 -0
  69. bigdl/cpp/libs/imatrix.exe +0 -0
  70. bigdl/cpp/libs/infill.exe +0 -0
  71. bigdl/cpp/libs/llava-cli.exe +0 -0
  72. bigdl/cpp/libs/lookahead.exe +0 -0
  73. bigdl/cpp/libs/lookup.exe +0 -0
  74. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  75. bigdl/cpp/libs/main.exe +0 -0
  76. bigdl/cpp/libs/parallel.exe +0 -0
  77. bigdl/cpp/libs/passkey.exe +0 -0
  78. bigdl/cpp/libs/perplexity.exe +0 -0
  79. bigdl/cpp/libs/q8dot.exe +0 -0
  80. bigdl/cpp/libs/quantize-stats.exe +0 -0
  81. bigdl/cpp/libs/quantize.exe +0 -0
  82. bigdl/cpp/libs/save-load-state.exe +0 -0
  83. bigdl/cpp/libs/server.exe +0 -0
  84. bigdl/cpp/libs/simple.exe +0 -0
  85. bigdl/cpp/libs/speculative.exe +0 -0
  86. bigdl/cpp/libs/tokenize.exe +0 -0
  87. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  88. bigdl/cpp/libs/vdot.exe +0 -0
  89. bigdl_core_cpp-2.5.0rc1.data/scripts/init-ollama.bat +0 -13
  90. bigdl_core_cpp-2.5.0rc1.dist-info/RECORD +0 -63
  91. {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
  92. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
@@ -64,20 +64,33 @@ class Keys:
         BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
         BASE_MODEL_VERSION = "general.base_model.{id}.version"
         BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
+        BASE_MODEL_DESCRIPTION = "general.base_model.{id}.description"
         BASE_MODEL_URL = "general.base_model.{id}.url"  # Model Website/Paper
         BASE_MODEL_DOI = "general.base_model.{id}.doi"
         BASE_MODEL_UUID = "general.base_model.{id}.uuid"
         BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url"  # Model Source Repository (git/svn/etc...)
 
+        # Dataset Source
+        DATASET_COUNT = "general.dataset.count"
+        DATASET_NAME = "general.dataset.{id}.name"
+        DATASET_AUTHOR = "general.dataset.{id}.author"
+        DATASET_VERSION = "general.dataset.{id}.version"
+        DATASET_ORGANIZATION = "general.dataset.{id}.organization"
+        DATASET_DESCRIPTION = "general.dataset.{id}.description"
+        DATASET_URL = "general.dataset.{id}.url"  # Model Website/Paper
+        DATASET_DOI = "general.dataset.{id}.doi"
+        DATASET_UUID = "general.dataset.{id}.uuid"
+        DATASET_REPO_URL = "general.dataset.{id}.repo_url"  # Model Source Repository (git/svn/etc...)
+
         # Array based KV stores
         TAGS = "general.tags"
         LANGUAGES = "general.languages"
-        DATASETS = "general.datasets"
 
     class LLM:
         VOCAB_SIZE = "{arch}.vocab_size"
         CONTEXT_LENGTH = "{arch}.context_length"
         EMBEDDING_LENGTH = "{arch}.embedding_length"
+        FEATURES_LENGTH = "{arch}.features_length"
         BLOCK_COUNT = "{arch}.block_count"
         LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
         FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
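Note: the new `general.dataset.*` keys replace the flat `general.datasets` array removed above with one indexed key group per dataset, following the same `{id}` template convention as the `general.base_model.*` keys. A minimal sketch of how a converter might expand them (the key strings come from this diff; the dataset names and the `gguf` import path are assumptions):

```python
from gguf.constants import Keys  # assumes gguf-py is importable as `gguf`

datasets = ["dataset-a", "dataset-b"]  # hypothetical dataset names
kv = {Keys.General.DATASET_COUNT: len(datasets)}
for i, name in enumerate(datasets):
    # DATASET_NAME == "general.dataset.{id}.name"
    kv[Keys.General.DATASET_NAME.format(id=i)] = name

# kv == {"general.dataset.count": 2,
#        "general.dataset.0.name": "dataset-a",
#        "general.dataset.1.name": "dataset-b"}
```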
@@ -89,11 +102,20 @@ class Keys:
         EXPERT_USED_COUNT = "{arch}.expert_used_count"
         EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
         EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
+        EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
+        EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
         POOLING_TYPE = "{arch}.pooling_type"
         LOGIT_SCALE = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
         ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
         FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
+        SWIN_NORM = "{arch}.swin_norm"
+        RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
+        TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
+        TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
+        RESIDUAL_SCALE = "{arch}.residual_scale"
+        EMBEDDING_SCALE = "{arch}.embedding_scale"
+        TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
 
     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
@@ -104,14 +126,18 @@ class Keys:
         VALUE_LENGTH = "{arch}.attention.value_length"
         LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
         LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
+        GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
+        GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
         CAUSAL = "{arch}.attention.causal"
         Q_LORA_RANK = "{arch}.attention.q_lora_rank"
         KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
         REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
         SLIDING_WINDOW = "{arch}.attention.sliding_window"
+        SCALE = "{arch}.attention.scale"
 
     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"
+        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
         FREQ_BASE = "{arch}.rope.freq_base"
         SCALING_TYPE = "{arch}.rope.scaling.type"
         SCALING_FACTOR = "{arch}.rope.scaling.factor"
@@ -130,6 +156,18 @@ class Keys:
         INNER_SIZE = "{arch}.ssm.inner_size"
         STATE_SIZE = "{arch}.ssm.state_size"
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
+        DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"
+
+    class WKV:
+        HEAD_SIZE = "{arch}.wkv.head_size"
+
+    class PosNet:
+        EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
+        BLOCK_COUNT = "{arch}.posnet.block_count"
+
+    class ConvNext:
+        EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
+        BLOCK_COUNT = "{arch}.convnext.block_count"
 
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
@@ -141,10 +179,11 @@ class Keys:
         MERGES = "tokenizer.ggml.merges"
         BOS_ID = "tokenizer.ggml.bos_token_id"
         EOS_ID = "tokenizer.ggml.eos_token_id"
+        EOT_ID = "tokenizer.ggml.eot_token_id"
+        EOM_ID = "tokenizer.ggml.eom_token_id"
         UNK_ID = "tokenizer.ggml.unknown_token_id"
         SEP_ID = "tokenizer.ggml.seperator_token_id"
         PAD_ID = "tokenizer.ggml.padding_token_id"
-        CLS_ID = "tokenizer.ggml.cls_token_id"
         MASK_ID = "tokenizer.ggml.mask_token_id"
         ADD_BOS = "tokenizer.ggml.add_bos_token"
         ADD_EOS = "tokenizer.ggml.add_eos_token"
@@ -157,10 +196,16 @@ class Keys:
         CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
         CHAT_TEMPLATES = "tokenizer.chat_templates"
         # FIM/Infill special tokens constants
+        FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
+        FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
+        FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
+        FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
+        FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
+        FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
+        # deprecated:
         PREFIX_ID = "tokenizer.ggml.prefix_token_id"
         SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
         MIDDLE_ID = "tokenizer.ggml.middle_token_id"
-        EOT_ID = "tokenizer.ggml.eot_token_id"
 
     class Adapter:
         TYPE = "adapter.type"
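Note: the new `fim_*_token_id` keys supersede the deprecated `prefix`/`suffix`/`middle` naming kept above for compatibility (and the `eot` id moves up to sit beside `eos`). A sketch of the usual prompt layout these ids enable; all token ids below are hypothetical placeholders, not values from any real model:

```python
# Hypothetical special-token ids, as they would be read from the
# fim_pre/fim_suf/fim_mid metadata keys of a concrete model.
fim_pre_id, fim_suf_id, fim_mid_id = 1, 2, 3

prefix_tokens = [101, 102]  # code before the cursor (hypothetical)
suffix_tokens = [201, 202]  # code after the cursor (hypothetical)

# Common fill-in-the-middle ordering: the model generates the "middle"
# after seeing <FIM_PRE> prefix <FIM_SUF> suffix <FIM_MID>.
prompt = [fim_pre_id, *prefix_tokens, fim_suf_id, *suffix_tokens, fim_mid_id]
```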
@@ -177,46 +222,63 @@ class GGUFType:
 
 
 class MODEL_ARCH(IntEnum):
-    LLAMA = auto()
-    FALCON = auto()
-    BAICHUAN = auto()
-    GROK = auto()
-    GPT2 = auto()
-    GPTJ = auto()
-    GPTNEOX = auto()
-    MPT = auto()
-    STARCODER = auto()
-    REFACT = auto()
-    BERT = auto()
-    NOMIC_BERT = auto()
-    JINA_BERT_V2 = auto()
-    BLOOM = auto()
-    STABLELM = auto()
-    QWEN = auto()
-    QWEN2 = auto()
-    QWEN2MOE = auto()
-    PHI2 = auto()
-    PHI3 = auto()
-    PLAMO = auto()
-    CODESHELL = auto()
-    ORION = auto()
-    INTERNLM2 = auto()
-    MINICPM = auto()
-    GEMMA = auto()
-    GEMMA2 = auto()
-    STARCODER2 = auto()
-    MAMBA = auto()
-    XVERSE = auto()
-    COMMAND_R = auto()
-    DBRX = auto()
-    OLMO = auto()
-    OPENELM = auto()
-    ARCTIC = auto()
-    DEEPSEEK2 = auto()
-    CHATGLM = auto()
-    BITNET = auto()
-    T5 = auto()
-    JAIS = auto()
+    LLAMA = auto()
+    DECI = auto()
+    FALCON = auto()
+    BAICHUAN = auto()
+    GROK = auto()
+    GPT2 = auto()
+    GPTJ = auto()
+    GPTNEOX = auto()
+    MPT = auto()
+    STARCODER = auto()
+    REFACT = auto()
+    BERT = auto()
+    NOMIC_BERT = auto()
+    JINA_BERT_V2 = auto()
+    BLOOM = auto()
+    STABLELM = auto()
+    QWEN = auto()
+    QWEN2 = auto()
+    QWEN2MOE = auto()
+    QWEN2VL = auto()
+    PHI2 = auto()
+    PHI3 = auto()
+    PHIMOE = auto()
+    PLAMO = auto()
+    CODESHELL = auto()
+    ORION = auto()
+    INTERNLM2 = auto()
+    MINICPM = auto()
+    MINICPM3 = auto()
+    GEMMA = auto()
+    GEMMA2 = auto()
+    STARCODER2 = auto()
+    RWKV6 = auto()
+    RWKV6QWEN2 = auto()
+    MAMBA = auto()
+    XVERSE = auto()
+    COMMAND_R = auto()
+    COHERE2 = auto()
+    DBRX = auto()
+    OLMO = auto()
+    OLMO2 = auto()
+    OLMOE = auto()
+    OPENELM = auto()
+    ARCTIC = auto()
+    DEEPSEEK = auto()
+    DEEPSEEK2 = auto()
+    CHATGLM = auto()
+    BITNET = auto()
+    T5 = auto()
+    T5ENCODER = auto()
+    JAIS = auto()
+    NEMOTRON = auto()
+    EXAONE = auto()
+    GRANITE = auto()
+    GRANITE_MOE = auto()
+    CHAMELEON = auto()
+    WAVTOKENIZER_DEC = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -255,6 +317,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE_SHEXP = auto()
     FFN_DOWN_SHEXP = auto()
     FFN_UP_SHEXP = auto()
+    FFN_EXP_PROBS_B = auto()
     ATTN_Q_NORM = auto()
     ATTN_K_NORM = auto()
     LAYER_OUT_NORM = auto()
@@ -265,6 +328,30 @@ class MODEL_TENSOR(IntEnum):
     SSM_A = auto()
     SSM_D = auto()
     SSM_OUT = auto()
+    TIME_MIX_W1 = auto()
+    TIME_MIX_W2 = auto()
+    TIME_MIX_LERP_X = auto()
+    TIME_MIX_LERP_K = auto()
+    TIME_MIX_LERP_V = auto()
+    TIME_MIX_LERP_R = auto()
+    TIME_MIX_LERP_G = auto()
+    TIME_MIX_LERP_FUSED = auto()
+    TIME_MIX_LERP_W = auto()
+    TIME_MIX_FIRST = auto()
+    TIME_MIX_DECAY = auto()
+    TIME_MIX_DECAY_W1 = auto()
+    TIME_MIX_DECAY_W2 = auto()
+    TIME_MIX_KEY = auto()
+    TIME_MIX_VALUE = auto()
+    TIME_MIX_RECEPTANCE = auto()
+    TIME_MIX_GATE = auto()
+    TIME_MIX_LN = auto()
+    TIME_MIX_OUTPUT = auto()
+    CHANNEL_MIX_LERP_K = auto()
+    CHANNEL_MIX_LERP_R = auto()
+    CHANNEL_MIX_KEY = auto()
+    CHANNEL_MIX_RECEPTANCE = auto()
+    CHANNEL_MIX_VALUE = auto()
     ATTN_Q_A = auto()
     ATTN_Q_B = auto()
     ATTN_KV_A_MQA = auto()
@@ -301,133 +388,211 @@ class MODEL_TENSOR(IntEnum):
     ENC_FFN_DOWN = auto()
     ENC_FFN_UP = auto()
     ENC_OUTPUT_NORM = auto()
+    CLS = auto()  # classifier
+    CLS_OUT = auto()  # classifier output projection
+    CONV1D = auto()
+    CONVNEXT_DW = auto()
+    CONVNEXT_NORM = auto()
+    CONVNEXT_PW1 = auto()
+    CONVNEXT_PW2 = auto()
+    CONVNEXT_GAMMA = auto()
+    POSNET_CONV1 = auto()
+    POSNET_CONV2 = auto()
+    POSNET_NORM = auto()
+    POSNET_NORM1 = auto()
+    POSNET_NORM2 = auto()
+    POSNET_ATTN_NORM = auto()
+    POSNET_ATTN_Q = auto()
+    POSNET_ATTN_K = auto()
+    POSNET_ATTN_V = auto()
+    POSNET_ATTN_OUT = auto()
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
-    MODEL_ARCH.LLAMA: "llama",
-    MODEL_ARCH.FALCON: "falcon",
-    MODEL_ARCH.BAICHUAN: "baichuan",
-    MODEL_ARCH.GROK: "grok",
-    MODEL_ARCH.GPT2: "gpt2",
-    MODEL_ARCH.GPTJ: "gptj",
-    MODEL_ARCH.GPTNEOX: "gptneox",
-    MODEL_ARCH.MPT: "mpt",
-    MODEL_ARCH.STARCODER: "starcoder",
-    MODEL_ARCH.REFACT: "refact",
-    MODEL_ARCH.BERT: "bert",
-    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
-    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
-    MODEL_ARCH.BLOOM: "bloom",
-    MODEL_ARCH.STABLELM: "stablelm",
-    MODEL_ARCH.QWEN: "qwen",
-    MODEL_ARCH.QWEN2: "qwen2",
-    MODEL_ARCH.QWEN2MOE: "qwen2moe",
-    MODEL_ARCH.PHI2: "phi2",
-    MODEL_ARCH.PHI3: "phi3",
-    MODEL_ARCH.PLAMO: "plamo",
-    MODEL_ARCH.CODESHELL: "codeshell",
-    MODEL_ARCH.ORION: "orion",
-    MODEL_ARCH.INTERNLM2: "internlm2",
-    MODEL_ARCH.MINICPM: "minicpm",
-    MODEL_ARCH.GEMMA: "gemma",
-    MODEL_ARCH.GEMMA2: "gemma2",
-    MODEL_ARCH.STARCODER2: "starcoder2",
-    MODEL_ARCH.MAMBA: "mamba",
-    MODEL_ARCH.XVERSE: "xverse",
-    MODEL_ARCH.COMMAND_R: "command-r",
-    MODEL_ARCH.DBRX: "dbrx",
-    MODEL_ARCH.OLMO: "olmo",
-    MODEL_ARCH.OPENELM: "openelm",
-    MODEL_ARCH.ARCTIC: "arctic",
-    MODEL_ARCH.DEEPSEEK2: "deepseek2",
-    MODEL_ARCH.CHATGLM: "chatglm",
-    MODEL_ARCH.BITNET: "bitnet",
-    MODEL_ARCH.T5: "t5",
-    MODEL_ARCH.JAIS: "jais",
+    MODEL_ARCH.LLAMA: "llama",
+    MODEL_ARCH.DECI: "deci",
+    MODEL_ARCH.FALCON: "falcon",
+    MODEL_ARCH.BAICHUAN: "baichuan",
+    MODEL_ARCH.GROK: "grok",
+    MODEL_ARCH.GPT2: "gpt2",
+    MODEL_ARCH.GPTJ: "gptj",
+    MODEL_ARCH.GPTNEOX: "gptneox",
+    MODEL_ARCH.MPT: "mpt",
+    MODEL_ARCH.STARCODER: "starcoder",
+    MODEL_ARCH.REFACT: "refact",
+    MODEL_ARCH.BERT: "bert",
+    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
+    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
+    MODEL_ARCH.BLOOM: "bloom",
+    MODEL_ARCH.STABLELM: "stablelm",
+    MODEL_ARCH.QWEN: "qwen",
+    MODEL_ARCH.QWEN2: "qwen2",
+    MODEL_ARCH.QWEN2MOE: "qwen2moe",
+    MODEL_ARCH.QWEN2VL: "qwen2vl",
+    MODEL_ARCH.PHI2: "phi2",
+    MODEL_ARCH.PHI3: "phi3",
+    MODEL_ARCH.PHIMOE: "phimoe",
+    MODEL_ARCH.PLAMO: "plamo",
+    MODEL_ARCH.CODESHELL: "codeshell",
+    MODEL_ARCH.ORION: "orion",
+    MODEL_ARCH.INTERNLM2: "internlm2",
+    MODEL_ARCH.MINICPM: "minicpm",
+    MODEL_ARCH.MINICPM3: "minicpm3",
+    MODEL_ARCH.GEMMA: "gemma",
+    MODEL_ARCH.GEMMA2: "gemma2",
+    MODEL_ARCH.STARCODER2: "starcoder2",
+    MODEL_ARCH.RWKV6: "rwkv6",
+    MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
+    MODEL_ARCH.MAMBA: "mamba",
+    MODEL_ARCH.XVERSE: "xverse",
+    MODEL_ARCH.COMMAND_R: "command-r",
+    MODEL_ARCH.COHERE2: "cohere2",
+    MODEL_ARCH.DBRX: "dbrx",
+    MODEL_ARCH.OLMO: "olmo",
+    MODEL_ARCH.OLMO2: "olmo2",
+    MODEL_ARCH.OLMOE: "olmoe",
+    MODEL_ARCH.OPENELM: "openelm",
+    MODEL_ARCH.ARCTIC: "arctic",
+    MODEL_ARCH.DEEPSEEK: "deepseek",
+    MODEL_ARCH.DEEPSEEK2: "deepseek2",
+    MODEL_ARCH.CHATGLM: "chatglm",
+    MODEL_ARCH.BITNET: "bitnet",
+    MODEL_ARCH.T5: "t5",
+    MODEL_ARCH.T5ENCODER: "t5encoder",
+    MODEL_ARCH.JAIS: "jais",
+    MODEL_ARCH.NEMOTRON: "nemotron",
+    MODEL_ARCH.EXAONE: "exaone",
+    MODEL_ARCH.GRANITE: "granite",
+    MODEL_ARCH.GRANITE_MOE: "granitemoe",
+    MODEL_ARCH.CHAMELEON: "chameleon",
+    MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
-    MODEL_TENSOR.TOKEN_EMBD: "token_embd",
-    MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
-    MODEL_TENSOR.TOKEN_TYPES: "token_types",
-    MODEL_TENSOR.POS_EMBD: "position_embd",
-    MODEL_TENSOR.OUTPUT_NORM: "output_norm",
-    MODEL_TENSOR.OUTPUT: "output",
-    MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
-    MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
-    MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
-    MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
-    MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
-    MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
-    MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
-    MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
-    MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
-    MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
-    MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
-    MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
-    MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
-    MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
-    MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
-    MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
-    MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
-    MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
-    MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
-    MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
-    MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
-    MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
-    MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
-    MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
-    MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
-    MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
-    MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
-    MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
-    MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
-    MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
-    MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
-    MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
-    MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
-    MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
-    MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
-    MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
-    MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
-    MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
-    MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
-    MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
-    MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
-    MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
-    MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
-    MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
-    MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
-    MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
-    MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
-    MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
-    MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
-    MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
-    MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
-    MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
-    MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
-    MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
-    MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
-    MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
-    MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
-    MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
-    MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
-    MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
-    MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
-    MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
-    MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
-    MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
-    MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
-    MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
-    MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
-    MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
-    MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
-    MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
-    MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
-    MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
-    MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
-    MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
-    MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
+    MODEL_TENSOR.TOKEN_EMBD: "token_embd",
+    MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
+    MODEL_TENSOR.TOKEN_TYPES: "token_types",
+    MODEL_TENSOR.POS_EMBD: "position_embd",
+    MODEL_TENSOR.OUTPUT_NORM: "output_norm",
+    MODEL_TENSOR.OUTPUT: "output",
+    MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
+    MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
+    MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
+    MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
+    MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
+    MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
+    MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
+    MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
+    MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
+    MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
+    MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
+    MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
+    MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
+    MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
+    MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
+    MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
+    MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
+    MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
+    MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
+    MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
+    MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
+    MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
+    MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
+    MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
+    MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
+    MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
+    MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
+    MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
+    MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
+    MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
+    MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
+    MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
+    MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
+    MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
+    MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
+    MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
+    MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
+    MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
+    MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
+    MODEL_TENSOR.TIME_MIX_LERP_X: "blk.{bid}.time_mix_lerp_x",
+    MODEL_TENSOR.TIME_MIX_LERP_K: "blk.{bid}.time_mix_lerp_k",
+    MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
+    MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
+    MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
+    MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
+    MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
+    MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
+    MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
+    MODEL_TENSOR.TIME_MIX_DECAY_W1: "blk.{bid}.time_mix_decay_w1",
+    MODEL_TENSOR.TIME_MIX_DECAY_W2: "blk.{bid}.time_mix_decay_w2",
+    MODEL_TENSOR.TIME_MIX_KEY: "blk.{bid}.time_mix_key",
+    MODEL_TENSOR.TIME_MIX_VALUE: "blk.{bid}.time_mix_value",
+    MODEL_TENSOR.TIME_MIX_RECEPTANCE: "blk.{bid}.time_mix_receptance",
+    MODEL_TENSOR.TIME_MIX_GATE: "blk.{bid}.time_mix_gate",
+    MODEL_TENSOR.TIME_MIX_LN: "blk.{bid}.time_mix_ln",
+    MODEL_TENSOR.TIME_MIX_OUTPUT: "blk.{bid}.time_mix_output",
+    MODEL_TENSOR.CHANNEL_MIX_LERP_K: "blk.{bid}.channel_mix_lerp_k",
+    MODEL_TENSOR.CHANNEL_MIX_LERP_R: "blk.{bid}.channel_mix_lerp_r",
+    MODEL_TENSOR.CHANNEL_MIX_KEY: "blk.{bid}.channel_mix_key",
+    MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: "blk.{bid}.channel_mix_receptance",
+    MODEL_TENSOR.CHANNEL_MIX_VALUE: "blk.{bid}.channel_mix_value",
+    MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
+    MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
+    MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
+    MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
+    MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
+    MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
+    MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
+    MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
+    MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
+    MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
+    MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
+    MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
+    MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
+    MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
+    MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
+    MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
+    MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
+    MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
+    MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
+    MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
+    MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
+    MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
+    MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
+    MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
+    MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
+    MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
+    MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
+    MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
+    MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
+    MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
+    MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
+    MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
+    MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
+    MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
+    MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
+    MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
+    MODEL_TENSOR.CLS: "cls",
+    MODEL_TENSOR.CLS_OUT: "cls.output",
+    MODEL_TENSOR.CONV1D: "conv1d",
+    MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
+    MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
+    MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
+    MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
+    MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
+    MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
+    MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
+    MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
+    MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
+    MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
+    MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
+    MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
+    MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
+    MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
+    MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
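Note: `TENSOR_NAMES` maps each `MODEL_TENSOR` to a `{bid}`-templated on-disk name, and `MODEL_TENSORS` (continued below) lists which tensors each architecture uses. A sketch of resolving concrete names for one of the newly added architectures (assumes gguf-py is importable as `gguf`):

```python
from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

# Print the on-disk tensor names used by the new RWKV6 architecture for
# block 0; names without a {bid} placeholder are unaffected by format().
for tensor in MODEL_TENSORS[MODEL_ARCH.RWKV6]:
    print(TENSOR_NAMES[tensor].format(bid=0))
# e.g. "token_embd", "blk.0.time_mix_w1", "blk.0.channel_mix_key", ...
```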
@@ -451,6 +616,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.GROK: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -537,6 +722,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.CLS,
+        MODEL_TENSOR.CLS_OUT,
     ],
     MODEL_ARCH.NOMIC_BERT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -568,6 +755,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.CLS,
     ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -654,6 +842,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
     ],
     MODEL_ARCH.QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN2VL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
@@ -731,6 +934,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
         MODEL_TENSOR.ATTN_Q,
@@ -741,6 +946,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PHIMOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.CODESHELL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.POS_EMBD,
@@ -790,6 +1013,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
@@ -805,6 +1030,25 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.MINICPM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_A,
+        MODEL_TENSOR.ATTN_Q_B,
+        MODEL_TENSOR.ATTN_KV_A_MQA,
+        MODEL_TENSOR.ATTN_KV_B,
+        MODEL_TENSOR.ATTN_Q_A_NORM,
+        MODEL_TENSOR.ATTN_KV_A_NORM,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.GEMMA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -848,6 +1092,67 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.RWKV6: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.TIME_MIX_W1,
+        MODEL_TENSOR.TIME_MIX_W2,
+        MODEL_TENSOR.TIME_MIX_LERP_X,
+        MODEL_TENSOR.TIME_MIX_LERP_K,
+        MODEL_TENSOR.TIME_MIX_LERP_V,
+        MODEL_TENSOR.TIME_MIX_LERP_R,
+        MODEL_TENSOR.TIME_MIX_LERP_G,
+        MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
+        MODEL_TENSOR.TIME_MIX_FIRST,
+        MODEL_TENSOR.TIME_MIX_DECAY,
+        MODEL_TENSOR.TIME_MIX_DECAY_W1,
+        MODEL_TENSOR.TIME_MIX_DECAY_W2,
+        MODEL_TENSOR.TIME_MIX_KEY,
+        MODEL_TENSOR.TIME_MIX_VALUE,
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+        MODEL_TENSOR.TIME_MIX_GATE,
+        MODEL_TENSOR.TIME_MIX_LN,
+        MODEL_TENSOR.TIME_MIX_OUTPUT,
+        MODEL_TENSOR.CHANNEL_MIX_LERP_K,
+        MODEL_TENSOR.CHANNEL_MIX_LERP_R,
+        MODEL_TENSOR.CHANNEL_MIX_KEY,
+        MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
+        MODEL_TENSOR.CHANNEL_MIX_VALUE,
+    ],
+    MODEL_ARCH.RWKV6QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.TIME_MIX_W1,
+        MODEL_TENSOR.TIME_MIX_W2,
+        MODEL_TENSOR.TIME_MIX_LERP_X,
+        MODEL_TENSOR.TIME_MIX_LERP_K,
+        MODEL_TENSOR.TIME_MIX_LERP_V,
+        MODEL_TENSOR.TIME_MIX_LERP_R,
+        MODEL_TENSOR.TIME_MIX_LERP_G,
+        MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
+        MODEL_TENSOR.TIME_MIX_FIRST,
+        MODEL_TENSOR.TIME_MIX_DECAY,
+        MODEL_TENSOR.TIME_MIX_DECAY_W1,
+        MODEL_TENSOR.TIME_MIX_DECAY_W2,
+        MODEL_TENSOR.TIME_MIX_KEY,
+        MODEL_TENSOR.TIME_MIX_VALUE,
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+        MODEL_TENSOR.TIME_MIX_GATE,
+        MODEL_TENSOR.TIME_MIX_LN,
+        MODEL_TENSOR.TIME_MIX_OUTPUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.MAMBA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -891,6 +1196,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ATTN_K_NORM,
         MODEL_TENSOR.ATTN_Q_NORM,
     ],
+    MODEL_ARCH.COHERE2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.DBRX: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -915,6 +1232,39 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.OLMO2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.OLMOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+    ],
     MODEL_ARCH.OPENELM: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -949,6 +1299,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -975,6 +1348,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
     MODEL_ARCH.CHATGLM : [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -983,6 +1357,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.FFN_NORM,
         MODEL_TENSOR.FFN_DOWN,
@@ -1035,6 +1412,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ENC_FFN_UP,
         MODEL_TENSOR.ENC_OUTPUT_NORM,
     ],
+    MODEL_ARCH.T5ENCODER: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ENC_ATTN_NORM,
+        MODEL_TENSOR.ENC_ATTN_Q,
+        MODEL_TENSOR.ENC_ATTN_K,
+        MODEL_TENSOR.ENC_ATTN_V,
+        MODEL_TENSOR.ENC_ATTN_OUT,
+        MODEL_TENSOR.ENC_ATTN_REL_B,
+        MODEL_TENSOR.ENC_FFN_NORM,
+        MODEL_TENSOR.ENC_FFN_GATE,
+        MODEL_TENSOR.ENC_FFN_DOWN,
+        MODEL_TENSOR.ENC_FFN_UP,
+        MODEL_TENSOR.ENC_OUTPUT_NORM,
+    ],
     MODEL_ARCH.JAIS: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1047,6 +1439,104 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.NEMOTRON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.EXAONE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.GRANITE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.GRANITE_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
+    MODEL_ARCH.CHAMELEON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.WAVTOKENIZER_DEC: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.CONV1D,
+        MODEL_TENSOR.CONVNEXT_DW,
+        MODEL_TENSOR.CONVNEXT_NORM,
+        MODEL_TENSOR.CONVNEXT_PW1,
+        MODEL_TENSOR.CONVNEXT_PW2,
+        MODEL_TENSOR.CONVNEXT_GAMMA,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.POSNET_CONV1,
+        MODEL_TENSOR.POSNET_CONV2,
+        MODEL_TENSOR.POSNET_NORM,
+        MODEL_TENSOR.POSNET_NORM1,
+        MODEL_TENSOR.POSNET_NORM2,
+        MODEL_TENSOR.POSNET_ATTN_NORM,
+        MODEL_TENSOR.POSNET_ATTN_Q,
+        MODEL_TENSOR.POSNET_ATTN_K,
+        MODEL_TENSOR.POSNET_ATTN_V,
+        MODEL_TENSOR.POSNET_ATTN_OUT,
+    ],
     # TODO
 }
 
@@ -1056,6 +1546,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.BAICHUAN: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1080,6 +1574,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1087,6 +1585,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     MODEL_ARCH.CHATGLM: [
         MODEL_TENSOR.ROPE_FREQS,
     ],
+    MODEL_ARCH.NEMOTRON: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
 }
 
 #
@@ -1104,9 +1606,10 @@ class TokenType(IntEnum):
 
 
 class RopeScalingType(Enum):
-    NONE = 'none'
-    LINEAR = 'linear'
-    YARN = 'yarn'
+    NONE = 'none'
+    LINEAR = 'linear'
+    YARN = 'yarn'
+    LONGROPE = 'longrope'
 
 
 class PoolingType(IntEnum):
@@ -1145,6 +1648,13 @@ class GGMLQuantizationType(IntEnum):
     F64 = 28
     IQ1_M = 29
     BF16 = 30
+    TQ1_0 = 34
+    TQ2_0 = 35
+
+
+class ExpertGatingFuncType(IntEnum):
+    SOFTMAX = 1
+    SIGMOID = 2
 
 
 # TODO: add GGMLFileType from ggml_ftype in ggml.h
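Note: the new `ExpertGatingFuncType` enum backs the `{arch}.expert_gating_func` key added earlier in this diff; it records how an MoE router turns logits into expert weights. A sketch of the two gating functions (NumPy is used for illustration only; this is not code from the package):

```python
import numpy as np
from gguf.constants import ExpertGatingFuncType  # assumes gguf-py importable

def gate(logits: np.ndarray, func: ExpertGatingFuncType) -> np.ndarray:
    if func == ExpertGatingFuncType.SOFTMAX:
        # Normalize across experts so the weights sum to 1.
        e = np.exp(logits - logits.max(axis=-1, keepdims=True))
        return e / e.sum(axis=-1, keepdims=True)
    # SIGMOID: each expert is gated independently.
    return 1.0 / (1.0 + np.exp(-logits))
```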
@@ -1157,7 +1667,7 @@ class LlamaFileType(IntEnum):
     MOSTLY_F16 = 1  # except 1d tensors
     MOSTLY_Q4_0 = 2  # except 1d tensors
     MOSTLY_Q4_1 = 3  # except 1d tensors
-    MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
+    # MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
     # MOSTLY_Q4_2 = 5  # support has been removed
     # MOSTLY_Q4_3 = 6  # support has been removed
     MOSTLY_Q8_0 = 7  # except 1d tensors
@@ -1186,6 +1696,11 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ4_XS = 30  # except 1d tensors
     MOSTLY_IQ1_M = 31  # except 1d tensors
     MOSTLY_BF16 = 32  # except 1d tensors
+    # MOSTLY_Q4_0_4_4 = 33  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_4_8 = 34  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_8_8 = 35  # removed from gguf files, use Q4_0 and runtime repack
+    MOSTLY_TQ1_0 = 36  # except 1d tensors
+    MOSTLY_TQ2_0 = 37  # except 1d tensors
 
     GUESSED = 1024  # not specified in the model file
 
@@ -1259,6 +1774,8 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
     GGMLQuantizationType.F64: (1, 8),
     GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
     GGMLQuantizationType.BF16: (1, 2),
+    GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
+    GGMLQuantizationType.TQ2_0: (256, 2 + 64),
 }
 
 
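Note: `GGML_QUANT_SIZES` entries are `(block_size, type_size)` pairs: how many weights a block holds and how many bytes it occupies. The effective bit-width of the new ternary types therefore follows directly from the values in this diff:

```python
# (block_size, bytes_per_block) pairs, taken from the table above
quants = {
    "TQ1_0": (256, 2 + 4 * 13),  # 54 bytes per 256 weights
    "TQ2_0": (256, 2 + 64),      # 66 bytes per 256 weights
}
for name, (block_size, nbytes) in quants.items():
    print(f"{name}: {8 * nbytes / block_size:.4f} bits/weight")
# TQ1_0: 1.6875 bits/weight, TQ2_0: 2.0625 bits/weight
```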
@@ -1306,6 +1823,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
 KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
 KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
 KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
+KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS
 
 # tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
@@ -1316,14 +1834,23 @@ KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
 KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
 KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
 KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
-KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
-KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
+# deprecated
+KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
-KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID