bigdl-core-cpp 2.5.0b20240827__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. bigdl/cpp/convert_hf_to_gguf.py +1196 -147
  2. bigdl/cpp/convert_hf_to_gguf_update.py +69 -42
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +0 -4
  4. bigdl/cpp/convert_lora_to_gguf.py +82 -14
  5. bigdl/cpp/gguf-py/gguf/constants.py +645 -187
  6. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  7. bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
  8. bigdl/cpp/gguf-py/gguf/gguf_writer.py +92 -16
  9. bigdl/cpp/gguf-py/gguf/lazy.py +0 -1
  10. bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
  11. bigdl/cpp/gguf-py/gguf/quants.py +81 -0
  12. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +249 -38
  13. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  14. bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
  15. bigdl/cpp/libs/common.lib +0 -0
  16. bigdl/cpp/libs/ggml-base.dll +0 -0
  17. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  18. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  19. bigdl/cpp/libs/ggml.dll +0 -0
  20. bigdl/cpp/libs/libc++.dll +0 -0
  21. bigdl/cpp/libs/llama-batched.exe +0 -0
  22. bigdl/cpp/libs/llama-bench.exe +0 -0
  23. bigdl/cpp/libs/llama-cli.exe +0 -0
  24. bigdl/cpp/libs/llama-embedding.exe +0 -0
  25. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  26. bigdl/cpp/libs/llama-gguf.exe +0 -0
  27. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  28. bigdl/cpp/libs/llama-lookup.exe +0 -0
  29. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  30. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  31. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  32. bigdl/cpp/libs/llama-quantize.exe +0 -0
  33. bigdl/cpp/libs/llama-server.exe +0 -0
  34. bigdl/cpp/libs/llama-simple.exe +0 -0
  35. bigdl/cpp/libs/llama-speculative.exe +0 -0
  36. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  37. bigdl/cpp/libs/llama.dll +0 -0
  38. bigdl/cpp/libs/llava_shared.dll +0 -0
  39. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  40. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  41. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  42. bigdl/cpp/libs/ollama-lib.exe +0 -0
  43. bigdl/cpp/libs/ollama.exe +0 -0
  44. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  45. bigdl/cpp/libs/ollama_llama.dll +0 -0
  46. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  47. bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
  48. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
  49. bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
  50. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
  51. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
  52. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
  53. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
  54. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
  55. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
  56. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
  57. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
  58. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
  59. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
  60. bigdl_core_cpp-2.5.0b20240827.data/scripts/init-ollama.bat +0 -19
  61. bigdl_core_cpp-2.5.0b20240827.dist-info/RECORD +0 -54
  62. {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +0 -0
  63. {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
  64. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/constants.py

```diff
@@ -64,20 +64,33 @@ class Keys:
         BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
         BASE_MODEL_VERSION = "general.base_model.{id}.version"
         BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
+        BASE_MODEL_DESCRIPTION = "general.base_model.{id}.description"
         BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
         BASE_MODEL_DOI = "general.base_model.{id}.doi"
         BASE_MODEL_UUID = "general.base_model.{id}.uuid"
         BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)

+        # Dataset Source
+        DATASET_COUNT = "general.dataset.count"
+        DATASET_NAME = "general.dataset.{id}.name"
+        DATASET_AUTHOR = "general.dataset.{id}.author"
+        DATASET_VERSION = "general.dataset.{id}.version"
+        DATASET_ORGANIZATION = "general.dataset.{id}.organization"
+        DATASET_DESCRIPTION = "general.dataset.{id}.description"
+        DATASET_URL = "general.dataset.{id}.url" # Model Website/Paper
+        DATASET_DOI = "general.dataset.{id}.doi"
+        DATASET_UUID = "general.dataset.{id}.uuid"
+        DATASET_REPO_URL = "general.dataset.{id}.repo_url" # Model Source Repository (git/svn/etc...)
+
         # Array based KV stores
         TAGS = "general.tags"
         LANGUAGES = "general.languages"
-        DATASETS = "general.datasets"

     class LLM:
         VOCAB_SIZE = "{arch}.vocab_size"
         CONTEXT_LENGTH = "{arch}.context_length"
         EMBEDDING_LENGTH = "{arch}.embedding_length"
+        FEATURES_LENGTH = "{arch}.features_length"
         BLOCK_COUNT = "{arch}.block_count"
         LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
         FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
```
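The flat `general.datasets` string array is replaced by per-dataset records using the same `{id}` templating as the `general.base_model.*` family: each constant is a plain `str.format` template, one KV record per dataset index. A minimal sketch of how a writer might expand them, assuming these constants sit under `Keys.General` as in upstream gguf-py (the `datasets` list itself is hypothetical metadata):

```python
# Sketch: expanding the {id}-templated dataset keys into concrete GGUF KV names.
# Assumes the constants live under Keys.General, as in upstream gguf-py.
from gguf.constants import Keys

datasets = [  # hypothetical metadata, e.g. parsed from a model card
    {"name": "fineweb-edu", "organization": "HuggingFaceFW"},
    {"name": "the-stack-v2", "organization": "BigCode"},
]

kv: dict[str, object] = {Keys.General.DATASET_COUNT: len(datasets)}
for i, ds in enumerate(datasets):
    kv[Keys.General.DATASET_NAME.format(id=i)] = ds["name"]
    kv[Keys.General.DATASET_ORGANIZATION.format(id=i)] = ds["organization"]

# kv now maps e.g. "general.dataset.0.name" -> "fineweb-edu",
# replacing the old flat "general.datasets" string array.
```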
```diff
@@ -89,11 +102,20 @@ class Keys:
         EXPERT_USED_COUNT = "{arch}.expert_used_count"
         EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
         EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
+        EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
+        EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
         POOLING_TYPE = "{arch}.pooling_type"
         LOGIT_SCALE = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
         ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
         FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
+        SWIN_NORM = "{arch}.swin_norm"
+        RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
+        TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
+        TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
+        RESIDUAL_SCALE = "{arch}.residual_scale"
+        EMBEDDING_SCALE = "{arch}.embedding_scale"
+        TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"

     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
@@ -104,14 +126,18 @@ class Keys:
         VALUE_LENGTH = "{arch}.attention.value_length"
         LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
         LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
+        GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
+        GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
         CAUSAL = "{arch}.attention.causal"
         Q_LORA_RANK = "{arch}.attention.q_lora_rank"
         KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
         REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
         SLIDING_WINDOW = "{arch}.attention.sliding_window"
+        SCALE = "{arch}.attention.scale"

     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"
+        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
         FREQ_BASE = "{arch}.rope.freq_base"
         SCALING_TYPE = "{arch}.rope.scaling.type"
         SCALING_FACTOR = "{arch}.rope.scaling.factor"
@@ -132,6 +158,17 @@ class Keys:
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
         DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"

+    class WKV:
+        HEAD_SIZE = "{arch}.wkv.head_size"
+
+    class PosNet:
+        EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
+        BLOCK_COUNT = "{arch}.posnet.block_count"
+
+    class ConvNext:
+        EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
+        BLOCK_COUNT = "{arch}.convnext.block_count"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE = "tokenizer.ggml.pre"
@@ -142,10 +179,11 @@ class Keys:
         MERGES = "tokenizer.ggml.merges"
         BOS_ID = "tokenizer.ggml.bos_token_id"
         EOS_ID = "tokenizer.ggml.eos_token_id"
+        EOT_ID = "tokenizer.ggml.eot_token_id"
+        EOM_ID = "tokenizer.ggml.eom_token_id"
         UNK_ID = "tokenizer.ggml.unknown_token_id"
         SEP_ID = "tokenizer.ggml.seperator_token_id"
         PAD_ID = "tokenizer.ggml.padding_token_id"
-        CLS_ID = "tokenizer.ggml.cls_token_id"
         MASK_ID = "tokenizer.ggml.mask_token_id"
         ADD_BOS = "tokenizer.ggml.add_bos_token"
         ADD_EOS = "tokenizer.ggml.add_eos_token"
@@ -158,11 +196,16 @@ class Keys:
         CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
         CHAT_TEMPLATES = "tokenizer.chat_templates"
         # FIM/Infill special tokens constants
+        FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
+        FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
+        FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
+        FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
+        FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
+        FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
+        # deprecated:
         PREFIX_ID = "tokenizer.ggml.prefix_token_id"
         SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
         MIDDLE_ID = "tokenizer.ggml.middle_token_id"
-        EOT_ID = "tokenizer.ggml.eot_token_id"
-        EOM_ID = "tokenizer.ggml.eom_token_id"

     class Adapter:
         TYPE = "adapter.type"
```
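The dedicated `fim_*_token_id` keys supersede the now-deprecated prefix/suffix/middle trio, while EOT/EOM move up next to BOS/EOS. A reader that wants to handle files written by both package generations can probe the new keys first and fall back; a minimal sketch, where `metadata` stands in for KV pairs already parsed from a GGUF file (the dict and function are hypothetical, not a gguf-py API — only the key constants are real):

```python
# Sketch: resolving FIM token ids with fallback to the deprecated keys.
# `metadata` is a plain dict of already-parsed GGUF KV pairs (hypothetical,
# not a gguf-py API); the key strings are the real constants from this diff.
from gguf.constants import Keys

def fim_token_ids(metadata: dict[str, int]) -> dict[str, int | None]:
    fallbacks = {
        Keys.Tokenizer.FIM_PRE_ID: Keys.Tokenizer.PREFIX_ID,
        Keys.Tokenizer.FIM_SUF_ID: Keys.Tokenizer.SUFFIX_ID,
        Keys.Tokenizer.FIM_MID_ID: Keys.Tokenizer.MIDDLE_ID,
    }
    return {
        new_key: metadata.get(new_key, metadata.get(old_key))
        for new_key, old_key in fallbacks.items()
    }
```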
```diff
@@ -179,49 +222,63 @@ class GGUFType:


 class MODEL_ARCH(IntEnum):
-    LLAMA = auto()
-    FALCON = auto()
-    BAICHUAN = auto()
-    GROK = auto()
-    GPT2 = auto()
-    GPTJ = auto()
-    GPTNEOX = auto()
-    MPT = auto()
-    STARCODER = auto()
-    REFACT = auto()
-    BERT = auto()
-    NOMIC_BERT = auto()
-    JINA_BERT_V2 = auto()
-    BLOOM = auto()
-    STABLELM = auto()
-    QWEN = auto()
-    QWEN2 = auto()
-    QWEN2MOE = auto()
-    PHI2 = auto()
-    PHI3 = auto()
-    PLAMO = auto()
-    CODESHELL = auto()
-    ORION = auto()
-    INTERNLM2 = auto()
-    MINICPM = auto()
-    GEMMA = auto()
-    GEMMA2 = auto()
-    STARCODER2 = auto()
-    MAMBA = auto()
-    XVERSE = auto()
-    COMMAND_R = auto()
-    DBRX = auto()
-    OLMO = auto()
-    OPENELM = auto()
-    ARCTIC = auto()
-    DEEPSEEK2 = auto()
-    CHATGLM = auto()
-    BITNET = auto()
-    T5 = auto()
-    T5ENCODER = auto()
-    JAIS = auto()
-    NEMOTRON = auto()
-    EXAONE = auto()
+    LLAMA = auto()
+    DECI = auto()
+    FALCON = auto()
+    BAICHUAN = auto()
+    GROK = auto()
+    GPT2 = auto()
+    GPTJ = auto()
+    GPTNEOX = auto()
+    MPT = auto()
+    STARCODER = auto()
+    REFACT = auto()
+    BERT = auto()
+    NOMIC_BERT = auto()
+    JINA_BERT_V2 = auto()
+    BLOOM = auto()
+    STABLELM = auto()
+    QWEN = auto()
+    QWEN2 = auto()
+    QWEN2MOE = auto()
+    QWEN2VL = auto()
+    PHI2 = auto()
+    PHI3 = auto()
+    PHIMOE = auto()
+    PLAMO = auto()
+    CODESHELL = auto()
+    ORION = auto()
+    INTERNLM2 = auto()
+    MINICPM = auto()
+    MINICPM3 = auto()
+    GEMMA = auto()
+    GEMMA2 = auto()
+    STARCODER2 = auto()
+    RWKV6 = auto()
+    RWKV6QWEN2 = auto()
+    MAMBA = auto()
+    XVERSE = auto()
+    COMMAND_R = auto()
+    COHERE2 = auto()
+    DBRX = auto()
+    OLMO = auto()
+    OLMO2 = auto()
+    OLMOE = auto()
+    OPENELM = auto()
+    ARCTIC = auto()
+    DEEPSEEK = auto()
+    DEEPSEEK2 = auto()
+    CHATGLM = auto()
+    BITNET = auto()
+    T5 = auto()
+    T5ENCODER = auto()
+    JAIS = auto()
+    NEMOTRON = auto()
+    EXAONE = auto()
+    GRANITE = auto()
+    GRANITE_MOE = auto()
+    CHAMELEON = auto()
+    WAVTOKENIZER_DEC = auto()
@@ -260,6 +317,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE_SHEXP = auto()
     FFN_DOWN_SHEXP = auto()
     FFN_UP_SHEXP = auto()
+    FFN_EXP_PROBS_B = auto()
     ATTN_Q_NORM = auto()
     ATTN_K_NORM = auto()
     LAYER_OUT_NORM = auto()
@@ -270,6 +328,30 @@ class MODEL_TENSOR(IntEnum):
     SSM_A = auto()
     SSM_D = auto()
     SSM_OUT = auto()
+    TIME_MIX_W1 = auto()
+    TIME_MIX_W2 = auto()
+    TIME_MIX_LERP_X = auto()
+    TIME_MIX_LERP_K = auto()
+    TIME_MIX_LERP_V = auto()
+    TIME_MIX_LERP_R = auto()
+    TIME_MIX_LERP_G = auto()
+    TIME_MIX_LERP_FUSED = auto()
+    TIME_MIX_LERP_W = auto()
+    TIME_MIX_FIRST = auto()
+    TIME_MIX_DECAY = auto()
+    TIME_MIX_DECAY_W1 = auto()
+    TIME_MIX_DECAY_W2 = auto()
+    TIME_MIX_KEY = auto()
+    TIME_MIX_VALUE = auto()
+    TIME_MIX_RECEPTANCE = auto()
+    TIME_MIX_GATE = auto()
+    TIME_MIX_LN = auto()
+    TIME_MIX_OUTPUT = auto()
+    CHANNEL_MIX_LERP_K = auto()
+    CHANNEL_MIX_LERP_R = auto()
+    CHANNEL_MIX_KEY = auto()
+    CHANNEL_MIX_RECEPTANCE = auto()
+    CHANNEL_MIX_VALUE = auto()
     ATTN_Q_A = auto()
     ATTN_Q_B = auto()
     ATTN_KV_A_MQA = auto()
@@ -306,136 +388,211 @@ class MODEL_TENSOR(IntEnum):
     ENC_FFN_DOWN = auto()
     ENC_FFN_UP = auto()
     ENC_OUTPUT_NORM = auto()
+    CLS = auto() # classifier
+    CLS_OUT = auto() # classifier output projection
+    CONV1D = auto()
+    CONVNEXT_DW = auto()
+    CONVNEXT_NORM = auto()
+    CONVNEXT_PW1 = auto()
+    CONVNEXT_PW2 = auto()
+    CONVNEXT_GAMMA = auto()
+    POSNET_CONV1 = auto()
+    POSNET_CONV2 = auto()
+    POSNET_NORM = auto()
+    POSNET_NORM1 = auto()
+    POSNET_NORM2 = auto()
+    POSNET_ATTN_NORM = auto()
+    POSNET_ATTN_Q = auto()
+    POSNET_ATTN_K = auto()
+    POSNET_ATTN_V = auto()
+    POSNET_ATTN_OUT = auto()


 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
-    MODEL_ARCH.LLAMA: "llama",
-    MODEL_ARCH.FALCON: "falcon",
-    MODEL_ARCH.BAICHUAN: "baichuan",
-    MODEL_ARCH.GROK: "grok",
-    MODEL_ARCH.GPT2: "gpt2",
-    MODEL_ARCH.GPTJ: "gptj",
-    MODEL_ARCH.GPTNEOX: "gptneox",
-    MODEL_ARCH.MPT: "mpt",
-    MODEL_ARCH.STARCODER: "starcoder",
-    MODEL_ARCH.REFACT: "refact",
-    MODEL_ARCH.BERT: "bert",
-    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
-    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
-    MODEL_ARCH.BLOOM: "bloom",
-    MODEL_ARCH.STABLELM: "stablelm",
-    MODEL_ARCH.QWEN: "qwen",
-    MODEL_ARCH.QWEN2: "qwen2",
-    MODEL_ARCH.QWEN2MOE: "qwen2moe",
-    MODEL_ARCH.PHI2: "phi2",
-    MODEL_ARCH.PHI3: "phi3",
-    MODEL_ARCH.PLAMO: "plamo",
-    MODEL_ARCH.CODESHELL: "codeshell",
-    MODEL_ARCH.ORION: "orion",
-    MODEL_ARCH.INTERNLM2: "internlm2",
-    MODEL_ARCH.MINICPM: "minicpm",
-    MODEL_ARCH.GEMMA: "gemma",
-    MODEL_ARCH.GEMMA2: "gemma2",
-    MODEL_ARCH.STARCODER2: "starcoder2",
-    MODEL_ARCH.MAMBA: "mamba",
-    MODEL_ARCH.XVERSE: "xverse",
-    MODEL_ARCH.COMMAND_R: "command-r",
-    MODEL_ARCH.DBRX: "dbrx",
-    MODEL_ARCH.OLMO: "olmo",
-    MODEL_ARCH.OPENELM: "openelm",
-    MODEL_ARCH.ARCTIC: "arctic",
-    MODEL_ARCH.DEEPSEEK2: "deepseek2",
-    MODEL_ARCH.CHATGLM: "chatglm",
-    MODEL_ARCH.BITNET: "bitnet",
-    MODEL_ARCH.T5: "t5",
-    MODEL_ARCH.T5ENCODER: "t5encoder",
-    MODEL_ARCH.JAIS: "jais",
-    MODEL_ARCH.NEMOTRON: "nemotron",
-    MODEL_ARCH.EXAONE: "exaone",
+    MODEL_ARCH.LLAMA: "llama",
+    MODEL_ARCH.DECI: "deci",
+    MODEL_ARCH.FALCON: "falcon",
+    MODEL_ARCH.BAICHUAN: "baichuan",
+    MODEL_ARCH.GROK: "grok",
+    MODEL_ARCH.GPT2: "gpt2",
+    MODEL_ARCH.GPTJ: "gptj",
+    MODEL_ARCH.GPTNEOX: "gptneox",
+    MODEL_ARCH.MPT: "mpt",
+    MODEL_ARCH.STARCODER: "starcoder",
+    MODEL_ARCH.REFACT: "refact",
+    MODEL_ARCH.BERT: "bert",
+    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
+    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
+    MODEL_ARCH.BLOOM: "bloom",
+    MODEL_ARCH.STABLELM: "stablelm",
+    MODEL_ARCH.QWEN: "qwen",
+    MODEL_ARCH.QWEN2: "qwen2",
+    MODEL_ARCH.QWEN2MOE: "qwen2moe",
+    MODEL_ARCH.QWEN2VL: "qwen2vl",
+    MODEL_ARCH.PHI2: "phi2",
+    MODEL_ARCH.PHI3: "phi3",
+    MODEL_ARCH.PHIMOE: "phimoe",
+    MODEL_ARCH.PLAMO: "plamo",
+    MODEL_ARCH.CODESHELL: "codeshell",
+    MODEL_ARCH.ORION: "orion",
+    MODEL_ARCH.INTERNLM2: "internlm2",
+    MODEL_ARCH.MINICPM: "minicpm",
+    MODEL_ARCH.MINICPM3: "minicpm3",
+    MODEL_ARCH.GEMMA: "gemma",
+    MODEL_ARCH.GEMMA2: "gemma2",
+    MODEL_ARCH.STARCODER2: "starcoder2",
+    MODEL_ARCH.RWKV6: "rwkv6",
+    MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
+    MODEL_ARCH.MAMBA: "mamba",
+    MODEL_ARCH.XVERSE: "xverse",
+    MODEL_ARCH.COMMAND_R: "command-r",
+    MODEL_ARCH.COHERE2: "cohere2",
+    MODEL_ARCH.DBRX: "dbrx",
+    MODEL_ARCH.OLMO: "olmo",
+    MODEL_ARCH.OLMO2: "olmo2",
+    MODEL_ARCH.OLMOE: "olmoe",
+    MODEL_ARCH.OPENELM: "openelm",
+    MODEL_ARCH.ARCTIC: "arctic",
+    MODEL_ARCH.DEEPSEEK: "deepseek",
+    MODEL_ARCH.DEEPSEEK2: "deepseek2",
+    MODEL_ARCH.CHATGLM: "chatglm",
+    MODEL_ARCH.BITNET: "bitnet",
+    MODEL_ARCH.T5: "t5",
+    MODEL_ARCH.T5ENCODER: "t5encoder",
+    MODEL_ARCH.JAIS: "jais",
+    MODEL_ARCH.NEMOTRON: "nemotron",
+    MODEL_ARCH.EXAONE: "exaone",
+    MODEL_ARCH.GRANITE: "granite",
+    MODEL_ARCH.GRANITE_MOE: "granitemoe",
+    MODEL_ARCH.CHAMELEON: "chameleon",
+    MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
 }

 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
-    MODEL_TENSOR.TOKEN_EMBD: "token_embd",
-    MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
-    MODEL_TENSOR.TOKEN_TYPES: "token_types",
-    MODEL_TENSOR.POS_EMBD: "position_embd",
-    MODEL_TENSOR.OUTPUT_NORM: "output_norm",
-    MODEL_TENSOR.OUTPUT: "output",
-    MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
-    MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
-    MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
-    MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
-    MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
-    MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
-    MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
-    MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
-    MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
-    MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
-    MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
-    MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
-    MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
-    MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
-    MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
-    MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
-    MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
-    MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
-    MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
-    MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
-    MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
-    MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
-    MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
-    MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
-    MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
-    MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
-    MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
-    MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
-    MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
-    MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
-    MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
-    MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
-    MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
-    MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
-    MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
-    MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
-    MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
-    MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
-    MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
-    MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
-    MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
-    MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
-    MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
-    MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
-    MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
-    MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
-    MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
-    MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
-    MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
-    MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
-    MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
-    MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
-    MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
-    MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
-    MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
-    MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
-    MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
-    MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
-    MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
-    MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
-    MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
-    MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
-    MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
-    MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
-    MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
-    MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
-    MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
-    MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
-    MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
-    MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
-    MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
-    MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
-    MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
-    MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
-    MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
+    MODEL_TENSOR.TOKEN_EMBD: "token_embd",
+    MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
+    MODEL_TENSOR.TOKEN_TYPES: "token_types",
+    MODEL_TENSOR.POS_EMBD: "position_embd",
+    MODEL_TENSOR.OUTPUT_NORM: "output_norm",
+    MODEL_TENSOR.OUTPUT: "output",
+    MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
+    MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
+    MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
+    MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
+    MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
+    MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
+    MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
+    MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
+    MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
+    MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
+    MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
+    MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
+    MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
+    MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
+    MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
+    MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
+    MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
+    MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
+    MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
+    MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
+    MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
+    MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
+    MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
+    MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
+    MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
+    MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
+    MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
+    MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
+    MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
+    MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
+    MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
+    MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
+    MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
+    MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
+    MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
+    MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
+    MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
+    MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
+    MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
+    MODEL_TENSOR.TIME_MIX_LERP_X: "blk.{bid}.time_mix_lerp_x",
+    MODEL_TENSOR.TIME_MIX_LERP_K: "blk.{bid}.time_mix_lerp_k",
+    MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
+    MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
+    MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
+    MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
+    MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
+    MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
+    MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
+    MODEL_TENSOR.TIME_MIX_DECAY_W1: "blk.{bid}.time_mix_decay_w1",
+    MODEL_TENSOR.TIME_MIX_DECAY_W2: "blk.{bid}.time_mix_decay_w2",
+    MODEL_TENSOR.TIME_MIX_KEY: "blk.{bid}.time_mix_key",
+    MODEL_TENSOR.TIME_MIX_VALUE: "blk.{bid}.time_mix_value",
+    MODEL_TENSOR.TIME_MIX_RECEPTANCE: "blk.{bid}.time_mix_receptance",
+    MODEL_TENSOR.TIME_MIX_GATE: "blk.{bid}.time_mix_gate",
+    MODEL_TENSOR.TIME_MIX_LN: "blk.{bid}.time_mix_ln",
+    MODEL_TENSOR.TIME_MIX_OUTPUT: "blk.{bid}.time_mix_output",
+    MODEL_TENSOR.CHANNEL_MIX_LERP_K: "blk.{bid}.channel_mix_lerp_k",
+    MODEL_TENSOR.CHANNEL_MIX_LERP_R: "blk.{bid}.channel_mix_lerp_r",
+    MODEL_TENSOR.CHANNEL_MIX_KEY: "blk.{bid}.channel_mix_key",
+    MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: "blk.{bid}.channel_mix_receptance",
+    MODEL_TENSOR.CHANNEL_MIX_VALUE: "blk.{bid}.channel_mix_value",
+    MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
+    MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
+    MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
+    MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
+    MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
+    MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
+    MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
+    MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
+    MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
+    MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
+    MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
+    MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
+    MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
+    MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
+    MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
+    MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
+    MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
+    MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
+    MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
+    MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
+    MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
+    MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
+    MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
+    MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
+    MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
+    MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
+    MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
+    MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
+    MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
+    MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
+    MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
+    MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
+    MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
+    MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
+    MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
+    MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
+    MODEL_TENSOR.CLS: "cls",
+    MODEL_TENSOR.CLS_OUT: "cls.output",
+    MODEL_TENSOR.CONV1D: "conv1d",
+    MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
+    MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
+    MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
+    MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
+    MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
+    MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
+    MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
+    MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
+    MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
+    MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
+    MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
+    MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
+    MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
+    MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
+    MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
 }

 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
```
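The `TENSOR_NAMES` values are `str.format` templates keyed by block index, so the new RWKV and audio-codec tensors serialize under the same `blk.{bid}` / `posnet.{bid}` / `convnext.{bid}` prefixes as everything else. A quick round-trip, using only the constants shown above:

```python
# Sketch: the serialized names the new enum members map to.
from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

print(TENSOR_NAMES[MODEL_TENSOR.TIME_MIX_KEY].format(bid=3))
# -> blk.3.time_mix_key
print(TENSOR_NAMES[MODEL_TENSOR.POSNET_ATTN_OUT].format(bid=0))
# -> posnet.0.attn_output
print(TENSOR_NAMES[MODEL_TENSOR.CLS])  # no block index in this template
# -> cls
```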
```diff
@@ -459,6 +616,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.GROK: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -545,6 +722,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.CLS,
+        MODEL_TENSOR.CLS_OUT,
     ],
     MODEL_ARCH.NOMIC_BERT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -576,6 +755,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.CLS,
     ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -662,6 +842,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
     ],
     MODEL_ARCH.QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN2VL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
@@ -739,6 +934,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
        MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
         MODEL_TENSOR.ATTN_Q,
@@ -749,6 +946,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PHIMOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.CODESHELL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.POS_EMBD,
@@ -798,6 +1013,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
@@ -813,6 +1030,25 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.MINICPM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_A,
+        MODEL_TENSOR.ATTN_Q_B,
+        MODEL_TENSOR.ATTN_KV_A_MQA,
+        MODEL_TENSOR.ATTN_KV_B,
+        MODEL_TENSOR.ATTN_Q_A_NORM,
+        MODEL_TENSOR.ATTN_KV_A_NORM,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.GEMMA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -856,6 +1092,67 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.RWKV6: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.TIME_MIX_W1,
+        MODEL_TENSOR.TIME_MIX_W2,
+        MODEL_TENSOR.TIME_MIX_LERP_X,
+        MODEL_TENSOR.TIME_MIX_LERP_K,
+        MODEL_TENSOR.TIME_MIX_LERP_V,
+        MODEL_TENSOR.TIME_MIX_LERP_R,
+        MODEL_TENSOR.TIME_MIX_LERP_G,
+        MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
+        MODEL_TENSOR.TIME_MIX_FIRST,
+        MODEL_TENSOR.TIME_MIX_DECAY,
+        MODEL_TENSOR.TIME_MIX_DECAY_W1,
+        MODEL_TENSOR.TIME_MIX_DECAY_W2,
+        MODEL_TENSOR.TIME_MIX_KEY,
+        MODEL_TENSOR.TIME_MIX_VALUE,
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+        MODEL_TENSOR.TIME_MIX_GATE,
+        MODEL_TENSOR.TIME_MIX_LN,
+        MODEL_TENSOR.TIME_MIX_OUTPUT,
+        MODEL_TENSOR.CHANNEL_MIX_LERP_K,
+        MODEL_TENSOR.CHANNEL_MIX_LERP_R,
+        MODEL_TENSOR.CHANNEL_MIX_KEY,
+        MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
+        MODEL_TENSOR.CHANNEL_MIX_VALUE,
+    ],
+    MODEL_ARCH.RWKV6QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.TIME_MIX_W1,
+        MODEL_TENSOR.TIME_MIX_W2,
+        MODEL_TENSOR.TIME_MIX_LERP_X,
+        MODEL_TENSOR.TIME_MIX_LERP_K,
+        MODEL_TENSOR.TIME_MIX_LERP_V,
+        MODEL_TENSOR.TIME_MIX_LERP_R,
+        MODEL_TENSOR.TIME_MIX_LERP_G,
+        MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
+        MODEL_TENSOR.TIME_MIX_FIRST,
+        MODEL_TENSOR.TIME_MIX_DECAY,
+        MODEL_TENSOR.TIME_MIX_DECAY_W1,
+        MODEL_TENSOR.TIME_MIX_DECAY_W2,
+        MODEL_TENSOR.TIME_MIX_KEY,
+        MODEL_TENSOR.TIME_MIX_VALUE,
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+        MODEL_TENSOR.TIME_MIX_GATE,
+        MODEL_TENSOR.TIME_MIX_LN,
+        MODEL_TENSOR.TIME_MIX_OUTPUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.MAMBA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -899,6 +1196,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ATTN_K_NORM,
         MODEL_TENSOR.ATTN_Q_NORM,
     ],
+    MODEL_ARCH.COHERE2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.DBRX: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -923,6 +1232,39 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.OLMO2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.OLMOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+    ],
     MODEL_ARCH.OPENELM: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -957,6 +1299,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -983,6 +1348,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
     MODEL_ARCH.CHATGLM : [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -991,6 +1357,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.FFN_NORM,
         MODEL_TENSOR.FFN_DOWN,
@@ -1101,6 +1470,73 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.GRANITE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.GRANITE_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
+    MODEL_ARCH.CHAMELEON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.WAVTOKENIZER_DEC: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.CONV1D,
+        MODEL_TENSOR.CONVNEXT_DW,
+        MODEL_TENSOR.CONVNEXT_NORM,
+        MODEL_TENSOR.CONVNEXT_PW1,
+        MODEL_TENSOR.CONVNEXT_PW2,
+        MODEL_TENSOR.CONVNEXT_GAMMA,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.POSNET_CONV1,
+        MODEL_TENSOR.POSNET_CONV2,
+        MODEL_TENSOR.POSNET_NORM,
+        MODEL_TENSOR.POSNET_NORM1,
+        MODEL_TENSOR.POSNET_NORM2,
+        MODEL_TENSOR.POSNET_ATTN_NORM,
+        MODEL_TENSOR.POSNET_ATTN_Q,
+        MODEL_TENSOR.POSNET_ATTN_K,
+        MODEL_TENSOR.POSNET_ATTN_V,
+        MODEL_TENSOR.POSNET_ATTN_OUT,
+    ],
     # TODO
 }

```
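`MODEL_TENSORS` is what lets a converter validate that every tensor it is about to write is legal for the target architecture, by expanding the per-tensor templates over the block count. A small sketch using the constants from this diff (`allowed_names()` is illustrative, not a gguf-py helper):

```python
# Sketch: building a serialized-name allow-list for one architecture.
# allowed_names() is an illustrative helper, not part of gguf-py.
from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

def allowed_names(arch: MODEL_ARCH, n_blocks: int) -> set[str]:
    names: set[str] = set()
    for t in MODEL_TENSORS[arch]:
        template = TENSOR_NAMES[t]
        if "{bid}" in template:
            # per-block tensors expand once per block index
            names.update(template.format(bid=b) for b in range(n_blocks))
        else:
            names.add(template)
    return names

names = allowed_names(MODEL_ARCH.WAVTOKENIZER_DEC, n_blocks=12)
assert "posnet.0.attn_output" in names and "convnext.3.dw" in names
```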
```diff
@@ -1110,6 +1546,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.BAICHUAN: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1134,6 +1574,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
        MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1162,9 +1606,10 @@ class TokenType(IntEnum):


 class RopeScalingType(Enum):
-    NONE = 'none'
-    LINEAR = 'linear'
-    YARN = 'yarn'
+    NONE = 'none'
+    LINEAR = 'linear'
+    YARN = 'yarn'
+    LONGROPE = 'longrope'


 class PoolingType(IntEnum):
@@ -1203,9 +1648,13 @@ class GGMLQuantizationType(IntEnum):
     F64 = 28
     IQ1_M = 29
     BF16 = 30
-    Q4_0_4_4 = 31
-    Q4_0_4_8 = 32
-    Q4_0_8_8 = 33
+    TQ1_0 = 34
+    TQ2_0 = 35
+
+
+class ExpertGatingFuncType(IntEnum):
+    SOFTMAX = 1
+    SIGMOID = 2


 # TODO: add GGMLFileType from ggml_ftype in ggml.h
```
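The new `ExpertGatingFuncType` records which scoring function a mixture-of-experts router uses (stored under the `{arch}.expert_gating_func` key added above): softmax scores compete and sum to one, while sigmoid scores are independent, as in DeepSeek-V3-style routing. A plain-Python illustration of the distinction, not gguf-py code (the router `logits` are made up):

```python
# Sketch: what the two gating-function ids select between when scoring experts.
# Plain Python illustration, not gguf-py code.
import math

def softmax(logits: list[float]) -> list[float]:
    m = max(logits)
    exps = [math.exp(x - m) for x in logits]
    s = sum(exps)
    return [e / s for e in exps]  # scores compete; they sum to 1

def sigmoid(logits: list[float]) -> list[float]:
    return [1 / (1 + math.exp(-x)) for x in logits]  # scores are independent

logits = [0.3, -1.2, 2.0, 0.7]  # hypothetical router outputs for 4 experts
scores = {1: softmax(logits), 2: sigmoid(logits)}  # SOFTMAX = 1, SIGMOID = 2
```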
```diff
@@ -1247,9 +1696,11 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ4_XS = 30 # except 1d tensors
     MOSTLY_IQ1_M = 31 # except 1d tensors
     MOSTLY_BF16 = 32 # except 1d tensors
-    MOSTLY_Q4_0_4_4 = 33 # except 1d tensors
-    MOSTLY_Q4_0_4_8 = 34 # except 1d tensors
-    MOSTLY_Q4_0_8_8 = 35 # except 1d tensors
+    # MOSTLY_Q4_0_4_4 = 33 # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_4_8 = 34 # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_8_8 = 35 # removed from gguf files, use Q4_0 and runtime repack
+    MOSTLY_TQ1_0 = 36 # except 1d tensors
+    MOSTLY_TQ2_0 = 37 # except 1d tensors

     GUESSED = 1024 # not specified in the model file

@@ -1323,9 +1774,8 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
     GGMLQuantizationType.F64: (1, 8),
     GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
     GGMLQuantizationType.BF16: (1, 2),
-    GGMLQuantizationType.Q4_0_4_4:(32, 2 + 16),
-    GGMLQuantizationType.Q4_0_4_8:(32, 2 + 16),
-    GGMLQuantizationType.Q4_0_8_8:(32, 2 + 16),
+    GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
+    GGMLQuantizationType.TQ2_0: (256, 2 + 64),
 }


```
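Each `GGML_QUANT_SIZES` entry is a `(block_size, type_size)` pair: a tensor is stored as blocks of `block_size` elements occupying `type_size` bytes each, which makes the new ternary types easy to size. A quick check, where `quant_pack_bytes()` is an illustrative helper rather than a gguf-py function:

```python
# Sketch: sizing a packed tensor from (block_size, type_size).
# quant_pack_bytes() is an illustrative helper, not a gguf-py function.
from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

def quant_pack_bytes(n_elements: int, qtype: GGMLQuantizationType) -> int:
    block_size, type_size = GGML_QUANT_SIZES[qtype]
    assert n_elements % block_size == 0
    return n_elements // block_size * type_size

# TQ1_0 packs 256 weights into 2 + 4*13 = 54 bytes (~1.69 bits/weight);
# TQ2_0 packs 256 weights into 2 + 64 = 66 bytes (~2.06 bits/weight).
for t in (GGMLQuantizationType.TQ1_0, GGMLQuantizationType.TQ2_0):
    bs, ts = GGML_QUANT_SIZES[t]
    print(t.name, ts * 8 / bs, "bits/weight")
```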
```diff
@@ -1384,15 +1834,23 @@ KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
 KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
 KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
 KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
-KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
-KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
+# deprecated
+KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
-KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
-KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
```