bigdl-core-cpp 2.7.0b20250630__py3-none-win_amd64.whl → 2.7.0b20250701__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. bigdl/cpp/convert_hf_to_gguf.py +1987 -558
  2. bigdl/cpp/convert_hf_to_gguf_update.py +131 -67
  3. bigdl/cpp/convert_lora_to_gguf.py +3 -3
  4. bigdl/cpp/gguf-py/gguf/constants.py +546 -16
  5. bigdl/cpp/gguf-py/gguf/gguf_reader.py +57 -6
  6. bigdl/cpp/gguf-py/gguf/gguf_writer.py +119 -7
  7. bigdl/cpp/gguf-py/gguf/lazy.py +10 -0
  8. bigdl/cpp/gguf-py/gguf/metadata.py +28 -8
  9. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +461 -48
  10. bigdl/cpp/gguf-py/gguf/utility.py +195 -0
  11. bigdl/cpp/gguf-py/gguf/vocab.py +6 -1
  12. bigdl/cpp/libs/llama_cpp/ggml-base.dll +0 -0
  13. bigdl/cpp/libs/llama_cpp/ggml-cpu.dll +0 -0
  14. bigdl/cpp/libs/llama_cpp/ggml-sycl.dll +0 -0
  15. bigdl/cpp/libs/llama_cpp/ggml.dll +0 -0
  16. bigdl/cpp/libs/llama_cpp/llama-batched.exe +0 -0
  17. bigdl/cpp/libs/llama_cpp/llama-bench.exe +0 -0
  18. bigdl/cpp/libs/llama_cpp/llama-cli.exe +0 -0
  19. bigdl/cpp/libs/llama_cpp/llama-embedding.exe +0 -0
  20. bigdl/cpp/libs/llama_cpp/llama-gemma3-cli.exe +0 -0
  21. bigdl/cpp/libs/llama_cpp/llama-gguf.exe +0 -0
  22. bigdl/cpp/libs/llama_cpp/llama-llava-cli.exe +0 -0
  23. bigdl/cpp/libs/llama_cpp/llama-lookup.exe +0 -0
  24. bigdl/cpp/libs/llama_cpp/llama-ls-sycl-device.exe +0 -0
  25. bigdl/cpp/libs/llama_cpp/llama-minicpmv-cli.exe +0 -0
  26. bigdl/cpp/libs/llama_cpp/llama-perplexity.exe +0 -0
  27. bigdl/cpp/libs/llama_cpp/llama-quantize.exe +0 -0
  28. bigdl/cpp/libs/llama_cpp/llama-server.exe +0 -0
  29. bigdl/cpp/libs/llama_cpp/llama-simple.exe +0 -0
  30. bigdl/cpp/libs/llama_cpp/llama-speculative.exe +0 -0
  31. bigdl/cpp/libs/llama_cpp/llama-tokenize.exe +0 -0
  32. bigdl/cpp/libs/llama_cpp/llama.dll +0 -0
  33. bigdl/cpp/libs/ollama/ggml-base.dll +0 -0
  34. bigdl/cpp/libs/ollama/ggml-cpu.dll +0 -0
  35. bigdl/cpp/libs/ollama/ggml-sycl.dll +0 -0
  36. bigdl/cpp/libs/ollama/ggml.dll +0 -0
  37. bigdl/cpp/libs/ollama/llama.dll +0 -0
  38. bigdl/cpp/libs/ollama/llava_shared.dll +0 -0
  39. bigdl/cpp/libs/ollama/mtmd_shared.dll +0 -0
  40. bigdl/cpp/libs/ollama/ollama-lib.exe +0 -0
  41. bigdl/cpp/libs/ollama/ollama.exe +0 -0
  42. {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/METADATA +1 -1
  43. bigdl_core_cpp-2.7.0b20250701.dist-info/RECORD +56 -0
  44. bigdl/cpp/libs/llama_cpp/llava_shared.dll +0 -0
  45. bigdl_core_cpp-2.7.0b20250630.dist-info/RECORD +0 -57
  46. {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-llama-cpp.bat +0 -0
  47. {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-llama-cpp.ps1 +0 -0
  48. {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-ollama.bat +0 -0
  49. {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/WHEEL +0 -0
  50. {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/top_level.txt +0 -0
@@ -104,6 +104,7 @@ class Keys:
  EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
  EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
  EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
+ MOE_EVERY_N_LAYERS = "{arch}.moe_every_n_layers"
  POOLING_TYPE = "{arch}.pooling_type"
  LOGIT_SCALE = "{arch}.logit_scale"
  DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
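These Keys entries are plain str.format templates keyed by architecture. A minimal sketch of how the new key expands, assuming the bundled gguf-py keeps upstream's "gguf" package name and keeps this constant under Keys.LLM as upstream does (the architecture string is only an example):

    from gguf.constants import Keys

    # "nomic-bert-moe" is an illustrative architecture string, not a required value
    key = Keys.LLM.MOE_EVERY_N_LAYERS.format(arch="nomic-bert-moe")
    print(key)  # nomic-bert-moe.moe_every_n_layers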
@@ -116,24 +117,31 @@ class Keys:
  RESIDUAL_SCALE = "{arch}.residual_scale"
  EMBEDDING_SCALE = "{arch}.embedding_scale"
  TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
+ INTERLEAVE_MOE_LAYER_STEP = "{arch}.interleave_moe_layer_step"

  class Attention:
- HEAD_COUNT = "{arch}.attention.head_count"
- HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
- MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
- CLAMP_KQV = "{arch}.attention.clamp_kqv"
- KEY_LENGTH = "{arch}.attention.key_length"
- VALUE_LENGTH = "{arch}.attention.value_length"
- LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
- LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
- GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
- GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
- CAUSAL = "{arch}.attention.causal"
- Q_LORA_RANK = "{arch}.attention.q_lora_rank"
- KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
- REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
- SLIDING_WINDOW = "{arch}.attention.sliding_window"
- SCALE = "{arch}.attention.scale"
+ HEAD_COUNT = "{arch}.attention.head_count"
+ HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
+ MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
+ CLAMP_KQV = "{arch}.attention.clamp_kqv"
+ KEY_LENGTH = "{arch}.attention.key_length"
+ VALUE_LENGTH = "{arch}.attention.value_length"
+ LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
+ LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
+ GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
+ GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
+ CAUSAL = "{arch}.attention.causal"
+ Q_LORA_RANK = "{arch}.attention.q_lora_rank"
+ KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
+ DECAY_LORA_RANK = "{arch}.attention.decay_lora_rank"
+ ICLR_LORA_RANK = "{arch}.attention.iclr_lora_rank"
+ VALUE_RESIDUAL_MIX_LORA_RANK = "{arch}.attention.value_residual_mix_lora_rank"
+ GATE_LORA_RANK = "{arch}.attention.gate_lora_rank"
+ REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
+ SLIDING_WINDOW = "{arch}.attention.sliding_window"
+ SCALE = "{arch}.attention.scale"
+ KEY_LENGTH_MLA = "{arch}.attention.key_length_mla"
+ VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla"

  class Rope:
  DIMENSION_COUNT = "{arch}.rope.dimension_count"
@@ -169,6 +177,9 @@ class Keys:
  EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
  BLOCK_COUNT = "{arch}.convnext.block_count"

+ class Classifier:
+ OUTPUT_LABELS = "{arch}.classifier.output_labels"
+
  class Tokenizer:
  MODEL = "tokenizer.ggml.model"
  PRE = "tokenizer.ggml.pre"
@@ -211,6 +222,47 @@ class Keys:
  TYPE = "adapter.type"
  LORA_ALPHA = "adapter.lora.alpha"

+ class Clip:
+ PROJECTOR_TYPE = "clip.projector_type"
+ HAS_VISION_ENCODER = "clip.has_vision_encoder"
+ HAS_AUDIO_ENCODER = "clip.has_audio_encoder"
+ HAS_LLAVA_PROJECTOR = "clip.has_llava_projector"
+
+ class ClipVision:
+ IMAGE_SIZE = "clip.vision.image_size"
+ PATCH_SIZE = "clip.vision.patch_size"
+ EMBEDDING_LENGTH = "clip.vision.embedding_length"
+ FEED_FORWARD_LENGTH = "clip.vision.feed_forward_length"
+ PROJECTION_DIM = "clip.vision.projection_dim"
+ BLOCK_COUNT = "clip.vision.block_count"
+ IMAGE_MEAN = "clip.vision.image_mean"
+ IMAGE_STD = "clip.vision.image_std"
+ SPATIAL_MERGE_SIZE = "clip.vision.spatial_merge_size"
+ USE_GELU = "clip.use_gelu"
+ USE_SILU = "clip.use_silu"
+ N_WA_PATTERN = "clip.vision.n_wa_pattern" # used by qwen2.5vl
+
+ class Attention:
+ HEAD_COUNT = "clip.vision.attention.head_count"
+ LAYERNORM_EPS = "clip.vision.attention.layer_norm_epsilon"
+
+ class Projector:
+ SCALE_FACTOR = "clip.vision.projector.scale_factor"
+
+ class ClipAudio:
+ NUM_MEL_BINS = "clip.audio.num_mel_bins"
+ EMBEDDING_LENGTH = "clip.audio.embedding_length"
+ FEED_FORWARD_LENGTH = "clip.audio.feed_forward_length"
+ PROJECTION_DIM = "clip.audio.projection_dim"
+ BLOCK_COUNT = "clip.audio.block_count"
+
+ class Attention:
+ HEAD_COUNT = "clip.audio.attention.head_count"
+ LAYERNORM_EPS = "clip.audio.attention.layer_norm_epsilon"
+
+ class Projector:
+ STACK_FACTOR = "clip.audio.projector.stack_factor"
+
  #
  # recommended mapping of model tensor names for storage in gguf
  #
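A minimal sketch of how the new clip.* metadata keys could be written with gguf-py's existing low-level GGUFWriter setters (add_string, add_bool, add_uint32). The output path and numeric values are illustrative, and the sketch assumes ClipVision sits directly under Keys alongside Clip, as the collapsed indentation above suggests:

    from gguf import GGUFWriter
    from gguf.constants import Keys

    w = GGUFWriter("mmproj-example.gguf", arch="clip")    # hypothetical output file
    w.add_string(Keys.Clip.PROJECTOR_TYPE, "gemma3")      # projector type string
    w.add_bool(Keys.Clip.HAS_VISION_ENCODER, True)
    w.add_uint32(Keys.ClipVision.IMAGE_SIZE, 896)         # example image size
    w.add_uint32(Keys.ClipVision.PATCH_SIZE, 14)          # example patch size
    w.write_header_to_file()
    w.write_kv_data_to_file()
    w.close()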
@@ -219,10 +271,13 @@ class Keys:
  class GGUFType:
  MODEL = "model"
  ADAPTER = "adapter"
+ MMPROJ = "mmproj" # dummy, unused for now


  class MODEL_ARCH(IntEnum):
+ MMPROJ = auto() # dummy arch for clip.cpp
  LLAMA = auto()
+ LLAMA4 = auto()
  DECI = auto()
  FALCON = auto()
  BAICHUAN = auto()
@@ -235,6 +290,7 @@ class MODEL_ARCH(IntEnum):
  REFACT = auto()
  BERT = auto()
  NOMIC_BERT = auto()
+ NOMIC_BERT_MOE = auto()
  JINA_BERT_V2 = auto()
  BLOOM = auto()
  STABLELM = auto()
@@ -255,9 +311,12 @@ class MODEL_ARCH(IntEnum):
  MINICPM3 = auto()
  GEMMA = auto()
  GEMMA2 = auto()
+ GEMMA3 = auto()
  STARCODER2 = auto()
  RWKV6 = auto()
  RWKV6QWEN2 = auto()
+ RWKV7 = auto()
+ ARWKV7 = auto()
  MAMBA = auto()
  XVERSE = auto()
  COMMAND_R = auto()
@@ -271,6 +330,7 @@ class MODEL_ARCH(IntEnum):
  DEEPSEEK = auto()
  DEEPSEEK2 = auto()
  CHATGLM = auto()
+ GLM4 = auto()
  BITNET = auto()
  T5 = auto()
  T5ENCODER = auto()
@@ -281,6 +341,20 @@ class MODEL_ARCH(IntEnum):
  GRANITE_MOE = auto()
  CHAMELEON = auto()
  WAVTOKENIZER_DEC = auto()
+ PLM = auto()
+ BAILINGMOE = auto()
+ DOTS1 = auto()
+ ARCEE = auto()
+
+
+ class VISION_PROJECTOR_TYPE(IntEnum):
+ MLP = auto()
+ LDP = auto()
+ LDPV2 = auto()
+ RESAMPLER = auto()
+ GLM_EDGE = auto()
+ MERGER = auto()
+ GEMMA3 = auto()


  class MODEL_TENSOR(IntEnum):
@@ -330,8 +404,20 @@ class MODEL_TENSOR(IntEnum):
  SSM_A = auto()
  SSM_D = auto()
  SSM_OUT = auto()
+ TIME_MIX_W0 = auto()
  TIME_MIX_W1 = auto()
  TIME_MIX_W2 = auto()
+ TIME_MIX_A0 = auto()
+ TIME_MIX_A1 = auto()
+ TIME_MIX_A2 = auto()
+ TIME_MIX_V0 = auto()
+ TIME_MIX_V1 = auto()
+ TIME_MIX_V2 = auto()
+ TIME_MIX_G1 = auto()
+ TIME_MIX_G2 = auto()
+ TIME_MIX_K_K = auto()
+ TIME_MIX_K_A = auto()
+ TIME_MIX_R_K = auto()
  TIME_MIX_LERP_X = auto()
  TIME_MIX_LERP_K = auto()
  TIME_MIX_LERP_V = auto()
@@ -358,6 +444,8 @@ class MODEL_TENSOR(IntEnum):
  ATTN_Q_B = auto()
  ATTN_KV_A_MQA = auto()
  ATTN_KV_B = auto()
+ ATTN_K_B = auto()
+ ATTN_V_B = auto()
  ATTN_Q_A_NORM = auto()
  ATTN_KV_A_NORM = auto()
  FFN_SUB_NORM = auto()
@@ -408,10 +496,70 @@ class MODEL_TENSOR(IntEnum):
  POSNET_ATTN_K = auto()
  POSNET_ATTN_V = auto()
  POSNET_ATTN_OUT = auto()
+ # vision
+ V_MMPROJ = auto()
+ V_MMPROJ_FC = auto()
+ V_MMPROJ_MLP = auto()
+ V_MMPROJ_PEG = auto()
+ V_ENC_EMBD_CLS = auto()
+ V_ENC_EMBD_PATCH = auto()
+ V_ENC_EMBD_POS = auto()
+ V_ENC_INPUT_NORM = auto()
+ V_ENC_ATTN_Q = auto()
+ V_ENC_ATTN_Q_NORM = auto()
+ V_ENC_ATTN_K = auto()
+ V_ENC_ATTN_K_NORM = auto()
+ V_ENC_ATTN_V = auto()
+ V_ENC_ATTN_O = auto()
+ V_ENC_ATTN_O_NORM = auto()
+ V_ENC_POST_ATTN_NORM = auto()
+ V_ENC_FFN_UP = auto()
+ V_ENC_FFN_GATE = auto()
+ V_ENC_FFN_DOWN = auto()
+ V_LAYER_SCALE_1 = auto()
+ V_LAYER_SCALE_2 = auto()
+ V_PRE_NORM = auto()
+ V_POST_NORM = auto()
+ V_MM_INP_NORM = auto()
+ V_MM_INP_PROJ = auto() # gemma3
+ V_MM_SOFT_EMB_NORM = auto() # gemma3
+ V_RESMPL_POS_EMBD_K = auto() # minicpmv
+ V_RESMPL_ATTN_Q = auto() # minicpmv
+ V_RESMPL_ATTN_K = auto() # minicpmv
+ V_RESMPL_ATTN_V = auto() # minicpmv
+ V_RESMPL_ATTN_OUT = auto() # minicpmv
+ V_RESMPL_KV = auto() # minicpmv
+ V_RESMPL_KV_NORM = auto() # minicpmv
+ V_RESMPL_POST_NORM = auto() # minicpmv
+ V_RESMPL_Q_NORM = auto() # minicpmv
+ V_RESMPL_PROJ = auto() # minicpmv
+ V_RESMPL_QUERY = auto() # minicpmv
+ V_TOK_EMBD_IMG_BREAK = auto() # pixtral
+ V_MM_PATCH_MERGER = auto() # mistral small 3.1
+ # audio (mtmd)
+ A_ENC_EMBD_POS = auto()
+ A_ENC_CONV1D = auto()
+ A_PRE_NORM = auto()
+ A_POST_NORM = auto()
+ A_ENC_ATTN_Q = auto()
+ A_ENC_ATTN_K = auto()
+ A_ENC_ATTN_V = auto()
+ A_ENC_INPUT_NORM = auto()
+ A_ENC_OUTPUT = auto()
+ A_ENC_OUTPUT_NORM = auto()
+ A_ENC_FFN_UP = auto()
+ A_ENC_FFN_GATE = auto()
+ A_ENC_FFN_DOWN = auto()
+ A_MMPROJ = auto()
+ A_MMPROJ_FC = auto()
+ A_MM_NORM_PRE = auto()
+ A_MM_NORM_MID = auto()


  MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
+ MODEL_ARCH.MMPROJ: "clip", # dummy arch for clip.cpp
  MODEL_ARCH.LLAMA: "llama",
+ MODEL_ARCH.LLAMA4: "llama4",
  MODEL_ARCH.DECI: "deci",
  MODEL_ARCH.FALCON: "falcon",
  MODEL_ARCH.BAICHUAN: "baichuan",
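The architecture tables are module-level dicts, so the new entries can be checked directly; a small sketch, again assuming the upstream gguf import path:

    from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES

    print(MODEL_ARCH_NAMES[MODEL_ARCH.LLAMA4])   # llama4
    print(MODEL_ARCH_NAMES[MODEL_ARCH.MMPROJ])   # clip (dummy arch used by clip.cpp)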
@@ -424,6 +572,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
  MODEL_ARCH.REFACT: "refact",
  MODEL_ARCH.BERT: "bert",
  MODEL_ARCH.NOMIC_BERT: "nomic-bert",
+ MODEL_ARCH.NOMIC_BERT_MOE: "nomic-bert-moe",
  MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
  MODEL_ARCH.BLOOM: "bloom",
  MODEL_ARCH.STABLELM: "stablelm",
@@ -444,9 +593,12 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
  MODEL_ARCH.MINICPM3: "minicpm3",
  MODEL_ARCH.GEMMA: "gemma",
  MODEL_ARCH.GEMMA2: "gemma2",
+ MODEL_ARCH.GEMMA3: "gemma3",
  MODEL_ARCH.STARCODER2: "starcoder2",
  MODEL_ARCH.RWKV6: "rwkv6",
  MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
+ MODEL_ARCH.RWKV7: "rwkv7",
+ MODEL_ARCH.ARWKV7: "arwkv7",
  MODEL_ARCH.MAMBA: "mamba",
  MODEL_ARCH.XVERSE: "xverse",
  MODEL_ARCH.COMMAND_R: "command-r",
@@ -460,6 +612,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
  MODEL_ARCH.DEEPSEEK: "deepseek",
  MODEL_ARCH.DEEPSEEK2: "deepseek2",
  MODEL_ARCH.CHATGLM: "chatglm",
+ MODEL_ARCH.GLM4: "glm4",
  MODEL_ARCH.BITNET: "bitnet",
  MODEL_ARCH.T5: "t5",
  MODEL_ARCH.T5ENCODER: "t5encoder",
@@ -470,6 +623,20 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
  MODEL_ARCH.GRANITE_MOE: "granitemoe",
  MODEL_ARCH.CHAMELEON: "chameleon",
  MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
+ MODEL_ARCH.PLM: "plm",
+ MODEL_ARCH.BAILINGMOE: "bailingmoe",
+ MODEL_ARCH.DOTS1: "dots1",
+ MODEL_ARCH.ARCEE: "arcee",
+ }
+
+ VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
+ VISION_PROJECTOR_TYPE.MLP: "mlp",
+ VISION_PROJECTOR_TYPE.LDP: "ldp",
+ VISION_PROJECTOR_TYPE.LDPV2: "ldpv2",
+ VISION_PROJECTOR_TYPE.RESAMPLER: "resampler",
+ VISION_PROJECTOR_TYPE.GLM_EDGE: "adapter",
+ VISION_PROJECTOR_TYPE.MERGER: "qwen2vl_merger",
+ VISION_PROJECTOR_TYPE.GEMMA3: "gemma3",
  }

  TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -519,8 +686,20 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
  MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
  MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
  MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
+ MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
  MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
  MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
+ MODEL_TENSOR.TIME_MIX_A0: "blk.{bid}.time_mix_a0",
+ MODEL_TENSOR.TIME_MIX_A1: "blk.{bid}.time_mix_a1",
+ MODEL_TENSOR.TIME_MIX_A2: "blk.{bid}.time_mix_a2",
+ MODEL_TENSOR.TIME_MIX_V0: "blk.{bid}.time_mix_v0",
+ MODEL_TENSOR.TIME_MIX_V1: "blk.{bid}.time_mix_v1",
+ MODEL_TENSOR.TIME_MIX_V2: "blk.{bid}.time_mix_v2",
+ MODEL_TENSOR.TIME_MIX_G1: "blk.{bid}.time_mix_g1",
+ MODEL_TENSOR.TIME_MIX_G2: "blk.{bid}.time_mix_g2",
+ MODEL_TENSOR.TIME_MIX_K_K: "blk.{bid}.time_mix_k_k",
+ MODEL_TENSOR.TIME_MIX_K_A: "blk.{bid}.time_mix_k_a",
+ MODEL_TENSOR.TIME_MIX_R_K: "blk.{bid}.time_mix_r_k",
  MODEL_TENSOR.TIME_MIX_LERP_X: "blk.{bid}.time_mix_lerp_x",
  MODEL_TENSOR.TIME_MIX_LERP_K: "blk.{bid}.time_mix_lerp_k",
  MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
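Each TENSOR_NAMES entry is a per-block name template; a one-line sketch of how a block index gets substituted (the index 3 is arbitrary, import path assumed as upstream):

    from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

    print(TENSOR_NAMES[MODEL_TENSOR.TIME_MIX_W0].format(bid=3))  # blk.3.time_mix_w0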
@@ -547,6 +726,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
  MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
  MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
  MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
+ MODEL_TENSOR.ATTN_K_B: "blk.{bid}.attn_k_b",
+ MODEL_TENSOR.ATTN_V_B: "blk.{bid}.attn_v_b",
  MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
  MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
  MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
@@ -597,9 +778,126 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
  MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
  MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
  MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
+ # vision
+ MODEL_TENSOR.V_MMPROJ: "mm.{bid}",
+ MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
+ MODEL_TENSOR.V_MMPROJ_MLP: "mm.model.mlp.{bid}",
+ MODEL_TENSOR.V_MMPROJ_PEG: "mm.model.peg.{bid}",
+ MODEL_TENSOR.V_ENC_EMBD_CLS: "v.class_embd",
+ MODEL_TENSOR.V_ENC_EMBD_PATCH: "v.patch_embd",
+ MODEL_TENSOR.V_ENC_EMBD_POS: "v.position_embd",
+ MODEL_TENSOR.V_ENC_ATTN_Q: "v.blk.{bid}.attn_q",
+ MODEL_TENSOR.V_ENC_ATTN_Q_NORM: "v.blk.{bid}.attn_q_norm",
+ MODEL_TENSOR.V_ENC_ATTN_K: "v.blk.{bid}.attn_k",
+ MODEL_TENSOR.V_ENC_ATTN_K_NORM: "v.blk.{bid}.attn_k_norm",
+ MODEL_TENSOR.V_ENC_ATTN_V: "v.blk.{bid}.attn_v",
+ MODEL_TENSOR.V_ENC_INPUT_NORM: "v.blk.{bid}.ln1",
+ MODEL_TENSOR.V_ENC_ATTN_O: "v.blk.{bid}.attn_out",
+ MODEL_TENSOR.V_ENC_ATTN_O_NORM: "v.blk.{bid}.attn_out_norm",
+ MODEL_TENSOR.V_ENC_POST_ATTN_NORM: "v.blk.{bid}.ln2",
+ MODEL_TENSOR.V_ENC_FFN_UP: "v.blk.{bid}.ffn_up",
+ MODEL_TENSOR.V_ENC_FFN_GATE: "v.blk.{bid}.ffn_gate",
+ MODEL_TENSOR.V_ENC_FFN_DOWN: "v.blk.{bid}.ffn_down",
+ MODEL_TENSOR.V_LAYER_SCALE_1: "v.blk.{bid}.ls1",
+ MODEL_TENSOR.V_LAYER_SCALE_2: "v.blk.{bid}.ls2",
+ MODEL_TENSOR.V_PRE_NORM: "v.pre_ln",
+ MODEL_TENSOR.V_POST_NORM: "v.post_ln",
+ MODEL_TENSOR.V_MM_INP_PROJ: "mm.input_projection",
+ MODEL_TENSOR.V_MM_INP_NORM: "mm.input_norm",
+ MODEL_TENSOR.V_MM_SOFT_EMB_NORM: "mm.soft_emb_norm",
+ MODEL_TENSOR.V_RESMPL_POS_EMBD_K: "resampler.pos_embd_k",
+ MODEL_TENSOR.V_RESMPL_ATTN_Q: "resampler.attn.q",
+ MODEL_TENSOR.V_RESMPL_ATTN_K: "resampler.attn.k",
+ MODEL_TENSOR.V_RESMPL_ATTN_V: "resampler.attn.v",
+ MODEL_TENSOR.V_RESMPL_ATTN_OUT: "resampler.attn.out",
+ MODEL_TENSOR.V_RESMPL_KV: "resampler.kv",
+ MODEL_TENSOR.V_RESMPL_KV_NORM: "resampler.ln_kv",
+ MODEL_TENSOR.V_RESMPL_POST_NORM: "resampler.ln_post",
+ MODEL_TENSOR.V_RESMPL_Q_NORM: "resampler.ln_q",
+ MODEL_TENSOR.V_RESMPL_PROJ: "resampler.proj",
+ MODEL_TENSOR.V_RESMPL_QUERY: "resampler.query",
+ MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break", # pixtral
+ MODEL_TENSOR.V_MM_PATCH_MERGER: "mm.patch_merger", # mistral small 3.1
+ # audio (mtmd)
+ MODEL_TENSOR.A_ENC_EMBD_POS: "a.position_embd",
+ MODEL_TENSOR.A_ENC_CONV1D: "a.conv1d.{bid}",
+ MODEL_TENSOR.A_PRE_NORM: "a.pre_ln",
+ MODEL_TENSOR.A_POST_NORM: "a.post_ln",
+ MODEL_TENSOR.A_ENC_ATTN_Q: "a.blk.{bid}.attn_q",
+ MODEL_TENSOR.A_ENC_ATTN_K: "a.blk.{bid}.attn_k",
+ MODEL_TENSOR.A_ENC_ATTN_V: "a.blk.{bid}.attn_v",
+ MODEL_TENSOR.A_ENC_INPUT_NORM: "a.blk.{bid}.ln1",
+ MODEL_TENSOR.A_ENC_OUTPUT: "a.blk.{bid}.attn_out",
+ MODEL_TENSOR.A_ENC_OUTPUT_NORM: "a.blk.{bid}.ln2",
+ MODEL_TENSOR.A_ENC_FFN_UP: "a.blk.{bid}.ffn_up",
+ MODEL_TENSOR.A_ENC_FFN_GATE: "a.blk.{bid}.ffn_gate",
+ MODEL_TENSOR.A_ENC_FFN_DOWN: "a.blk.{bid}.ffn_down",
+ MODEL_TENSOR.A_MMPROJ: "mm.a.mlp.{bid}",
+ MODEL_TENSOR.A_MMPROJ_FC: "mm.a.fc",
+ MODEL_TENSOR.A_MM_NORM_PRE: "mm.a.norm_pre",
+ MODEL_TENSOR.A_MM_NORM_MID: "mm.a.norm_mid",
  }

  MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
+ MODEL_ARCH.MMPROJ: [
+ MODEL_TENSOR.V_MMPROJ,
+ MODEL_TENSOR.V_MMPROJ_FC,
+ MODEL_TENSOR.V_MMPROJ_MLP,
+ MODEL_TENSOR.V_MMPROJ_PEG,
+ MODEL_TENSOR.V_ENC_EMBD_CLS,
+ MODEL_TENSOR.V_ENC_EMBD_PATCH,
+ MODEL_TENSOR.V_ENC_EMBD_POS,
+ MODEL_TENSOR.V_ENC_INPUT_NORM,
+ MODEL_TENSOR.V_ENC_ATTN_Q,
+ MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
+ MODEL_TENSOR.V_ENC_ATTN_K,
+ MODEL_TENSOR.V_ENC_ATTN_K_NORM,
+ MODEL_TENSOR.V_ENC_ATTN_V,
+ MODEL_TENSOR.V_ENC_ATTN_O,
+ MODEL_TENSOR.V_ENC_ATTN_O_NORM,
+ MODEL_TENSOR.V_ENC_POST_ATTN_NORM,
+ MODEL_TENSOR.V_ENC_FFN_UP,
+ MODEL_TENSOR.V_ENC_FFN_GATE,
+ MODEL_TENSOR.V_ENC_FFN_DOWN,
+ MODEL_TENSOR.V_LAYER_SCALE_1,
+ MODEL_TENSOR.V_LAYER_SCALE_2,
+ MODEL_TENSOR.V_PRE_NORM,
+ MODEL_TENSOR.V_POST_NORM,
+ MODEL_TENSOR.V_MM_INP_PROJ,
+ MODEL_TENSOR.V_MM_INP_NORM,
+ MODEL_TENSOR.V_MM_SOFT_EMB_NORM,
+ MODEL_TENSOR.V_RESMPL_POS_EMBD_K,
+ MODEL_TENSOR.V_RESMPL_ATTN_Q,
+ MODEL_TENSOR.V_RESMPL_ATTN_K,
+ MODEL_TENSOR.V_RESMPL_ATTN_V,
+ MODEL_TENSOR.V_RESMPL_ATTN_OUT,
+ MODEL_TENSOR.V_RESMPL_KV,
+ MODEL_TENSOR.V_RESMPL_KV_NORM,
+ MODEL_TENSOR.V_RESMPL_POST_NORM,
+ MODEL_TENSOR.V_RESMPL_Q_NORM,
+ MODEL_TENSOR.V_RESMPL_PROJ,
+ MODEL_TENSOR.V_RESMPL_QUERY,
+ MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
+ MODEL_TENSOR.V_MM_PATCH_MERGER,
+ # audio
+ MODEL_TENSOR.A_ENC_EMBD_POS,
+ MODEL_TENSOR.A_ENC_CONV1D,
+ MODEL_TENSOR.A_PRE_NORM,
+ MODEL_TENSOR.A_POST_NORM,
+ MODEL_TENSOR.A_ENC_ATTN_Q,
+ MODEL_TENSOR.A_ENC_ATTN_K,
+ MODEL_TENSOR.A_ENC_ATTN_V,
+ MODEL_TENSOR.A_ENC_INPUT_NORM,
+ MODEL_TENSOR.A_ENC_OUTPUT,
+ MODEL_TENSOR.A_ENC_OUTPUT_NORM,
+ MODEL_TENSOR.A_ENC_FFN_UP,
+ MODEL_TENSOR.A_ENC_FFN_GATE,
+ MODEL_TENSOR.A_ENC_FFN_DOWN,
+ MODEL_TENSOR.A_MMPROJ,
+ MODEL_TENSOR.A_MMPROJ_FC,
+ MODEL_TENSOR.A_MM_NORM_PRE,
+ MODEL_TENSOR.A_MM_NORM_MID,
+ ],
  MODEL_ARCH.LLAMA: [
  MODEL_TENSOR.TOKEN_EMBD,
  MODEL_TENSOR.OUTPUT_NORM,
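The new MODEL_ARCH.MMPROJ entry gives the clip/mmproj path the same per-architecture tensor whitelist that the text models use; a quick membership check as a sketch (upstream gguf import path assumed):

    from gguf.constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS

    mmproj_tensors = MODEL_TENSORS[MODEL_ARCH.MMPROJ]
    print(MODEL_TENSOR.V_ENC_ATTN_Q in mmproj_tensors)   # True
    print(MODEL_TENSOR.A_MMPROJ_FC in mmproj_tensors)    # True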
@@ -620,6 +918,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.FFN_DOWN_EXP,
  MODEL_TENSOR.FFN_UP_EXP,
  ],
+ MODEL_ARCH.LLAMA4: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ROPE_FREQS,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_Q,
+ MODEL_TENSOR.ATTN_K,
+ MODEL_TENSOR.ATTN_V,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.ATTN_ROT_EMBD,
+ MODEL_TENSOR.FFN_GATE_INP,
+ MODEL_TENSOR.FFN_NORM,
+ MODEL_TENSOR.FFN_GATE,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_UP,
+ MODEL_TENSOR.FFN_GATE_EXP,
+ MODEL_TENSOR.FFN_DOWN_EXP,
+ MODEL_TENSOR.FFN_UP_EXP,
+ MODEL_TENSOR.FFN_GATE_SHEXP,
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
+ MODEL_TENSOR.FFN_UP_SHEXP,
+ ],
  MODEL_ARCH.DECI: [
  MODEL_TENSOR.TOKEN_EMBD,
  MODEL_TENSOR.OUTPUT_NORM,
@@ -719,6 +1040,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.POS_EMBD,
  MODEL_TENSOR.OUTPUT_NORM,
  MODEL_TENSOR.ATTN_OUT_NORM,
+ MODEL_TENSOR.ATTN_QKV,
  MODEL_TENSOR.ATTN_Q,
  MODEL_TENSOR.ATTN_K,
  MODEL_TENSOR.ATTN_V,
@@ -743,6 +1065,22 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.FFN_UP,
  MODEL_TENSOR.LAYER_OUT_NORM,
  ],
+ MODEL_ARCH.NOMIC_BERT_MOE: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
+ MODEL_TENSOR.TOKEN_TYPES,
+ MODEL_TENSOR.POS_EMBD,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.ATTN_OUT_NORM,
+ MODEL_TENSOR.ATTN_QKV,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_UP,
+ MODEL_TENSOR.FFN_GATE_INP,
+ MODEL_TENSOR.FFN_DOWN_EXP,
+ MODEL_TENSOR.FFN_UP_EXP,
+ MODEL_TENSOR.LAYER_OUT_NORM,
+ ],
  MODEL_ARCH.JINA_BERT_V2: [
  MODEL_TENSOR.TOKEN_EMBD,
  MODEL_TENSOR.TOKEN_EMBD_NORM,
@@ -1115,6 +1453,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.FFN_PRE_NORM,
  MODEL_TENSOR.FFN_POST_NORM,
  ],
+ MODEL_ARCH.GEMMA3: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.ATTN_Q,
+ MODEL_TENSOR.ATTN_Q_NORM,
+ MODEL_TENSOR.ATTN_K,
+ MODEL_TENSOR.ATTN_K_NORM,
+ MODEL_TENSOR.ATTN_V,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.FFN_GATE,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_UP,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_POST_NORM,
+ MODEL_TENSOR.FFN_PRE_NORM,
+ MODEL_TENSOR.FFN_POST_NORM,
+ ],
  MODEL_ARCH.STARCODER2: [
  MODEL_TENSOR.TOKEN_EMBD,
  MODEL_TENSOR.OUTPUT_NORM,
@@ -1191,6 +1547,68 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.FFN_DOWN,
  MODEL_TENSOR.FFN_UP,
  ],
+ MODEL_ARCH.RWKV7: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_NORM_2,
+ MODEL_TENSOR.TIME_MIX_LERP_FUSED,
+ MODEL_TENSOR.TIME_MIX_W0,
+ MODEL_TENSOR.TIME_MIX_W1,
+ MODEL_TENSOR.TIME_MIX_W2,
+ MODEL_TENSOR.TIME_MIX_A0,
+ MODEL_TENSOR.TIME_MIX_A1,
+ MODEL_TENSOR.TIME_MIX_A2,
+ MODEL_TENSOR.TIME_MIX_V0,
+ MODEL_TENSOR.TIME_MIX_V1,
+ MODEL_TENSOR.TIME_MIX_V2,
+ MODEL_TENSOR.TIME_MIX_G1,
+ MODEL_TENSOR.TIME_MIX_G2,
+ MODEL_TENSOR.TIME_MIX_K_K,
+ MODEL_TENSOR.TIME_MIX_K_A,
+ MODEL_TENSOR.TIME_MIX_R_K,
+ MODEL_TENSOR.TIME_MIX_KEY,
+ MODEL_TENSOR.TIME_MIX_VALUE,
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+ MODEL_TENSOR.TIME_MIX_LN,
+ MODEL_TENSOR.TIME_MIX_OUTPUT,
+ MODEL_TENSOR.CHANNEL_MIX_LERP_K,
+ MODEL_TENSOR.CHANNEL_MIX_KEY,
+ MODEL_TENSOR.CHANNEL_MIX_VALUE,
+ ],
+ MODEL_ARCH.ARWKV7: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.TIME_MIX_LERP_FUSED,
+ MODEL_TENSOR.TIME_MIX_W0,
+ MODEL_TENSOR.TIME_MIX_W1,
+ MODEL_TENSOR.TIME_MIX_W2,
+ MODEL_TENSOR.TIME_MIX_A0,
+ MODEL_TENSOR.TIME_MIX_A1,
+ MODEL_TENSOR.TIME_MIX_A2,
+ MODEL_TENSOR.TIME_MIX_V0,
+ MODEL_TENSOR.TIME_MIX_V1,
+ MODEL_TENSOR.TIME_MIX_V2,
+ MODEL_TENSOR.TIME_MIX_G1,
+ MODEL_TENSOR.TIME_MIX_G2,
+ MODEL_TENSOR.TIME_MIX_K_K,
+ MODEL_TENSOR.TIME_MIX_K_A,
+ MODEL_TENSOR.TIME_MIX_R_K,
+ MODEL_TENSOR.TIME_MIX_KEY,
+ MODEL_TENSOR.TIME_MIX_VALUE,
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+ MODEL_TENSOR.TIME_MIX_LN,
+ MODEL_TENSOR.TIME_MIX_OUTPUT,
+ MODEL_TENSOR.FFN_NORM,
+ MODEL_TENSOR.FFN_GATE,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_UP,
+ ],
  MODEL_ARCH.MAMBA: [
  MODEL_TENSOR.TOKEN_EMBD,
  MODEL_TENSOR.OUTPUT_NORM,
@@ -1371,6 +1789,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.ATTN_Q_B,
  MODEL_TENSOR.ATTN_KV_A_MQA,
  MODEL_TENSOR.ATTN_KV_B,
+ MODEL_TENSOR.ATTN_K_B,
+ MODEL_TENSOR.ATTN_V_B,
  MODEL_TENSOR.ATTN_Q_A_NORM,
  MODEL_TENSOR.ATTN_KV_A_NORM,
  MODEL_TENSOR.ATTN_OUT,
@@ -1388,6 +1808,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.FFN_UP_SHEXP,
  MODEL_TENSOR.FFN_EXP_PROBS_B,
  ],
+ MODEL_ARCH.PLM: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_Q,
+ MODEL_TENSOR.ATTN_KV_A_MQA,
+ MODEL_TENSOR.ATTN_KV_A_NORM,
+ MODEL_TENSOR.ATTN_KV_B,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.FFN_NORM,
+ MODEL_TENSOR.FFN_UP,
+ MODEL_TENSOR.FFN_DOWN,
+ ],
  MODEL_ARCH.CHATGLM : [
  MODEL_TENSOR.TOKEN_EMBD,
  MODEL_TENSOR.ROPE_FREQS,
@@ -1403,6 +1837,23 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.FFN_DOWN,
  MODEL_TENSOR.FFN_UP,
  ],
+ MODEL_ARCH.GLM4 : [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.ROPE_FREQS,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_QKV,
+ MODEL_TENSOR.ATTN_Q,
+ MODEL_TENSOR.ATTN_K,
+ MODEL_TENSOR.ATTN_V,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.FFN_NORM,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_UP,
+ MODEL_TENSOR.ATTN_POST_NORM,
+ MODEL_TENSOR.FFN_POST_NORM,
+ ],
  MODEL_ARCH.BITNET: [
  MODEL_TENSOR.ATTN_Q,
  MODEL_TENSOR.ATTN_K,
@@ -1536,6 +1987,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.FFN_GATE_EXP,
  MODEL_TENSOR.FFN_DOWN_EXP,
  MODEL_TENSOR.FFN_UP_EXP,
+ MODEL_TENSOR.FFN_GATE_SHEXP,
+ MODEL_TENSOR.FFN_UP_SHEXP,
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
  ],
  MODEL_ARCH.CHAMELEON: [
  MODEL_TENSOR.TOKEN_EMBD,
@@ -1575,6 +2029,64 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.POSNET_ATTN_V,
  MODEL_TENSOR.POSNET_ATTN_OUT,
  ],
+ MODEL_ARCH.BAILINGMOE: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ROPE_FREQS,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_Q,
+ MODEL_TENSOR.ATTN_K,
+ MODEL_TENSOR.ATTN_V,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.FFN_GATE_INP,
+ MODEL_TENSOR.FFN_NORM,
+ MODEL_TENSOR.FFN_GATE_EXP,
+ MODEL_TENSOR.FFN_DOWN_EXP,
+ MODEL_TENSOR.FFN_UP_EXP,
+ MODEL_TENSOR.FFN_GATE_SHEXP,
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
+ MODEL_TENSOR.FFN_UP_SHEXP,
+ ],
+ MODEL_ARCH.DOTS1: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_Q,
+ MODEL_TENSOR.ATTN_Q_NORM,
+ MODEL_TENSOR.ATTN_K,
+ MODEL_TENSOR.ATTN_K_NORM,
+ MODEL_TENSOR.ATTN_V,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.FFN_EXP_PROBS_B,
+ MODEL_TENSOR.FFN_NORM,
+ MODEL_TENSOR.FFN_GATE,
+ MODEL_TENSOR.FFN_GATE_EXP,
+ MODEL_TENSOR.FFN_GATE_INP,
+ MODEL_TENSOR.FFN_GATE_SHEXP,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_DOWN_EXP,
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
+ MODEL_TENSOR.FFN_UP,
+ MODEL_TENSOR.FFN_UP_EXP,
+ MODEL_TENSOR.FFN_UP_SHEXP,
+ ],
+ MODEL_ARCH.ARCEE: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ROPE_FREQS,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_Q,
+ MODEL_TENSOR.ATTN_K,
+ MODEL_TENSOR.ATTN_V,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.ATTN_ROT_EMBD,
+ MODEL_TENSOR.FFN_NORM,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_UP,
+ ],
  # TODO
  }

@@ -1627,6 +2139,9 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
  MODEL_TENSOR.ROPE_FREQS,
  MODEL_TENSOR.ATTN_ROT_EMBD,
  ],
+ MODEL_ARCH.BAILINGMOE: [
+ MODEL_TENSOR.ROPE_FREQS,
+ ],
  }

  #
@@ -1654,6 +2169,8 @@ class PoolingType(IntEnum):
  NONE = 0
  MEAN = 1
  CLS = 2
+ LAST = 3
+ RANK = 4


  class GGMLQuantizationType(IntEnum):
@@ -1780,6 +2297,19 @@ class GGUFValueType(IntEnum):
  raise ValueError(f"Unknown type: {type(val)}")


+ class VisionProjectorType:
+ GEMMA3 = "gemma3"
+ IDEFICS3 = "idefics3"
+ PIXTRAL = "pixtral"
+ LLAMA4 = "llama4"
+ QWEN2VL = "qwen2vl_merger"
+ QWEN25VL = "qwen2.5vl_merger"
+ ULTRAVOX = "ultravox"
+ INTERNVL = "internvl"
+ QWEN2A = "qwen2a" # audio
+ QWEN25O = "qwen2.5o" # omni
+
+
  # Items here are (block size, type size)
  QK_K = 256
  GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
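The VisionProjectorType strings presumably end up as the value stored under the clip.projector_type key added earlier in this diff; a small sketch of the pairing (the exact write path is an assumption and is not shown in this diff):

    from gguf.constants import Keys, VisionProjectorType

    print(Keys.Clip.PROJECTOR_TYPE)       # clip.projector_type
    print(VisionProjectorType.QWEN25VL)   # qwen2.5vl_merger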