bigdl-core-cpp 2.7.0b20250630__py3-none-win_amd64.whl → 2.7.0b20250702__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +1987 -558
- bigdl/cpp/convert_hf_to_gguf_update.py +131 -67
- bigdl/cpp/convert_lora_to_gguf.py +3 -3
- bigdl/cpp/gguf-py/gguf/constants.py +546 -16
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +57 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +119 -7
- bigdl/cpp/gguf-py/gguf/lazy.py +10 -0
- bigdl/cpp/gguf-py/gguf/metadata.py +28 -8
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +461 -48
- bigdl/cpp/gguf-py/gguf/utility.py +195 -0
- bigdl/cpp/gguf-py/gguf/vocab.py +6 -1
- bigdl/cpp/libs/llama_cpp/ggml-base.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml.dll +0 -0
- bigdl/cpp/libs/llama_cpp/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-server.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama/ggml.dll +0 -0
- bigdl/cpp/libs/ollama/llama.dll +0 -0
- bigdl/cpp/libs/ollama/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama/mtmd_shared.dll +0 -0
- bigdl/cpp/libs/ollama/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama/ollama.exe +0 -0
- {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250702.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.7.0b20250702.dist-info/RECORD +56 -0
- bigdl/cpp/libs/llama_cpp/llava_shared.dll +0 -0
- bigdl_core_cpp-2.7.0b20250630.dist-info/RECORD +0 -57
- {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250702.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250702.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250702.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250702.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250702.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/constants.py

@@ -104,6 +104,7 @@ class Keys:
         EXPERT_WEIGHTS_SCALE         = "{arch}.expert_weights_scale"
         EXPERT_WEIGHTS_NORM          = "{arch}.expert_weights_norm"
         EXPERT_GATING_FUNC           = "{arch}.expert_gating_func"
+        MOE_EVERY_N_LAYERS           = "{arch}.moe_every_n_layers"
         POOLING_TYPE                 = "{arch}.pooling_type"
         LOGIT_SCALE                  = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID       = "{arch}.decoder_start_token_id"
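The Keys constants are "{arch}"-templated format strings; writers substitute the architecture name when emitting metadata. A minimal sketch of how the new key resolves to a concrete GGUF key (the "llama" value is illustrative, not from this diff):

    # plain str.format on the templated constant
    MOE_EVERY_N_LAYERS = "{arch}.moe_every_n_layers"
    assert MOE_EVERY_N_LAYERS.format(arch="llama") == "llama.moe_every_n_layers"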
@@ -116,24 +117,31 @@ class Keys:
         RESIDUAL_SCALE               = "{arch}.residual_scale"
         EMBEDDING_SCALE              = "{arch}.embedding_scale"
         TOKEN_SHIFT_COUNT            = "{arch}.token_shift_count"
+        INTERLEAVE_MOE_LAYER_STEP    = "{arch}.interleave_moe_layer_step"
 
     class Attention:
-        HEAD_COUNT        = "{arch}.attention.head_count"
-        HEAD_COUNT_KV     = "{arch}.attention.head_count_kv"
-        MAX_ALIBI_BIAS    = "{arch}.attention.max_alibi_bias"
-        CLAMP_KQV         = "{arch}.attention.clamp_kqv"
-        KEY_LENGTH        = "{arch}.attention.key_length"
-        VALUE_LENGTH      = "{arch}.attention.value_length"
-        LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
-        LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
-        GROUPNORM_EPS     = "{arch}.attention.group_norm_epsilon"
-        GROUPNORM_GROUPS  = "{arch}.attention.group_norm_groups"
-        CAUSAL            = "{arch}.attention.causal"
-        Q_LORA_RANK       = "{arch}.attention.q_lora_rank"
-        KV_LORA_RANK      = "{arch}.attention.kv_lora_rank"
-        REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
-        SLIDING_WINDOW    = "{arch}.attention.sliding_window"
-        SCALE             = "{arch}.attention.scale"
+        HEAD_COUNT                   = "{arch}.attention.head_count"
+        HEAD_COUNT_KV                = "{arch}.attention.head_count_kv"
+        MAX_ALIBI_BIAS               = "{arch}.attention.max_alibi_bias"
+        CLAMP_KQV                    = "{arch}.attention.clamp_kqv"
+        KEY_LENGTH                   = "{arch}.attention.key_length"
+        VALUE_LENGTH                 = "{arch}.attention.value_length"
+        LAYERNORM_EPS                = "{arch}.attention.layer_norm_epsilon"
+        LAYERNORM_RMS_EPS            = "{arch}.attention.layer_norm_rms_epsilon"
+        GROUPNORM_EPS                = "{arch}.attention.group_norm_epsilon"
+        GROUPNORM_GROUPS             = "{arch}.attention.group_norm_groups"
+        CAUSAL                       = "{arch}.attention.causal"
+        Q_LORA_RANK                  = "{arch}.attention.q_lora_rank"
+        KV_LORA_RANK                 = "{arch}.attention.kv_lora_rank"
+        DECAY_LORA_RANK              = "{arch}.attention.decay_lora_rank"
+        ICLR_LORA_RANK               = "{arch}.attention.iclr_lora_rank"
+        VALUE_RESIDUAL_MIX_LORA_RANK = "{arch}.attention.value_residual_mix_lora_rank"
+        GATE_LORA_RANK               = "{arch}.attention.gate_lora_rank"
+        REL_BUCKETS_COUNT            = "{arch}.attention.relative_buckets_count"
+        SLIDING_WINDOW               = "{arch}.attention.sliding_window"
+        SCALE                        = "{arch}.attention.scale"
+        KEY_LENGTH_MLA               = "{arch}.attention.key_length_mla"
+        VALUE_LENGTH_MLA             = "{arch}.attention.value_length_mla"
 
     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"
@@ -169,6 +177,9 @@ class Keys:
         EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
         BLOCK_COUNT      = "{arch}.convnext.block_count"
 
+    class Classifier:
+        OUTPUT_LABELS = "{arch}.classifier.output_labels"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE   = "tokenizer.ggml.pre"
@@ -211,6 +222,47 @@ class Keys:
         TYPE       = "adapter.type"
         LORA_ALPHA = "adapter.lora.alpha"
 
+    class Clip:
+        PROJECTOR_TYPE      = "clip.projector_type"
+        HAS_VISION_ENCODER  = "clip.has_vision_encoder"
+        HAS_AUDIO_ENCODER   = "clip.has_audio_encoder"
+        HAS_LLAVA_PROJECTOR = "clip.has_llava_projector"
+
+    class ClipVision:
+        IMAGE_SIZE          = "clip.vision.image_size"
+        PATCH_SIZE          = "clip.vision.patch_size"
+        EMBEDDING_LENGTH    = "clip.vision.embedding_length"
+        FEED_FORWARD_LENGTH = "clip.vision.feed_forward_length"
+        PROJECTION_DIM      = "clip.vision.projection_dim"
+        BLOCK_COUNT         = "clip.vision.block_count"
+        IMAGE_MEAN          = "clip.vision.image_mean"
+        IMAGE_STD           = "clip.vision.image_std"
+        SPATIAL_MERGE_SIZE  = "clip.vision.spatial_merge_size"
+        USE_GELU            = "clip.use_gelu"
+        USE_SILU            = "clip.use_silu"
+        N_WA_PATTERN        = "clip.vision.n_wa_pattern"  # used by qwen2.5vl
+
+        class Attention:
+            HEAD_COUNT    = "clip.vision.attention.head_count"
+            LAYERNORM_EPS = "clip.vision.attention.layer_norm_epsilon"
+
+        class Projector:
+            SCALE_FACTOR = "clip.vision.projector.scale_factor"
+
+    class ClipAudio:
+        NUM_MEL_BINS        = "clip.audio.num_mel_bins"
+        EMBEDDING_LENGTH    = "clip.audio.embedding_length"
+        FEED_FORWARD_LENGTH = "clip.audio.feed_forward_length"
+        PROJECTION_DIM      = "clip.audio.projection_dim"
+        BLOCK_COUNT         = "clip.audio.block_count"
+
+        class Attention:
+            HEAD_COUNT    = "clip.audio.attention.head_count"
+            LAYERNORM_EPS = "clip.audio.attention.layer_norm_epsilon"
+
+        class Projector:
+            STACK_FACTOR = "clip.audio.projector.stack_factor"
+
 #
 # recommended mapping of model tensor names for storage in gguf
 #
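Unlike the "{arch}"-templated keys above, the new Clip, ClipVision, and ClipAudio keys are literal strings with no placeholder, so they can be used directly. A short sketch, assuming the standalone gguf-py package layout for the import:

    from gguf.constants import Keys

    # nested class attribute access yields the final GGUF key
    assert Keys.ClipVision.Attention.HEAD_COUNT == "clip.vision.attention.head_count"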
@@ -219,10 +271,13 @@ class Keys:
 class GGUFType:
     MODEL   = "model"
     ADAPTER = "adapter"
+    MMPROJ  = "mmproj"  # dummy, unused for now
 
 
 class MODEL_ARCH(IntEnum):
+    MMPROJ           = auto()  # dummy arch for clip.cpp
     LLAMA            = auto()
+    LLAMA4           = auto()
     DECI             = auto()
     FALCON           = auto()
     BAICHUAN         = auto()
@@ -235,6 +290,7 @@ class MODEL_ARCH(IntEnum):
     REFACT           = auto()
     BERT             = auto()
     NOMIC_BERT       = auto()
+    NOMIC_BERT_MOE   = auto()
     JINA_BERT_V2     = auto()
     BLOOM            = auto()
     STABLELM         = auto()
|
|
255
311
|
MINICPM3 = auto()
|
256
312
|
GEMMA = auto()
|
257
313
|
GEMMA2 = auto()
|
314
|
+
GEMMA3 = auto()
|
258
315
|
STARCODER2 = auto()
|
259
316
|
RWKV6 = auto()
|
260
317
|
RWKV6QWEN2 = auto()
|
318
|
+
RWKV7 = auto()
|
319
|
+
ARWKV7 = auto()
|
261
320
|
MAMBA = auto()
|
262
321
|
XVERSE = auto()
|
263
322
|
COMMAND_R = auto()
|
@@ -271,6 +330,7 @@ class MODEL_ARCH(IntEnum):
     DEEPSEEK         = auto()
     DEEPSEEK2        = auto()
     CHATGLM          = auto()
+    GLM4             = auto()
     BITNET           = auto()
     T5               = auto()
     T5ENCODER        = auto()
@@ -281,6 +341,20 @@ class MODEL_ARCH(IntEnum):
     GRANITE_MOE      = auto()
     CHAMELEON        = auto()
     WAVTOKENIZER_DEC = auto()
+    PLM              = auto()
+    BAILINGMOE       = auto()
+    DOTS1            = auto()
+    ARCEE            = auto()
+
+
+class VISION_PROJECTOR_TYPE(IntEnum):
+    MLP       = auto()
+    LDP       = auto()
+    LDPV2     = auto()
+    RESAMPLER = auto()
+    GLM_EDGE  = auto()
+    MERGER    = auto()
+    GEMMA3    = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -330,8 +404,20 @@ class MODEL_TENSOR(IntEnum):
     SSM_A                = auto()
     SSM_D                = auto()
     SSM_OUT              = auto()
+    TIME_MIX_W0          = auto()
     TIME_MIX_W1          = auto()
     TIME_MIX_W2          = auto()
+    TIME_MIX_A0          = auto()
+    TIME_MIX_A1          = auto()
+    TIME_MIX_A2          = auto()
+    TIME_MIX_V0          = auto()
+    TIME_MIX_V1          = auto()
+    TIME_MIX_V2          = auto()
+    TIME_MIX_G1          = auto()
+    TIME_MIX_G2          = auto()
+    TIME_MIX_K_K         = auto()
+    TIME_MIX_K_A         = auto()
+    TIME_MIX_R_K         = auto()
     TIME_MIX_LERP_X      = auto()
     TIME_MIX_LERP_K      = auto()
     TIME_MIX_LERP_V      = auto()
@@ -358,6 +444,8 @@ class MODEL_TENSOR(IntEnum):
     ATTN_Q_B             = auto()
     ATTN_KV_A_MQA        = auto()
     ATTN_KV_B            = auto()
+    ATTN_K_B             = auto()
+    ATTN_V_B             = auto()
     ATTN_Q_A_NORM        = auto()
     ATTN_KV_A_NORM       = auto()
     FFN_SUB_NORM         = auto()
@@ -408,10 +496,70 @@ class MODEL_TENSOR(IntEnum):
     POSNET_ATTN_K        = auto()
     POSNET_ATTN_V        = auto()
     POSNET_ATTN_OUT      = auto()
+    # vision
+    V_MMPROJ             = auto()
+    V_MMPROJ_FC          = auto()
+    V_MMPROJ_MLP         = auto()
+    V_MMPROJ_PEG         = auto()
+    V_ENC_EMBD_CLS       = auto()
+    V_ENC_EMBD_PATCH     = auto()
+    V_ENC_EMBD_POS       = auto()
+    V_ENC_INPUT_NORM     = auto()
+    V_ENC_ATTN_Q         = auto()
+    V_ENC_ATTN_Q_NORM    = auto()
+    V_ENC_ATTN_K         = auto()
+    V_ENC_ATTN_K_NORM    = auto()
+    V_ENC_ATTN_V         = auto()
+    V_ENC_ATTN_O         = auto()
+    V_ENC_ATTN_O_NORM    = auto()
+    V_ENC_POST_ATTN_NORM = auto()
+    V_ENC_FFN_UP         = auto()
+    V_ENC_FFN_GATE       = auto()
+    V_ENC_FFN_DOWN       = auto()
+    V_LAYER_SCALE_1      = auto()
+    V_LAYER_SCALE_2      = auto()
+    V_PRE_NORM           = auto()
+    V_POST_NORM          = auto()
+    V_MM_INP_NORM        = auto()
+    V_MM_INP_PROJ        = auto()  # gemma3
+    V_MM_SOFT_EMB_NORM   = auto()  # gemma3
+    V_RESMPL_POS_EMBD_K  = auto()  # minicpmv
+    V_RESMPL_ATTN_Q      = auto()  # minicpmv
+    V_RESMPL_ATTN_K      = auto()  # minicpmv
+    V_RESMPL_ATTN_V      = auto()  # minicpmv
+    V_RESMPL_ATTN_OUT    = auto()  # minicpmv
+    V_RESMPL_KV          = auto()  # minicpmv
+    V_RESMPL_KV_NORM     = auto()  # minicpmv
+    V_RESMPL_POST_NORM   = auto()  # minicpmv
+    V_RESMPL_Q_NORM      = auto()  # minicpmv
+    V_RESMPL_PROJ        = auto()  # minicpmv
+    V_RESMPL_QUERY       = auto()  # minicpmv
+    V_TOK_EMBD_IMG_BREAK = auto()  # pixtral
+    V_MM_PATCH_MERGER    = auto()  # mistral small 3.1
+    # audio (mtmd)
+    A_ENC_EMBD_POS       = auto()
+    A_ENC_CONV1D         = auto()
+    A_PRE_NORM           = auto()
+    A_POST_NORM          = auto()
+    A_ENC_ATTN_Q         = auto()
+    A_ENC_ATTN_K         = auto()
+    A_ENC_ATTN_V         = auto()
+    A_ENC_INPUT_NORM     = auto()
+    A_ENC_OUTPUT         = auto()
+    A_ENC_OUTPUT_NORM    = auto()
+    A_ENC_FFN_UP         = auto()
+    A_ENC_FFN_GATE       = auto()
+    A_ENC_FFN_DOWN       = auto()
+    A_MMPROJ             = auto()
+    A_MMPROJ_FC          = auto()
+    A_MM_NORM_PRE        = auto()
+    A_MM_NORM_MID        = auto()
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
+    MODEL_ARCH.MMPROJ:           "clip",  # dummy arch for clip.cpp
     MODEL_ARCH.LLAMA:            "llama",
+    MODEL_ARCH.LLAMA4:           "llama4",
     MODEL_ARCH.DECI:             "deci",
     MODEL_ARCH.FALCON:           "falcon",
     MODEL_ARCH.BAICHUAN:         "baichuan",
@@ -424,6 +572,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.REFACT:           "refact",
     MODEL_ARCH.BERT:             "bert",
     MODEL_ARCH.NOMIC_BERT:       "nomic-bert",
+    MODEL_ARCH.NOMIC_BERT_MOE:   "nomic-bert-moe",
     MODEL_ARCH.JINA_BERT_V2:     "jina-bert-v2",
     MODEL_ARCH.BLOOM:            "bloom",
     MODEL_ARCH.STABLELM:         "stablelm",
@@ -444,9 +593,12 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.MINICPM3:         "minicpm3",
     MODEL_ARCH.GEMMA:            "gemma",
     MODEL_ARCH.GEMMA2:           "gemma2",
+    MODEL_ARCH.GEMMA3:           "gemma3",
     MODEL_ARCH.STARCODER2:       "starcoder2",
     MODEL_ARCH.RWKV6:            "rwkv6",
     MODEL_ARCH.RWKV6QWEN2:       "rwkv6qwen2",
+    MODEL_ARCH.RWKV7:            "rwkv7",
+    MODEL_ARCH.ARWKV7:           "arwkv7",
     MODEL_ARCH.MAMBA:            "mamba",
     MODEL_ARCH.XVERSE:           "xverse",
     MODEL_ARCH.COMMAND_R:        "command-r",
@@ -460,6 +612,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.DEEPSEEK:         "deepseek",
     MODEL_ARCH.DEEPSEEK2:        "deepseek2",
     MODEL_ARCH.CHATGLM:          "chatglm",
+    MODEL_ARCH.GLM4:             "glm4",
     MODEL_ARCH.BITNET:           "bitnet",
     MODEL_ARCH.T5:               "t5",
     MODEL_ARCH.T5ENCODER:        "t5encoder",
@@ -470,6 +623,20 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.GRANITE_MOE:      "granitemoe",
     MODEL_ARCH.CHAMELEON:        "chameleon",
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
+    MODEL_ARCH.PLM:              "plm",
+    MODEL_ARCH.BAILINGMOE:       "bailingmoe",
+    MODEL_ARCH.DOTS1:            "dots1",
+    MODEL_ARCH.ARCEE:            "arcee",
+}
+
+VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
+    VISION_PROJECTOR_TYPE.MLP:       "mlp",
+    VISION_PROJECTOR_TYPE.LDP:       "ldp",
+    VISION_PROJECTOR_TYPE.LDPV2:     "ldpv2",
+    VISION_PROJECTOR_TYPE.RESAMPLER: "resampler",
+    VISION_PROJECTOR_TYPE.GLM_EDGE:  "adapter",
+    VISION_PROJECTOR_TYPE.MERGER:    "qwen2vl_merger",
+    VISION_PROJECTOR_TYPE.GEMMA3:    "gemma3",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -519,8 +686,20 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.SSM_A:                "blk.{bid}.ssm_a",
     MODEL_TENSOR.SSM_D:                "blk.{bid}.ssm_d",
     MODEL_TENSOR.SSM_OUT:              "blk.{bid}.ssm_out",
+    MODEL_TENSOR.TIME_MIX_W0:          "blk.{bid}.time_mix_w0",
     MODEL_TENSOR.TIME_MIX_W1:          "blk.{bid}.time_mix_w1",
     MODEL_TENSOR.TIME_MIX_W2:          "blk.{bid}.time_mix_w2",
+    MODEL_TENSOR.TIME_MIX_A0:          "blk.{bid}.time_mix_a0",
+    MODEL_TENSOR.TIME_MIX_A1:          "blk.{bid}.time_mix_a1",
+    MODEL_TENSOR.TIME_MIX_A2:          "blk.{bid}.time_mix_a2",
+    MODEL_TENSOR.TIME_MIX_V0:          "blk.{bid}.time_mix_v0",
+    MODEL_TENSOR.TIME_MIX_V1:          "blk.{bid}.time_mix_v1",
+    MODEL_TENSOR.TIME_MIX_V2:          "blk.{bid}.time_mix_v2",
+    MODEL_TENSOR.TIME_MIX_G1:          "blk.{bid}.time_mix_g1",
+    MODEL_TENSOR.TIME_MIX_G2:          "blk.{bid}.time_mix_g2",
+    MODEL_TENSOR.TIME_MIX_K_K:         "blk.{bid}.time_mix_k_k",
+    MODEL_TENSOR.TIME_MIX_K_A:         "blk.{bid}.time_mix_k_a",
+    MODEL_TENSOR.TIME_MIX_R_K:         "blk.{bid}.time_mix_r_k",
     MODEL_TENSOR.TIME_MIX_LERP_X:      "blk.{bid}.time_mix_lerp_x",
     MODEL_TENSOR.TIME_MIX_LERP_K:      "blk.{bid}.time_mix_lerp_k",
     MODEL_TENSOR.TIME_MIX_LERP_V:      "blk.{bid}.time_mix_lerp_v",
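TENSOR_NAMES values are also format strings, with "{bid}" standing for the block (layer) index. A sketch of producing a concrete per-layer name for one of the new RWKV7 tensors (the index 3 is illustrative; the import assumes the standalone gguf-py layout):

    from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

    name = TENSOR_NAMES[MODEL_TENSOR.TIME_MIX_W0].format(bid=3)
    assert name == "blk.3.time_mix_w0"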
@@ -547,6 +726,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.ATTN_Q_B:             "blk.{bid}.attn_q_b",
     MODEL_TENSOR.ATTN_KV_A_MQA:        "blk.{bid}.attn_kv_a_mqa",
     MODEL_TENSOR.ATTN_KV_B:            "blk.{bid}.attn_kv_b",
+    MODEL_TENSOR.ATTN_K_B:             "blk.{bid}.attn_k_b",
+    MODEL_TENSOR.ATTN_V_B:             "blk.{bid}.attn_v_b",
     MODEL_TENSOR.ATTN_Q_A_NORM:        "blk.{bid}.attn_q_a_norm",
     MODEL_TENSOR.ATTN_KV_A_NORM:       "blk.{bid}.attn_kv_a_norm",
     MODEL_TENSOR.ATTN_SUB_NORM:        "blk.{bid}.attn_sub_norm",
@@ -597,9 +778,126 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.POSNET_ATTN_K:        "posnet.{bid}.attn_k",
     MODEL_TENSOR.POSNET_ATTN_V:        "posnet.{bid}.attn_v",
     MODEL_TENSOR.POSNET_ATTN_OUT:      "posnet.{bid}.attn_output",
+    # vision
+    MODEL_TENSOR.V_MMPROJ:             "mm.{bid}",
+    MODEL_TENSOR.V_MMPROJ_FC:          "mm.model.fc",
+    MODEL_TENSOR.V_MMPROJ_MLP:         "mm.model.mlp.{bid}",
+    MODEL_TENSOR.V_MMPROJ_PEG:         "mm.model.peg.{bid}",
+    MODEL_TENSOR.V_ENC_EMBD_CLS:       "v.class_embd",
+    MODEL_TENSOR.V_ENC_EMBD_PATCH:     "v.patch_embd",
+    MODEL_TENSOR.V_ENC_EMBD_POS:       "v.position_embd",
+    MODEL_TENSOR.V_ENC_ATTN_Q:         "v.blk.{bid}.attn_q",
+    MODEL_TENSOR.V_ENC_ATTN_Q_NORM:    "v.blk.{bid}.attn_q_norm",
+    MODEL_TENSOR.V_ENC_ATTN_K:         "v.blk.{bid}.attn_k",
+    MODEL_TENSOR.V_ENC_ATTN_K_NORM:    "v.blk.{bid}.attn_k_norm",
+    MODEL_TENSOR.V_ENC_ATTN_V:         "v.blk.{bid}.attn_v",
+    MODEL_TENSOR.V_ENC_INPUT_NORM:     "v.blk.{bid}.ln1",
+    MODEL_TENSOR.V_ENC_ATTN_O:         "v.blk.{bid}.attn_out",
+    MODEL_TENSOR.V_ENC_ATTN_O_NORM:    "v.blk.{bid}.attn_out_norm",
+    MODEL_TENSOR.V_ENC_POST_ATTN_NORM: "v.blk.{bid}.ln2",
+    MODEL_TENSOR.V_ENC_FFN_UP:         "v.blk.{bid}.ffn_up",
+    MODEL_TENSOR.V_ENC_FFN_GATE:       "v.blk.{bid}.ffn_gate",
+    MODEL_TENSOR.V_ENC_FFN_DOWN:       "v.blk.{bid}.ffn_down",
+    MODEL_TENSOR.V_LAYER_SCALE_1:      "v.blk.{bid}.ls1",
+    MODEL_TENSOR.V_LAYER_SCALE_2:      "v.blk.{bid}.ls2",
+    MODEL_TENSOR.V_PRE_NORM:           "v.pre_ln",
+    MODEL_TENSOR.V_POST_NORM:          "v.post_ln",
+    MODEL_TENSOR.V_MM_INP_PROJ:        "mm.input_projection",
+    MODEL_TENSOR.V_MM_INP_NORM:        "mm.input_norm",
+    MODEL_TENSOR.V_MM_SOFT_EMB_NORM:   "mm.soft_emb_norm",
+    MODEL_TENSOR.V_RESMPL_POS_EMBD_K:  "resampler.pos_embd_k",
+    MODEL_TENSOR.V_RESMPL_ATTN_Q:      "resampler.attn.q",
+    MODEL_TENSOR.V_RESMPL_ATTN_K:      "resampler.attn.k",
+    MODEL_TENSOR.V_RESMPL_ATTN_V:      "resampler.attn.v",
+    MODEL_TENSOR.V_RESMPL_ATTN_OUT:    "resampler.attn.out",
+    MODEL_TENSOR.V_RESMPL_KV:          "resampler.kv",
+    MODEL_TENSOR.V_RESMPL_KV_NORM:     "resampler.ln_kv",
+    MODEL_TENSOR.V_RESMPL_POST_NORM:   "resampler.ln_post",
+    MODEL_TENSOR.V_RESMPL_Q_NORM:      "resampler.ln_q",
+    MODEL_TENSOR.V_RESMPL_PROJ:        "resampler.proj",
+    MODEL_TENSOR.V_RESMPL_QUERY:       "resampler.query",
+    MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break",  # pixtral
+    MODEL_TENSOR.V_MM_PATCH_MERGER:    "mm.patch_merger",  # mistral small 3.1
+    # audio (mtmd)
+    MODEL_TENSOR.A_ENC_EMBD_POS:       "a.position_embd",
+    MODEL_TENSOR.A_ENC_CONV1D:         "a.conv1d.{bid}",
+    MODEL_TENSOR.A_PRE_NORM:           "a.pre_ln",
+    MODEL_TENSOR.A_POST_NORM:          "a.post_ln",
+    MODEL_TENSOR.A_ENC_ATTN_Q:         "a.blk.{bid}.attn_q",
+    MODEL_TENSOR.A_ENC_ATTN_K:         "a.blk.{bid}.attn_k",
+    MODEL_TENSOR.A_ENC_ATTN_V:         "a.blk.{bid}.attn_v",
+    MODEL_TENSOR.A_ENC_INPUT_NORM:     "a.blk.{bid}.ln1",
+    MODEL_TENSOR.A_ENC_OUTPUT:         "a.blk.{bid}.attn_out",
+    MODEL_TENSOR.A_ENC_OUTPUT_NORM:    "a.blk.{bid}.ln2",
+    MODEL_TENSOR.A_ENC_FFN_UP:         "a.blk.{bid}.ffn_up",
+    MODEL_TENSOR.A_ENC_FFN_GATE:       "a.blk.{bid}.ffn_gate",
+    MODEL_TENSOR.A_ENC_FFN_DOWN:       "a.blk.{bid}.ffn_down",
+    MODEL_TENSOR.A_MMPROJ:             "mm.a.mlp.{bid}",
+    MODEL_TENSOR.A_MMPROJ_FC:          "mm.a.fc",
+    MODEL_TENSOR.A_MM_NORM_PRE:        "mm.a.norm_pre",
+    MODEL_TENSOR.A_MM_NORM_MID:        "mm.a.norm_mid",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
+    MODEL_ARCH.MMPROJ: [
+        MODEL_TENSOR.V_MMPROJ,
+        MODEL_TENSOR.V_MMPROJ_FC,
+        MODEL_TENSOR.V_MMPROJ_MLP,
+        MODEL_TENSOR.V_MMPROJ_PEG,
+        MODEL_TENSOR.V_ENC_EMBD_CLS,
+        MODEL_TENSOR.V_ENC_EMBD_PATCH,
+        MODEL_TENSOR.V_ENC_EMBD_POS,
+        MODEL_TENSOR.V_ENC_INPUT_NORM,
+        MODEL_TENSOR.V_ENC_ATTN_Q,
+        MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
+        MODEL_TENSOR.V_ENC_ATTN_K,
+        MODEL_TENSOR.V_ENC_ATTN_K_NORM,
+        MODEL_TENSOR.V_ENC_ATTN_V,
+        MODEL_TENSOR.V_ENC_ATTN_O,
+        MODEL_TENSOR.V_ENC_ATTN_O_NORM,
+        MODEL_TENSOR.V_ENC_POST_ATTN_NORM,
+        MODEL_TENSOR.V_ENC_FFN_UP,
+        MODEL_TENSOR.V_ENC_FFN_GATE,
+        MODEL_TENSOR.V_ENC_FFN_DOWN,
+        MODEL_TENSOR.V_LAYER_SCALE_1,
+        MODEL_TENSOR.V_LAYER_SCALE_2,
+        MODEL_TENSOR.V_PRE_NORM,
+        MODEL_TENSOR.V_POST_NORM,
+        MODEL_TENSOR.V_MM_INP_PROJ,
+        MODEL_TENSOR.V_MM_INP_NORM,
+        MODEL_TENSOR.V_MM_SOFT_EMB_NORM,
+        MODEL_TENSOR.V_RESMPL_POS_EMBD_K,
+        MODEL_TENSOR.V_RESMPL_ATTN_Q,
+        MODEL_TENSOR.V_RESMPL_ATTN_K,
+        MODEL_TENSOR.V_RESMPL_ATTN_V,
+        MODEL_TENSOR.V_RESMPL_ATTN_OUT,
+        MODEL_TENSOR.V_RESMPL_KV,
+        MODEL_TENSOR.V_RESMPL_KV_NORM,
+        MODEL_TENSOR.V_RESMPL_POST_NORM,
+        MODEL_TENSOR.V_RESMPL_Q_NORM,
+        MODEL_TENSOR.V_RESMPL_PROJ,
+        MODEL_TENSOR.V_RESMPL_QUERY,
+        MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
+        MODEL_TENSOR.V_MM_PATCH_MERGER,
+        # audio
+        MODEL_TENSOR.A_ENC_EMBD_POS,
+        MODEL_TENSOR.A_ENC_CONV1D,
+        MODEL_TENSOR.A_PRE_NORM,
+        MODEL_TENSOR.A_POST_NORM,
+        MODEL_TENSOR.A_ENC_ATTN_Q,
+        MODEL_TENSOR.A_ENC_ATTN_K,
+        MODEL_TENSOR.A_ENC_ATTN_V,
+        MODEL_TENSOR.A_ENC_INPUT_NORM,
+        MODEL_TENSOR.A_ENC_OUTPUT,
+        MODEL_TENSOR.A_ENC_OUTPUT_NORM,
+        MODEL_TENSOR.A_ENC_FFN_UP,
+        MODEL_TENSOR.A_ENC_FFN_GATE,
+        MODEL_TENSOR.A_ENC_FFN_DOWN,
+        MODEL_TENSOR.A_MMPROJ,
+        MODEL_TENSOR.A_MMPROJ_FC,
+        MODEL_TENSOR.A_MM_NORM_PRE,
+        MODEL_TENSOR.A_MM_NORM_MID,
+    ],
     MODEL_ARCH.LLAMA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
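MODEL_TENSORS is the per-architecture allow-list that pairs with TENSOR_NAMES: together they enumerate every tensor name an architecture may store. A hedged sketch that lists the expected names for the new LLAMA4 entry (block index 0 is illustrative; imports assume the standalone gguf-py layout):

    from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

    for t in MODEL_TENSORS[MODEL_ARCH.LLAMA4]:
        template = TENSOR_NAMES[t]
        # per-block tensors carry a {bid} placeholder; global tensors do not
        print(template.format(bid=0) if "{bid}" in template else template)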
@@ -620,6 +918,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.LLAMA4: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     MODEL_ARCH.DECI: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -719,6 +1040,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.POS_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ATTN_OUT_NORM,
+        MODEL_TENSOR.ATTN_QKV,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
         MODEL_TENSOR.ATTN_V,
|
|
743
1065
|
MODEL_TENSOR.FFN_UP,
|
744
1066
|
MODEL_TENSOR.LAYER_OUT_NORM,
|
745
1067
|
],
|
1068
|
+
MODEL_ARCH.NOMIC_BERT_MOE: [
|
1069
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1070
|
+
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
1071
|
+
MODEL_TENSOR.TOKEN_TYPES,
|
1072
|
+
MODEL_TENSOR.POS_EMBD,
|
1073
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1074
|
+
MODEL_TENSOR.ATTN_OUT_NORM,
|
1075
|
+
MODEL_TENSOR.ATTN_QKV,
|
1076
|
+
MODEL_TENSOR.ATTN_OUT,
|
1077
|
+
MODEL_TENSOR.FFN_DOWN,
|
1078
|
+
MODEL_TENSOR.FFN_UP,
|
1079
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
1080
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
1081
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
1082
|
+
MODEL_TENSOR.LAYER_OUT_NORM,
|
1083
|
+
],
|
746
1084
|
MODEL_ARCH.JINA_BERT_V2: [
|
747
1085
|
MODEL_TENSOR.TOKEN_EMBD,
|
748
1086
|
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
@@ -1115,6 +1453,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1115
1453
|
MODEL_TENSOR.FFN_PRE_NORM,
|
1116
1454
|
MODEL_TENSOR.FFN_POST_NORM,
|
1117
1455
|
],
|
1456
|
+
MODEL_ARCH.GEMMA3: [
|
1457
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1458
|
+
MODEL_TENSOR.OUTPUT,
|
1459
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1460
|
+
MODEL_TENSOR.ATTN_Q,
|
1461
|
+
MODEL_TENSOR.ATTN_Q_NORM,
|
1462
|
+
MODEL_TENSOR.ATTN_K,
|
1463
|
+
MODEL_TENSOR.ATTN_K_NORM,
|
1464
|
+
MODEL_TENSOR.ATTN_V,
|
1465
|
+
MODEL_TENSOR.ATTN_OUT,
|
1466
|
+
MODEL_TENSOR.FFN_GATE,
|
1467
|
+
MODEL_TENSOR.FFN_DOWN,
|
1468
|
+
MODEL_TENSOR.FFN_UP,
|
1469
|
+
MODEL_TENSOR.ATTN_NORM,
|
1470
|
+
MODEL_TENSOR.ATTN_POST_NORM,
|
1471
|
+
MODEL_TENSOR.FFN_PRE_NORM,
|
1472
|
+
MODEL_TENSOR.FFN_POST_NORM,
|
1473
|
+
],
|
1118
1474
|
MODEL_ARCH.STARCODER2: [
|
1119
1475
|
MODEL_TENSOR.TOKEN_EMBD,
|
1120
1476
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -1191,6 +1547,68 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1191
1547
|
MODEL_TENSOR.FFN_DOWN,
|
1192
1548
|
MODEL_TENSOR.FFN_UP,
|
1193
1549
|
],
|
1550
|
+
MODEL_ARCH.RWKV7: [
|
1551
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1552
|
+
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
1553
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1554
|
+
MODEL_TENSOR.OUTPUT,
|
1555
|
+
MODEL_TENSOR.ATTN_NORM,
|
1556
|
+
MODEL_TENSOR.ATTN_NORM_2,
|
1557
|
+
MODEL_TENSOR.TIME_MIX_LERP_FUSED,
|
1558
|
+
MODEL_TENSOR.TIME_MIX_W0,
|
1559
|
+
MODEL_TENSOR.TIME_MIX_W1,
|
1560
|
+
MODEL_TENSOR.TIME_MIX_W2,
|
1561
|
+
MODEL_TENSOR.TIME_MIX_A0,
|
1562
|
+
MODEL_TENSOR.TIME_MIX_A1,
|
1563
|
+
MODEL_TENSOR.TIME_MIX_A2,
|
1564
|
+
MODEL_TENSOR.TIME_MIX_V0,
|
1565
|
+
MODEL_TENSOR.TIME_MIX_V1,
|
1566
|
+
MODEL_TENSOR.TIME_MIX_V2,
|
1567
|
+
MODEL_TENSOR.TIME_MIX_G1,
|
1568
|
+
MODEL_TENSOR.TIME_MIX_G2,
|
1569
|
+
MODEL_TENSOR.TIME_MIX_K_K,
|
1570
|
+
MODEL_TENSOR.TIME_MIX_K_A,
|
1571
|
+
MODEL_TENSOR.TIME_MIX_R_K,
|
1572
|
+
MODEL_TENSOR.TIME_MIX_KEY,
|
1573
|
+
MODEL_TENSOR.TIME_MIX_VALUE,
|
1574
|
+
MODEL_TENSOR.TIME_MIX_RECEPTANCE,
|
1575
|
+
MODEL_TENSOR.TIME_MIX_LN,
|
1576
|
+
MODEL_TENSOR.TIME_MIX_OUTPUT,
|
1577
|
+
MODEL_TENSOR.CHANNEL_MIX_LERP_K,
|
1578
|
+
MODEL_TENSOR.CHANNEL_MIX_KEY,
|
1579
|
+
MODEL_TENSOR.CHANNEL_MIX_VALUE,
|
1580
|
+
],
|
1581
|
+
MODEL_ARCH.ARWKV7: [
|
1582
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1583
|
+
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
1584
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1585
|
+
MODEL_TENSOR.OUTPUT,
|
1586
|
+
MODEL_TENSOR.ATTN_NORM,
|
1587
|
+
MODEL_TENSOR.TIME_MIX_LERP_FUSED,
|
1588
|
+
MODEL_TENSOR.TIME_MIX_W0,
|
1589
|
+
MODEL_TENSOR.TIME_MIX_W1,
|
1590
|
+
MODEL_TENSOR.TIME_MIX_W2,
|
1591
|
+
MODEL_TENSOR.TIME_MIX_A0,
|
1592
|
+
MODEL_TENSOR.TIME_MIX_A1,
|
1593
|
+
MODEL_TENSOR.TIME_MIX_A2,
|
1594
|
+
MODEL_TENSOR.TIME_MIX_V0,
|
1595
|
+
MODEL_TENSOR.TIME_MIX_V1,
|
1596
|
+
MODEL_TENSOR.TIME_MIX_V2,
|
1597
|
+
MODEL_TENSOR.TIME_MIX_G1,
|
1598
|
+
MODEL_TENSOR.TIME_MIX_G2,
|
1599
|
+
MODEL_TENSOR.TIME_MIX_K_K,
|
1600
|
+
MODEL_TENSOR.TIME_MIX_K_A,
|
1601
|
+
MODEL_TENSOR.TIME_MIX_R_K,
|
1602
|
+
MODEL_TENSOR.TIME_MIX_KEY,
|
1603
|
+
MODEL_TENSOR.TIME_MIX_VALUE,
|
1604
|
+
MODEL_TENSOR.TIME_MIX_RECEPTANCE,
|
1605
|
+
MODEL_TENSOR.TIME_MIX_LN,
|
1606
|
+
MODEL_TENSOR.TIME_MIX_OUTPUT,
|
1607
|
+
MODEL_TENSOR.FFN_NORM,
|
1608
|
+
MODEL_TENSOR.FFN_GATE,
|
1609
|
+
MODEL_TENSOR.FFN_DOWN,
|
1610
|
+
MODEL_TENSOR.FFN_UP,
|
1611
|
+
],
|
1194
1612
|
MODEL_ARCH.MAMBA: [
|
1195
1613
|
MODEL_TENSOR.TOKEN_EMBD,
|
1196
1614
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -1371,6 +1789,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1371
1789
|
MODEL_TENSOR.ATTN_Q_B,
|
1372
1790
|
MODEL_TENSOR.ATTN_KV_A_MQA,
|
1373
1791
|
MODEL_TENSOR.ATTN_KV_B,
|
1792
|
+
MODEL_TENSOR.ATTN_K_B,
|
1793
|
+
MODEL_TENSOR.ATTN_V_B,
|
1374
1794
|
MODEL_TENSOR.ATTN_Q_A_NORM,
|
1375
1795
|
MODEL_TENSOR.ATTN_KV_A_NORM,
|
1376
1796
|
MODEL_TENSOR.ATTN_OUT,
|
@@ -1388,6 +1808,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1388
1808
|
MODEL_TENSOR.FFN_UP_SHEXP,
|
1389
1809
|
MODEL_TENSOR.FFN_EXP_PROBS_B,
|
1390
1810
|
],
|
1811
|
+
MODEL_ARCH.PLM: [
|
1812
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1813
|
+
MODEL_TENSOR.OUTPUT,
|
1814
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1815
|
+
MODEL_TENSOR.ATTN_NORM,
|
1816
|
+
MODEL_TENSOR.ATTN_Q,
|
1817
|
+
MODEL_TENSOR.ATTN_KV_A_MQA,
|
1818
|
+
MODEL_TENSOR.ATTN_KV_A_NORM,
|
1819
|
+
MODEL_TENSOR.ATTN_KV_B,
|
1820
|
+
MODEL_TENSOR.ATTN_OUT,
|
1821
|
+
MODEL_TENSOR.FFN_NORM,
|
1822
|
+
MODEL_TENSOR.FFN_UP,
|
1823
|
+
MODEL_TENSOR.FFN_DOWN,
|
1824
|
+
],
|
1391
1825
|
MODEL_ARCH.CHATGLM : [
|
1392
1826
|
MODEL_TENSOR.TOKEN_EMBD,
|
1393
1827
|
MODEL_TENSOR.ROPE_FREQS,
|
@@ -1403,6 +1837,23 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1403
1837
|
MODEL_TENSOR.FFN_DOWN,
|
1404
1838
|
MODEL_TENSOR.FFN_UP,
|
1405
1839
|
],
|
1840
|
+
MODEL_ARCH.GLM4 : [
|
1841
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1842
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1843
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1844
|
+
MODEL_TENSOR.OUTPUT,
|
1845
|
+
MODEL_TENSOR.ATTN_NORM,
|
1846
|
+
MODEL_TENSOR.ATTN_QKV,
|
1847
|
+
MODEL_TENSOR.ATTN_Q,
|
1848
|
+
MODEL_TENSOR.ATTN_K,
|
1849
|
+
MODEL_TENSOR.ATTN_V,
|
1850
|
+
MODEL_TENSOR.ATTN_OUT,
|
1851
|
+
MODEL_TENSOR.FFN_NORM,
|
1852
|
+
MODEL_TENSOR.FFN_DOWN,
|
1853
|
+
MODEL_TENSOR.FFN_UP,
|
1854
|
+
MODEL_TENSOR.ATTN_POST_NORM,
|
1855
|
+
MODEL_TENSOR.FFN_POST_NORM,
|
1856
|
+
],
|
1406
1857
|
MODEL_ARCH.BITNET: [
|
1407
1858
|
MODEL_TENSOR.ATTN_Q,
|
1408
1859
|
MODEL_TENSOR.ATTN_K,
|
@@ -1536,6 +1987,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1536
1987
|
MODEL_TENSOR.FFN_GATE_EXP,
|
1537
1988
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
1538
1989
|
MODEL_TENSOR.FFN_UP_EXP,
|
1990
|
+
MODEL_TENSOR.FFN_GATE_SHEXP,
|
1991
|
+
MODEL_TENSOR.FFN_UP_SHEXP,
|
1992
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
1539
1993
|
],
|
1540
1994
|
MODEL_ARCH.CHAMELEON: [
|
1541
1995
|
MODEL_TENSOR.TOKEN_EMBD,
|
@@ -1575,6 +2029,64 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1575
2029
|
MODEL_TENSOR.POSNET_ATTN_V,
|
1576
2030
|
MODEL_TENSOR.POSNET_ATTN_OUT,
|
1577
2031
|
],
|
2032
|
+
MODEL_ARCH.BAILINGMOE: [
|
2033
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
2034
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
2035
|
+
MODEL_TENSOR.OUTPUT,
|
2036
|
+
MODEL_TENSOR.ROPE_FREQS,
|
2037
|
+
MODEL_TENSOR.ATTN_NORM,
|
2038
|
+
MODEL_TENSOR.ATTN_Q,
|
2039
|
+
MODEL_TENSOR.ATTN_K,
|
2040
|
+
MODEL_TENSOR.ATTN_V,
|
2041
|
+
MODEL_TENSOR.ATTN_OUT,
|
2042
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
2043
|
+
MODEL_TENSOR.FFN_NORM,
|
2044
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
2045
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
2046
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
2047
|
+
MODEL_TENSOR.FFN_GATE_SHEXP,
|
2048
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
2049
|
+
MODEL_TENSOR.FFN_UP_SHEXP,
|
2050
|
+
],
|
2051
|
+
MODEL_ARCH.DOTS1: [
|
2052
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
2053
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
2054
|
+
MODEL_TENSOR.OUTPUT,
|
2055
|
+
MODEL_TENSOR.ATTN_NORM,
|
2056
|
+
MODEL_TENSOR.ATTN_Q,
|
2057
|
+
MODEL_TENSOR.ATTN_Q_NORM,
|
2058
|
+
MODEL_TENSOR.ATTN_K,
|
2059
|
+
MODEL_TENSOR.ATTN_K_NORM,
|
2060
|
+
MODEL_TENSOR.ATTN_V,
|
2061
|
+
MODEL_TENSOR.ATTN_OUT,
|
2062
|
+
MODEL_TENSOR.FFN_EXP_PROBS_B,
|
2063
|
+
MODEL_TENSOR.FFN_NORM,
|
2064
|
+
MODEL_TENSOR.FFN_GATE,
|
2065
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
2066
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
2067
|
+
MODEL_TENSOR.FFN_GATE_SHEXP,
|
2068
|
+
MODEL_TENSOR.FFN_DOWN,
|
2069
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
2070
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
2071
|
+
MODEL_TENSOR.FFN_UP,
|
2072
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
2073
|
+
MODEL_TENSOR.FFN_UP_SHEXP,
|
2074
|
+
],
|
2075
|
+
MODEL_ARCH.ARCEE: [
|
2076
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
2077
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
2078
|
+
MODEL_TENSOR.OUTPUT,
|
2079
|
+
MODEL_TENSOR.ROPE_FREQS,
|
2080
|
+
MODEL_TENSOR.ATTN_NORM,
|
2081
|
+
MODEL_TENSOR.ATTN_Q,
|
2082
|
+
MODEL_TENSOR.ATTN_K,
|
2083
|
+
MODEL_TENSOR.ATTN_V,
|
2084
|
+
MODEL_TENSOR.ATTN_OUT,
|
2085
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
2086
|
+
MODEL_TENSOR.FFN_NORM,
|
2087
|
+
MODEL_TENSOR.FFN_DOWN,
|
2088
|
+
MODEL_TENSOR.FFN_UP,
|
2089
|
+
],
|
1578
2090
|
# TODO
|
1579
2091
|
}
|
1580
2092
|
|
@@ -1627,6 +2139,9 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.BAILINGMOE: [
+        MODEL_TENSOR.ROPE_FREQS,
+    ],
 }
 
 #
@@ -1654,6 +2169,8 @@ class PoolingType(IntEnum):
     NONE = 0
     MEAN = 1
     CLS  = 2
+    LAST = 3
+    RANK = 4
 
 
 class GGMLQuantizationType(IntEnum):
@@ -1780,6 +2297,19 @@ class GGUFValueType(IntEnum):
         raise ValueError(f"Unknown type: {type(val)}")
 
 
+class VisionProjectorType:
+    GEMMA3   = "gemma3"
+    IDEFICS3 = "idefics3"
+    PIXTRAL  = "pixtral"
+    LLAMA4   = "llama4"
+    QWEN2VL  = "qwen2vl_merger"
+    QWEN25VL = "qwen2.5vl_merger"
+    ULTRAVOX = "ultravox"
+    INTERNVL = "internvl"
+    QWEN2A   = "qwen2a"    # audio
+    QWEN25O  = "qwen2.5o"  # omni
+
+
 # Items here are (block size, type size)
 QK_K = 256
 GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
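GGML_QUANT_SIZES maps each quantization type to a (block size, type size) pair, so the byte size of a densely packed quantized tensor is n_elements // block_size * type_size. A sketch for a 4096x4096 Q4_K tensor (shape chosen purely for illustration; the import assumes the standalone gguf-py layout):

    from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

    block_size, type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q4_K]
    n_elements = 4096 * 4096
    n_bytes = n_elements // block_size * type_size  # blocks per tensor x bytes per block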
|