bigdl-core-cpp 2.6.0b20250319__py3-none-win_amd64.whl → 2.6.0b20250321__py3-none-win_amd64.whl

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (47)
  1. bigdl/cpp/convert_hf_to_gguf.py +687 -60
  2. bigdl/cpp/convert_hf_to_gguf_update.py +46 -41
  3. bigdl/cpp/convert_lora_to_gguf.py +33 -5
  4. bigdl/cpp/gguf-py/gguf/constants.py +306 -123
  5. bigdl/cpp/gguf-py/gguf/gguf_writer.py +31 -3
  6. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +122 -25
  7. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  8. bigdl/cpp/gguf-py/gguf/vocab.py +1 -1
  9. bigdl/cpp/libs/common.lib +0 -0
  10. bigdl/cpp/libs/ggml-base.dll +0 -0
  11. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  12. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  13. bigdl/cpp/libs/ggml.dll +0 -0
  14. bigdl/cpp/libs/llama-batched.exe +0 -0
  15. bigdl/cpp/libs/llama-bench.exe +0 -0
  16. bigdl/cpp/libs/llama-cli.exe +0 -0
  17. bigdl/cpp/libs/llama-embedding.exe +0 -0
  18. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  19. bigdl/cpp/libs/llama-gguf.exe +0 -0
  20. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  21. bigdl/cpp/libs/llama-lookup.exe +0 -0
  22. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  23. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  24. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  25. bigdl/cpp/libs/llama-quantize.exe +0 -0
  26. bigdl/cpp/libs/llama-server.exe +0 -0
  27. bigdl/cpp/libs/llama-simple.exe +0 -0
  28. bigdl/cpp/libs/llama-speculative.exe +0 -0
  29. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  30. bigdl/cpp/libs/llama.dll +0 -0
  31. bigdl/cpp/libs/llava_shared.dll +0 -0
  32. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  33. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  34. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  35. bigdl/cpp/libs/ollama-lib.exe +0 -0
  36. bigdl/cpp/libs/ollama.exe +0 -0
  37. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  38. bigdl/cpp/libs/ollama_llama.dll +0 -0
  39. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  40. {bigdl_core_cpp-2.6.0b20250319.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/METADATA +2 -2
  41. bigdl_core_cpp-2.6.0b20250321.dist-info/RECORD +57 -0
  42. {bigdl_core_cpp-2.6.0b20250319.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/WHEEL +1 -1
  43. bigdl_core_cpp-2.6.0b20250319.dist-info/RECORD +0 -57
  44. {bigdl_core_cpp-2.6.0b20250319.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-llama-cpp.bat +0 -0
  45. {bigdl_core_cpp-2.6.0b20250319.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-llama-cpp.ps1 +0 -0
  46. {bigdl_core_cpp-2.6.0b20250319.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-ollama.bat +0 -0
  47. {bigdl_core_cpp-2.6.0b20250319.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/top_level.txt +0 -0
@@ -90,6 +90,7 @@ class Keys:
         VOCAB_SIZE = "{arch}.vocab_size"
         CONTEXT_LENGTH = "{arch}.context_length"
         EMBEDDING_LENGTH = "{arch}.embedding_length"
+        FEATURES_LENGTH = "{arch}.features_length"
         BLOCK_COUNT = "{arch}.block_count"
         LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
         FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
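These entries are str.format templates that gguf-py resolves against the architecture name before a metadata key is written. A minimal sketch of how the new FEATURES_LENGTH key resolves, assuming the nested Keys.LLM layout used by upstream gguf-py (the arch string is illustrative, not part of this diff):

    from gguf.constants import Keys

    # Resolve the template against a concrete architecture name.
    key = Keys.LLM.FEATURES_LENGTH.format(arch="wavtokenizer-dec")
    print(key)  # wavtokenizer-dec.features_length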
@@ -101,6 +102,8 @@ class Keys:
         EXPERT_USED_COUNT = "{arch}.expert_used_count"
         EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
         EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
+        EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
+        EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
         POOLING_TYPE = "{arch}.pooling_type"
         LOGIT_SCALE = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
@@ -112,6 +115,7 @@ class Keys:
         TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
         RESIDUAL_SCALE = "{arch}.residual_scale"
         EMBEDDING_SCALE = "{arch}.embedding_scale"
+        TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"

     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
@@ -122,6 +126,8 @@ class Keys:
         VALUE_LENGTH = "{arch}.attention.value_length"
         LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
         LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
+        GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
+        GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
         CAUSAL = "{arch}.attention.causal"
         Q_LORA_RANK = "{arch}.attention.q_lora_rank"
         KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
@@ -155,6 +161,14 @@ class Keys:
     class WKV:
         HEAD_SIZE = "{arch}.wkv.head_size"

+    class PosNet:
+        EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
+        BLOCK_COUNT = "{arch}.posnet.block_count"
+
+    class ConvNext:
+        EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
+        BLOCK_COUNT = "{arch}.convnext.block_count"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE = "tokenizer.ggml.pre"
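The new PosNet and ConvNext key groups follow the same template pattern, with each nested class contributing its own namespace segment. A hedged sketch, assuming the nested-class layout shown in the hunk above (arch string illustrative):

    from gguf.constants import Keys

    print(Keys.PosNet.EMBEDDING_LENGTH.format(arch="wavtokenizer-dec"))
    # wavtokenizer-dec.posnet.embedding_length
    print(Keys.ConvNext.BLOCK_COUNT.format(arch="wavtokenizer-dec"))
    # wavtokenizer-dec.convnext.block_count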
@@ -170,7 +184,6 @@ class Keys:
         UNK_ID = "tokenizer.ggml.unknown_token_id"
         SEP_ID = "tokenizer.ggml.seperator_token_id"
         PAD_ID = "tokenizer.ggml.padding_token_id"
-        CLS_ID = "tokenizer.ggml.cls_token_id"
         MASK_ID = "tokenizer.ggml.mask_token_id"
         ADD_BOS = "tokenizer.ggml.add_bos_token"
         ADD_EOS = "tokenizer.ggml.add_eos_token"
@@ -209,58 +222,63 @@ class GGUFType:


 class MODEL_ARCH(IntEnum):
-    LLAMA = auto()
-    FALCON = auto()
-    BAICHUAN = auto()
-    GROK = auto()
-    GPT2 = auto()
-    GPTJ = auto()
-    GPTNEOX = auto()
-    MPT = auto()
-    STARCODER = auto()
-    REFACT = auto()
-    BERT = auto()
-    NOMIC_BERT = auto()
-    JINA_BERT_V2 = auto()
-    BLOOM = auto()
-    STABLELM = auto()
-    QWEN = auto()
-    QWEN2 = auto()
-    QWEN2MOE = auto()
-    QWEN2VL = auto()
-    PHI2 = auto()
-    PHI3 = auto()
-    PLAMO = auto()
-    CODESHELL = auto()
-    ORION = auto()
-    INTERNLM2 = auto()
-    MINICPM = auto()
-    MINICPM3 = auto()
-    GEMMA = auto()
-    GEMMA2 = auto()
-    GEMMA3 = auto()
-    STARCODER2 = auto()
-    RWKV6 = auto()
-    MAMBA = auto()
-    XVERSE = auto()
-    COMMAND_R = auto()
-    DBRX = auto()
-    OLMO = auto()
-    OLMO2 = auto()
-    OLMOE = auto()
-    OPENELM = auto()
-    ARCTIC = auto()
-    DEEPSEEK2 = auto()
-    CHATGLM = auto()
-    BITNET = auto()
-    T5 = auto()
-    T5ENCODER = auto()
-    JAIS = auto()
-    NEMOTRON = auto()
-    EXAONE = auto()
-    GRANITE = auto()
-    GRANITE_MOE = auto()
-    CHAMELEON = auto()
+    LLAMA = auto()
+    DECI = auto()
+    FALCON = auto()
+    BAICHUAN = auto()
+    GROK = auto()
+    GPT2 = auto()
+    GPTJ = auto()
+    GPTNEOX = auto()
+    MPT = auto()
+    STARCODER = auto()
+    REFACT = auto()
+    BERT = auto()
+    NOMIC_BERT = auto()
+    JINA_BERT_V2 = auto()
+    BLOOM = auto()
+    STABLELM = auto()
+    QWEN = auto()
+    QWEN2 = auto()
+    QWEN2MOE = auto()
+    QWEN2VL = auto()
+    PHI2 = auto()
+    PHI3 = auto()
+    PHIMOE = auto()
+    PLAMO = auto()
+    CODESHELL = auto()
+    ORION = auto()
+    INTERNLM2 = auto()
+    MINICPM = auto()
+    MINICPM3 = auto()
+    GEMMA = auto()
+    GEMMA2 = auto()
+    STARCODER2 = auto()
+    RWKV6 = auto()
+    RWKV6QWEN2 = auto()
+    MAMBA = auto()
+    XVERSE = auto()
+    COMMAND_R = auto()
+    COHERE2 = auto()
+    DBRX = auto()
+    OLMO = auto()
+    OLMO2 = auto()
+    OLMOE = auto()
+    OPENELM = auto()
+    ARCTIC = auto()
+    DEEPSEEK = auto()
+    DEEPSEEK2 = auto()
+    CHATGLM = auto()
+    BITNET = auto()
+    T5 = auto()
+    T5ENCODER = auto()
+    JAIS = auto()
+    NEMOTRON = auto()
+    EXAONE = auto()
+    GRANITE = auto()
+    GRANITE_MOE = auto()
+    CHAMELEON = auto()
+    WAVTOKENIZER_DEC = auto()


 class MODEL_TENSOR(IntEnum):
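Because MODEL_ARCH uses auto(), inserting DECI right after LLAMA shifts the integer value of every later member (the diff also shows the whole block as rewritten, likely because alignment in the source changed). The renumbering is harmless: GGUF files record the architecture as a string (general.architecture, via MODEL_ARCH_NAMES), never the enum integer. A small self-contained illustration of the order-dependence:

    from enum import IntEnum, auto

    class Old(IntEnum):
        LLAMA = auto()
        FALCON = auto()  # 2 in the old ordering

    class New(IntEnum):
        LLAMA = auto()
        DECI = auto()    # inserted member
        FALCON = auto()  # now 3 -- auto() values follow declaration order

    assert Old.FALCON == 2 and New.FALCON == 3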
@@ -299,6 +317,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE_SHEXP = auto()
     FFN_DOWN_SHEXP = auto()
     FFN_UP_SHEXP = auto()
+    FFN_EXP_PROBS_B = auto()
     ATTN_Q_NORM = auto()
     ATTN_K_NORM = auto()
     LAYER_OUT_NORM = auto()
@@ -316,6 +335,7 @@ class MODEL_TENSOR(IntEnum):
     TIME_MIX_LERP_V = auto()
     TIME_MIX_LERP_R = auto()
     TIME_MIX_LERP_G = auto()
+    TIME_MIX_LERP_FUSED = auto()
     TIME_MIX_LERP_W = auto()
     TIME_MIX_FIRST = auto()
     TIME_MIX_DECAY = auto()
@@ -370,61 +390,82 @@ class MODEL_TENSOR(IntEnum):
     ENC_OUTPUT_NORM = auto()
     CLS = auto() # classifier
     CLS_OUT = auto() # classifier output projection
+    CONV1D = auto()
+    CONVNEXT_DW = auto()
+    CONVNEXT_NORM = auto()
+    CONVNEXT_PW1 = auto()
+    CONVNEXT_PW2 = auto()
+    CONVNEXT_GAMMA = auto()
+    POSNET_CONV1 = auto()
+    POSNET_CONV2 = auto()
+    POSNET_NORM = auto()
+    POSNET_NORM1 = auto()
+    POSNET_NORM2 = auto()
+    POSNET_ATTN_NORM = auto()
+    POSNET_ATTN_Q = auto()
+    POSNET_ATTN_K = auto()
+    POSNET_ATTN_V = auto()
+    POSNET_ATTN_OUT = auto()


 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
-    MODEL_ARCH.LLAMA: "llama",
-    MODEL_ARCH.FALCON: "falcon",
-    MODEL_ARCH.BAICHUAN: "baichuan",
-    MODEL_ARCH.GROK: "grok",
-    MODEL_ARCH.GPT2: "gpt2",
-    MODEL_ARCH.GPTJ: "gptj",
-    MODEL_ARCH.GPTNEOX: "gptneox",
-    MODEL_ARCH.MPT: "mpt",
-    MODEL_ARCH.STARCODER: "starcoder",
-    MODEL_ARCH.REFACT: "refact",
-    MODEL_ARCH.BERT: "bert",
-    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
-    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
-    MODEL_ARCH.BLOOM: "bloom",
-    MODEL_ARCH.STABLELM: "stablelm",
-    MODEL_ARCH.QWEN: "qwen",
-    MODEL_ARCH.QWEN2: "qwen2",
-    MODEL_ARCH.QWEN2MOE: "qwen2moe",
-    MODEL_ARCH.QWEN2VL: "qwen2vl",
-    MODEL_ARCH.PHI2: "phi2",
-    MODEL_ARCH.PHI3: "phi3",
-    MODEL_ARCH.PLAMO: "plamo",
-    MODEL_ARCH.CODESHELL: "codeshell",
-    MODEL_ARCH.ORION: "orion",
-    MODEL_ARCH.INTERNLM2: "internlm2",
-    MODEL_ARCH.MINICPM: "minicpm",
-    MODEL_ARCH.MINICPM3: "minicpm3",
-    MODEL_ARCH.GEMMA: "gemma",
-    MODEL_ARCH.GEMMA2: "gemma2",
-    MODEL_ARCH.GEMMA3: "gemma3",
-    MODEL_ARCH.STARCODER2: "starcoder2",
-    MODEL_ARCH.RWKV6: "rwkv6",
-    MODEL_ARCH.MAMBA: "mamba",
-    MODEL_ARCH.XVERSE: "xverse",
-    MODEL_ARCH.COMMAND_R: "command-r",
-    MODEL_ARCH.DBRX: "dbrx",
-    MODEL_ARCH.OLMO: "olmo",
-    MODEL_ARCH.OLMO2: "olmo2",
-    MODEL_ARCH.OLMOE: "olmoe",
-    MODEL_ARCH.OPENELM: "openelm",
-    MODEL_ARCH.ARCTIC: "arctic",
-    MODEL_ARCH.DEEPSEEK2: "deepseek2",
-    MODEL_ARCH.CHATGLM: "chatglm",
-    MODEL_ARCH.BITNET: "bitnet",
-    MODEL_ARCH.T5: "t5",
-    MODEL_ARCH.T5ENCODER: "t5encoder",
-    MODEL_ARCH.JAIS: "jais",
-    MODEL_ARCH.NEMOTRON: "nemotron",
-    MODEL_ARCH.EXAONE: "exaone",
-    MODEL_ARCH.GRANITE: "granite",
-    MODEL_ARCH.GRANITE_MOE: "granitemoe",
-    MODEL_ARCH.CHAMELEON: "chameleon",
+    MODEL_ARCH.LLAMA: "llama",
+    MODEL_ARCH.DECI: "deci",
+    MODEL_ARCH.FALCON: "falcon",
+    MODEL_ARCH.BAICHUAN: "baichuan",
+    MODEL_ARCH.GROK: "grok",
+    MODEL_ARCH.GPT2: "gpt2",
+    MODEL_ARCH.GPTJ: "gptj",
+    MODEL_ARCH.GPTNEOX: "gptneox",
+    MODEL_ARCH.MPT: "mpt",
+    MODEL_ARCH.STARCODER: "starcoder",
+    MODEL_ARCH.REFACT: "refact",
+    MODEL_ARCH.BERT: "bert",
+    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
+    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
+    MODEL_ARCH.BLOOM: "bloom",
+    MODEL_ARCH.STABLELM: "stablelm",
+    MODEL_ARCH.QWEN: "qwen",
+    MODEL_ARCH.QWEN2: "qwen2",
+    MODEL_ARCH.QWEN2MOE: "qwen2moe",
+    MODEL_ARCH.QWEN2VL: "qwen2vl",
+    MODEL_ARCH.PHI2: "phi2",
+    MODEL_ARCH.PHI3: "phi3",
+    MODEL_ARCH.PHIMOE: "phimoe",
+    MODEL_ARCH.PLAMO: "plamo",
+    MODEL_ARCH.CODESHELL: "codeshell",
+    MODEL_ARCH.ORION: "orion",
+    MODEL_ARCH.INTERNLM2: "internlm2",
+    MODEL_ARCH.MINICPM: "minicpm",
+    MODEL_ARCH.MINICPM3: "minicpm3",
+    MODEL_ARCH.GEMMA: "gemma",
+    MODEL_ARCH.GEMMA2: "gemma2",
+    MODEL_ARCH.STARCODER2: "starcoder2",
+    MODEL_ARCH.RWKV6: "rwkv6",
+    MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
+    MODEL_ARCH.MAMBA: "mamba",
+    MODEL_ARCH.XVERSE: "xverse",
+    MODEL_ARCH.COMMAND_R: "command-r",
+    MODEL_ARCH.COHERE2: "cohere2",
+    MODEL_ARCH.DBRX: "dbrx",
+    MODEL_ARCH.OLMO: "olmo",
+    MODEL_ARCH.OLMO2: "olmo2",
+    MODEL_ARCH.OLMOE: "olmoe",
+    MODEL_ARCH.OPENELM: "openelm",
+    MODEL_ARCH.ARCTIC: "arctic",
+    MODEL_ARCH.DEEPSEEK: "deepseek",
+    MODEL_ARCH.DEEPSEEK2: "deepseek2",
+    MODEL_ARCH.CHATGLM: "chatglm",
+    MODEL_ARCH.BITNET: "bitnet",
+    MODEL_ARCH.T5: "t5",
+    MODEL_ARCH.T5ENCODER: "t5encoder",
+    MODEL_ARCH.JAIS: "jais",
+    MODEL_ARCH.NEMOTRON: "nemotron",
+    MODEL_ARCH.EXAONE: "exaone",
+    MODEL_ARCH.GRANITE: "granite",
+    MODEL_ARCH.GRANITE_MOE: "granitemoe",
+    MODEL_ARCH.CHAMELEON: "chameleon",
+    MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
 }

 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -465,6 +506,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
+    MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
     MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
@@ -480,6 +522,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
     MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
     MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
+    MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
     MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
     MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
     MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
@@ -534,6 +577,22 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
     MODEL_TENSOR.CLS: "cls",
     MODEL_TENSOR.CLS_OUT: "cls.output",
+    MODEL_TENSOR.CONV1D: "conv1d",
+    MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
+    MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
+    MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
+    MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
+    MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
+    MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
+    MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
+    MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
+    MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
+    MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
+    MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
+    MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
+    MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
+    MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
+    MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
 }

 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
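TENSOR_NAMES values containing a {bid} placeholder are per-block templates that gguf-py formats with the block index; entries without it name a single tensor. A hedged sketch of how the new posnet/conv1d names expand (illustrative only):

    from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

    # Per-block templates take the block index via {bid}.
    name = TENSOR_NAMES[MODEL_TENSOR.POSNET_ATTN_Q].format(bid=0)
    print(name)  # posnet.0.attn_q

    # CONV1D has no {bid}: it is a single, non-repeated tensor.
    print(TENSOR_NAMES[MODEL_TENSOR.CONV1D])  # conv1d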
@@ -557,6 +616,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.GROK: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -867,6 +946,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PHIMOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.CODESHELL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.POS_EMBD,
@@ -980,23 +1077,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_PRE_NORM,
         MODEL_TENSOR.FFN_POST_NORM,
     ],
-    MODEL_ARCH.GEMMA3: [
-        MODEL_TENSOR.TOKEN_EMBD,
-        MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.ATTN_Q,
-        MODEL_TENSOR.ATTN_Q_NORM,
-        MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_K_NORM,
-        MODEL_TENSOR.ATTN_V,
-        MODEL_TENSOR.ATTN_OUT,
-        MODEL_TENSOR.FFN_GATE,
-        MODEL_TENSOR.FFN_DOWN,
-        MODEL_TENSOR.FFN_UP,
-        MODEL_TENSOR.ATTN_NORM,
-        MODEL_TENSOR.ATTN_POST_NORM,
-        MODEL_TENSOR.FFN_PRE_NORM,
-        MODEL_TENSOR.FFN_POST_NORM,
-    ],
     MODEL_ARCH.STARCODER2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1027,6 +1107,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.TIME_MIX_LERP_R,
         MODEL_TENSOR.TIME_MIX_LERP_G,
         MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
         MODEL_TENSOR.TIME_MIX_FIRST,
         MODEL_TENSOR.TIME_MIX_DECAY,
         MODEL_TENSOR.TIME_MIX_DECAY_W1,
@@ -1043,6 +1124,35 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
         MODEL_TENSOR.CHANNEL_MIX_VALUE,
     ],
+    MODEL_ARCH.RWKV6QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.TIME_MIX_W1,
+        MODEL_TENSOR.TIME_MIX_W2,
+        MODEL_TENSOR.TIME_MIX_LERP_X,
+        MODEL_TENSOR.TIME_MIX_LERP_K,
+        MODEL_TENSOR.TIME_MIX_LERP_V,
+        MODEL_TENSOR.TIME_MIX_LERP_R,
+        MODEL_TENSOR.TIME_MIX_LERP_G,
+        MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
+        MODEL_TENSOR.TIME_MIX_FIRST,
+        MODEL_TENSOR.TIME_MIX_DECAY,
+        MODEL_TENSOR.TIME_MIX_DECAY_W1,
+        MODEL_TENSOR.TIME_MIX_DECAY_W2,
+        MODEL_TENSOR.TIME_MIX_KEY,
+        MODEL_TENSOR.TIME_MIX_VALUE,
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+        MODEL_TENSOR.TIME_MIX_GATE,
+        MODEL_TENSOR.TIME_MIX_LN,
+        MODEL_TENSOR.TIME_MIX_OUTPUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.MAMBA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1086,6 +1196,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ATTN_K_NORM,
         MODEL_TENSOR.ATTN_Q_NORM,
     ],
+    MODEL_ARCH.COHERE2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.DBRX: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1177,6 +1299,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1203,6 +1348,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
     MODEL_ARCH.CHATGLM : [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -1211,6 +1357,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.FFN_NORM,
         MODEL_TENSOR.FFN_DOWN,
@@ -1366,6 +1515,28 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.WAVTOKENIZER_DEC: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.CONV1D,
+        MODEL_TENSOR.CONVNEXT_DW,
+        MODEL_TENSOR.CONVNEXT_NORM,
+        MODEL_TENSOR.CONVNEXT_PW1,
+        MODEL_TENSOR.CONVNEXT_PW2,
+        MODEL_TENSOR.CONVNEXT_GAMMA,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.POSNET_CONV1,
+        MODEL_TENSOR.POSNET_CONV2,
+        MODEL_TENSOR.POSNET_NORM,
+        MODEL_TENSOR.POSNET_NORM1,
+        MODEL_TENSOR.POSNET_NORM2,
+        MODEL_TENSOR.POSNET_ATTN_NORM,
+        MODEL_TENSOR.POSNET_ATTN_Q,
+        MODEL_TENSOR.POSNET_ATTN_K,
+        MODEL_TENSOR.POSNET_ATTN_V,
+        MODEL_TENSOR.POSNET_ATTN_OUT,
+    ],
     # TODO
 }
@@ -1375,6 +1546,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.BAICHUAN: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1399,6 +1574,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
@@ -1473,6 +1652,11 @@ class GGMLQuantizationType(IntEnum):
     TQ2_0 = 35


+class ExpertGatingFuncType(IntEnum):
+    SOFTMAX = 1
+    SIGMOID = 2
+
+
 # TODO: add GGMLFileType from ggml_ftype in ggml.h
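ExpertGatingFuncType gives the expert_gating_func key added earlier a typed value set (softmax vs. sigmoid gating, as used by DeepSeek-style MoE routing). A hedged sketch of writing it through the generic GGUFWriter API; whether this build also adds a dedicated helper to gguf_writer.py is not visible in this hunk, so the snippet sticks to add_uint32 (path and arch values are illustrative):

    from gguf.constants import Keys, ExpertGatingFuncType
    from gguf.gguf_writer import GGUFWriter

    writer = GGUFWriter("model.gguf", arch="deepseek2")
    writer.add_uint32(
        Keys.LLM.EXPERT_GATING_FUNC.format(arch="deepseek2"),
        ExpertGatingFuncType.SIGMOID,  # IntEnum member, serializes as 2
    )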
@@ -1655,7 +1839,6 @@ KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
-KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
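With CLS_ID removed from both Keys.Tokenizer and the KEY_TOKENIZER_* aliases, downstream code that still references it will raise AttributeError against this build. A defensive pattern for code that must span both versions (hypothetical consumer code, not part of the diff):

    from gguf.constants import Keys

    # Tolerate both pre- and post-removal gguf-py versions.
    cls_id_key = getattr(Keys.Tokenizer, "CLS_ID", None)
    if cls_id_key is not None:
        print("legacy gguf-py, CLS key present:", cls_id_key)
    else:
        print("CLS token key removed in this gguf-py version")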