bigdl-core-cpp 2.6.0b20250228__py3-none-win_amd64.whl → 2.6.0b20250231__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +687 -60
- bigdl/cpp/convert_hf_to_gguf_update.py +46 -41
- bigdl/cpp/convert_lora_to_gguf.py +33 -5
- bigdl/cpp/gguf-py/gguf/constants.py +306 -104
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +31 -3
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +122 -25
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +1 -1
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.6.0b20250228.dist-info → bigdl_core_cpp-2.6.0b20250231.dist-info}/METADATA +2 -2
- bigdl_core_cpp-2.6.0b20250231.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.6.0b20250228.dist-info → bigdl_core_cpp-2.6.0b20250231.dist-info}/WHEEL +1 -1
- bigdl_core_cpp-2.6.0b20250228.dist-info/RECORD +0 -56
- {bigdl_core_cpp-2.6.0b20250228.data → bigdl_core_cpp-2.6.0b20250231.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250228.data → bigdl_core_cpp-2.6.0b20250231.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.6.0b20250228.data → bigdl_core_cpp-2.6.0b20250231.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250228.dist-info → bigdl_core_cpp-2.6.0b20250231.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/constants.py

```diff
@@ -90,6 +90,7 @@ class Keys:
         VOCAB_SIZE                = "{arch}.vocab_size"
         CONTEXT_LENGTH            = "{arch}.context_length"
         EMBEDDING_LENGTH          = "{arch}.embedding_length"
+        FEATURES_LENGTH           = "{arch}.features_length"
         BLOCK_COUNT               = "{arch}.block_count"
         LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
         FEED_FORWARD_LENGTH       = "{arch}.feed_forward_length"
```
```diff
@@ -101,6 +102,8 @@ class Keys:
         EXPERT_USED_COUNT      = "{arch}.expert_used_count"
         EXPERT_SHARED_COUNT    = "{arch}.expert_shared_count"
         EXPERT_WEIGHTS_SCALE   = "{arch}.expert_weights_scale"
+        EXPERT_WEIGHTS_NORM    = "{arch}.expert_weights_norm"
+        EXPERT_GATING_FUNC     = "{arch}.expert_gating_func"
         POOLING_TYPE           = "{arch}.pooling_type"
         LOGIT_SCALE            = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
```
```diff
@@ -112,6 +115,7 @@ class Keys:
         TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
         RESIDUAL_SCALE       = "{arch}.residual_scale"
         EMBEDDING_SCALE      = "{arch}.embedding_scale"
+        TOKEN_SHIFT_COUNT    = "{arch}.token_shift_count"
 
     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
```
```diff
@@ -122,6 +126,8 @@ class Keys:
         VALUE_LENGTH      = "{arch}.attention.value_length"
         LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
         LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
+        GROUPNORM_EPS     = "{arch}.attention.group_norm_epsilon"
+        GROUPNORM_GROUPS  = "{arch}.attention.group_norm_groups"
         CAUSAL            = "{arch}.attention.causal"
         Q_LORA_RANK       = "{arch}.attention.q_lora_rank"
         KV_LORA_RANK      = "{arch}.attention.kv_lora_rank"
```
```diff
@@ -155,6 +161,14 @@ class Keys:
     class WKV:
         HEAD_SIZE = "{arch}.wkv.head_size"
 
+    class PosNet:
+        EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
+        BLOCK_COUNT      = "{arch}.posnet.block_count"
+
+    class ConvNext:
+        EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
+        BLOCK_COUNT      = "{arch}.convnext.block_count"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE   = "tokenizer.ggml.pre"
```
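The nested key classes are plain string templates; a writer expands them with the architecture name at serialization time. A minimal sketch, assuming the bundled gguf-py directory is importable as the usual `gguf` package (the `wavtokenizer-dec` architecture, added below, is the consumer of these new keys):

```python
from gguf.constants import Keys

arch = "wavtokenizer-dec"  # the architecture these PosNet/ConvNext keys were added for
print(Keys.PosNet.EMBEDDING_LENGTH.format(arch=arch))
# wavtokenizer-dec.posnet.embedding_length
print(Keys.ConvNext.BLOCK_COUNT.format(arch=arch))
# wavtokenizer-dec.convnext.block_count
```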
```diff
@@ -170,7 +184,6 @@ class Keys:
         UNK_ID  = "tokenizer.ggml.unknown_token_id"
         SEP_ID  = "tokenizer.ggml.seperator_token_id"
         PAD_ID  = "tokenizer.ggml.padding_token_id"
-        CLS_ID  = "tokenizer.ggml.cls_token_id"
         MASK_ID = "tokenizer.ggml.mask_token_id"
         ADD_BOS = "tokenizer.ggml.add_bos_token"
         ADD_EOS = "tokenizer.ggml.add_eos_token"
```
```diff
@@ -209,57 +222,63 @@ class GGUFType:
 
 
 class MODEL_ARCH(IntEnum):
-    LLAMA        = auto()
-    FALCON       = auto()
-    BAICHUAN     = auto()
-    GROK         = auto()
-    GPT2         = auto()
-    GPTJ         = auto()
-    GPTNEOX      = auto()
-    MPT          = auto()
-    STARCODER    = auto()
-    REFACT       = auto()
-    BERT         = auto()
-    NOMIC_BERT   = auto()
-    JINA_BERT_V2 = auto()
-    BLOOM        = auto()
-    STABLELM     = auto()
-    QWEN         = auto()
-    QWEN2        = auto()
-    QWEN2MOE     = auto()
-    QWEN2VL      = auto()
-    PHI2         = auto()
-    PHI3         = auto()
-    PLAMO        = auto()
-    CODESHELL    = auto()
-    ORION        = auto()
-    INTERNLM2    = auto()
-    MINICPM      = auto()
-    MINICPM3     = auto()
-    GEMMA        = auto()
-    GEMMA2       = auto()
-    STARCODER2   = auto()
-    RWKV6        = auto()
-    MAMBA        = auto()
-    XVERSE       = auto()
-    COMMAND_R    = auto()
-    DBRX         = auto()
-    OLMO         = auto()
-    OLMO2        = auto()
-    OLMOE        = auto()
-    OPENELM      = auto()
-    ARCTIC       = auto()
-    DEEPSEEK2    = auto()
-    CHATGLM      = auto()
-    BITNET       = auto()
-    T5           = auto()
-    T5ENCODER    = auto()
-    JAIS         = auto()
-    NEMOTRON     = auto()
-    EXAONE       = auto()
-    GRANITE      = auto()
-    GRANITE_MOE  = auto()
-    CHAMELEON    = auto()
+    LLAMA            = auto()
+    DECI             = auto()
+    FALCON           = auto()
+    BAICHUAN         = auto()
+    GROK             = auto()
+    GPT2             = auto()
+    GPTJ             = auto()
+    GPTNEOX          = auto()
+    MPT              = auto()
+    STARCODER        = auto()
+    REFACT           = auto()
+    BERT             = auto()
+    NOMIC_BERT       = auto()
+    JINA_BERT_V2     = auto()
+    BLOOM            = auto()
+    STABLELM         = auto()
+    QWEN             = auto()
+    QWEN2            = auto()
+    QWEN2MOE         = auto()
+    QWEN2VL          = auto()
+    PHI2             = auto()
+    PHI3             = auto()
+    PHIMOE           = auto()
+    PLAMO            = auto()
+    CODESHELL        = auto()
+    ORION            = auto()
+    INTERNLM2        = auto()
+    MINICPM          = auto()
+    MINICPM3         = auto()
+    GEMMA            = auto()
+    GEMMA2           = auto()
+    STARCODER2       = auto()
+    RWKV6            = auto()
+    RWKV6QWEN2       = auto()
+    MAMBA            = auto()
+    XVERSE           = auto()
+    COMMAND_R        = auto()
+    COHERE2          = auto()
+    DBRX             = auto()
+    OLMO             = auto()
+    OLMO2            = auto()
+    OLMOE            = auto()
+    OPENELM          = auto()
+    ARCTIC           = auto()
+    DEEPSEEK         = auto()
+    DEEPSEEK2        = auto()
+    CHATGLM          = auto()
+    BITNET           = auto()
+    T5               = auto()
+    T5ENCODER        = auto()
+    JAIS             = auto()
+    NEMOTRON         = auto()
+    EXAONE           = auto()
+    GRANITE          = auto()
+    GRANITE_MOE      = auto()
+    CHAMELEON        = auto()
+    WAVTOKENIZER_DEC = auto()
 
 
 class MODEL_TENSOR(IntEnum):
```
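Note that the new architectures (DECI, PHIMOE, RWKV6QWEN2, COHERE2, DEEPSEEK, WAVTOKENIZER_DEC) are inserted mid-enum rather than appended, so `auto()` renumbers every member below each insertion point. That is harmless here because GGUF files record the architecture *name* from MODEL_ARCH_NAMES (shown in a later hunk), not the enum value. A minimal illustration of the renumbering:

```python
from enum import IntEnum, auto

class Old(IntEnum):
    LLAMA  = auto()  # 1
    FALCON = auto()  # 2

class New(IntEnum):
    LLAMA  = auto()  # 1
    DECI   = auto()  # 2 (inserted; every later member shifts by one)
    FALCON = auto()  # 3

assert Old.FALCON == 2 and New.FALCON == 3
```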
```diff
@@ -298,6 +317,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE_SHEXP  = auto()
     FFN_DOWN_SHEXP  = auto()
     FFN_UP_SHEXP    = auto()
+    FFN_EXP_PROBS_B = auto()
     ATTN_Q_NORM     = auto()
     ATTN_K_NORM     = auto()
     LAYER_OUT_NORM  = auto()
```
```diff
@@ -315,6 +335,7 @@ class MODEL_TENSOR(IntEnum):
     TIME_MIX_LERP_V     = auto()
     TIME_MIX_LERP_R     = auto()
     TIME_MIX_LERP_G     = auto()
+    TIME_MIX_LERP_FUSED = auto()
     TIME_MIX_LERP_W     = auto()
     TIME_MIX_FIRST      = auto()
     TIME_MIX_DECAY      = auto()
```
```diff
@@ -369,60 +390,82 @@ class MODEL_TENSOR(IntEnum):
     ENC_OUTPUT_NORM  = auto()
     CLS              = auto() # classifier
     CLS_OUT          = auto() # classifier output projection
+    CONV1D           = auto()
+    CONVNEXT_DW      = auto()
+    CONVNEXT_NORM    = auto()
+    CONVNEXT_PW1     = auto()
+    CONVNEXT_PW2     = auto()
+    CONVNEXT_GAMMA   = auto()
+    POSNET_CONV1     = auto()
+    POSNET_CONV2     = auto()
+    POSNET_NORM      = auto()
+    POSNET_NORM1     = auto()
+    POSNET_NORM2     = auto()
+    POSNET_ATTN_NORM = auto()
+    POSNET_ATTN_Q    = auto()
+    POSNET_ATTN_K    = auto()
+    POSNET_ATTN_V    = auto()
+    POSNET_ATTN_OUT  = auto()
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
-    MODEL_ARCH.LLAMA:        "llama",
-    MODEL_ARCH.FALCON:       "falcon",
-    MODEL_ARCH.BAICHUAN:     "baichuan",
-    MODEL_ARCH.GROK:         "grok",
-    MODEL_ARCH.GPT2:         "gpt2",
-    MODEL_ARCH.GPTJ:         "gptj",
-    MODEL_ARCH.GPTNEOX:      "gptneox",
-    MODEL_ARCH.MPT:          "mpt",
-    MODEL_ARCH.STARCODER:    "starcoder",
-    MODEL_ARCH.REFACT:       "refact",
-    MODEL_ARCH.BERT:         "bert",
-    MODEL_ARCH.NOMIC_BERT:   "nomic-bert",
-    MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
-    MODEL_ARCH.BLOOM:        "bloom",
-    MODEL_ARCH.STABLELM:     "stablelm",
-    MODEL_ARCH.QWEN:         "qwen",
-    MODEL_ARCH.QWEN2:        "qwen2",
-    MODEL_ARCH.QWEN2MOE:     "qwen2moe",
-    MODEL_ARCH.QWEN2VL:      "qwen2vl",
-    MODEL_ARCH.PHI2:         "phi2",
-    MODEL_ARCH.PHI3:         "phi3",
-    MODEL_ARCH.PLAMO:        "plamo",
-    MODEL_ARCH.CODESHELL:    "codeshell",
-    MODEL_ARCH.ORION:        "orion",
-    MODEL_ARCH.INTERNLM2:    "internlm2",
-    MODEL_ARCH.MINICPM:      "minicpm",
-    MODEL_ARCH.MINICPM3:     "minicpm3",
-    MODEL_ARCH.GEMMA:        "gemma",
-    MODEL_ARCH.GEMMA2:       "gemma2",
-    MODEL_ARCH.STARCODER2:   "starcoder2",
-    MODEL_ARCH.RWKV6:        "rwkv6",
-    MODEL_ARCH.MAMBA:        "mamba",
-    MODEL_ARCH.XVERSE:       "xverse",
-    MODEL_ARCH.COMMAND_R:    "command-r",
-    MODEL_ARCH.DBRX:         "dbrx",
-    MODEL_ARCH.OLMO:         "olmo",
-    MODEL_ARCH.OLMO2:        "olmo2",
-    MODEL_ARCH.OLMOE:        "olmoe",
-    MODEL_ARCH.OPENELM:      "openelm",
-    MODEL_ARCH.ARCTIC:       "arctic",
-    MODEL_ARCH.DEEPSEEK2:    "deepseek2",
-    MODEL_ARCH.CHATGLM:      "chatglm",
-    MODEL_ARCH.BITNET:       "bitnet",
-    MODEL_ARCH.T5:           "t5",
-    MODEL_ARCH.T5ENCODER:    "t5encoder",
-    MODEL_ARCH.JAIS:         "jais",
-    MODEL_ARCH.NEMOTRON:     "nemotron",
-    MODEL_ARCH.EXAONE:       "exaone",
-    MODEL_ARCH.GRANITE:      "granite",
-    MODEL_ARCH.GRANITE_MOE:  "granitemoe",
-    MODEL_ARCH.CHAMELEON:    "chameleon",
+    MODEL_ARCH.LLAMA:            "llama",
+    MODEL_ARCH.DECI:             "deci",
+    MODEL_ARCH.FALCON:           "falcon",
+    MODEL_ARCH.BAICHUAN:         "baichuan",
+    MODEL_ARCH.GROK:             "grok",
+    MODEL_ARCH.GPT2:             "gpt2",
+    MODEL_ARCH.GPTJ:             "gptj",
+    MODEL_ARCH.GPTNEOX:          "gptneox",
+    MODEL_ARCH.MPT:              "mpt",
+    MODEL_ARCH.STARCODER:        "starcoder",
+    MODEL_ARCH.REFACT:           "refact",
+    MODEL_ARCH.BERT:             "bert",
+    MODEL_ARCH.NOMIC_BERT:       "nomic-bert",
+    MODEL_ARCH.JINA_BERT_V2:     "jina-bert-v2",
+    MODEL_ARCH.BLOOM:            "bloom",
+    MODEL_ARCH.STABLELM:         "stablelm",
+    MODEL_ARCH.QWEN:             "qwen",
+    MODEL_ARCH.QWEN2:            "qwen2",
+    MODEL_ARCH.QWEN2MOE:         "qwen2moe",
+    MODEL_ARCH.QWEN2VL:          "qwen2vl",
+    MODEL_ARCH.PHI2:             "phi2",
+    MODEL_ARCH.PHI3:             "phi3",
+    MODEL_ARCH.PHIMOE:           "phimoe",
+    MODEL_ARCH.PLAMO:            "plamo",
+    MODEL_ARCH.CODESHELL:        "codeshell",
+    MODEL_ARCH.ORION:            "orion",
+    MODEL_ARCH.INTERNLM2:        "internlm2",
+    MODEL_ARCH.MINICPM:          "minicpm",
+    MODEL_ARCH.MINICPM3:         "minicpm3",
+    MODEL_ARCH.GEMMA:            "gemma",
+    MODEL_ARCH.GEMMA2:           "gemma2",
+    MODEL_ARCH.STARCODER2:       "starcoder2",
+    MODEL_ARCH.RWKV6:            "rwkv6",
+    MODEL_ARCH.RWKV6QWEN2:       "rwkv6qwen2",
+    MODEL_ARCH.MAMBA:            "mamba",
+    MODEL_ARCH.XVERSE:           "xverse",
+    MODEL_ARCH.COMMAND_R:        "command-r",
+    MODEL_ARCH.COHERE2:          "cohere2",
+    MODEL_ARCH.DBRX:             "dbrx",
+    MODEL_ARCH.OLMO:             "olmo",
+    MODEL_ARCH.OLMO2:            "olmo2",
+    MODEL_ARCH.OLMOE:            "olmoe",
+    MODEL_ARCH.OPENELM:          "openelm",
+    MODEL_ARCH.ARCTIC:           "arctic",
+    MODEL_ARCH.DEEPSEEK:         "deepseek",
+    MODEL_ARCH.DEEPSEEK2:        "deepseek2",
+    MODEL_ARCH.CHATGLM:          "chatglm",
+    MODEL_ARCH.BITNET:           "bitnet",
+    MODEL_ARCH.T5:               "t5",
+    MODEL_ARCH.T5ENCODER:        "t5encoder",
+    MODEL_ARCH.JAIS:             "jais",
+    MODEL_ARCH.NEMOTRON:         "nemotron",
+    MODEL_ARCH.EXAONE:           "exaone",
+    MODEL_ARCH.GRANITE:          "granite",
+    MODEL_ARCH.GRANITE_MOE:      "granitemoe",
+    MODEL_ARCH.CHAMELEON:        "chameleon",
+    MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
```
```diff
@@ -463,6 +506,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_GATE_EXP:    "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP:    "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP:      "blk.{bid}.ffn_up_exps",
+    MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
     MODEL_TENSOR.LAYER_OUT_NORM:  "blk.{bid}.layer_output_norm",
     MODEL_TENSOR.SSM_IN:          "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:      "blk.{bid}.ssm_conv1d",
```
```diff
@@ -478,6 +522,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.TIME_MIX_LERP_V:     "blk.{bid}.time_mix_lerp_v",
     MODEL_TENSOR.TIME_MIX_LERP_R:     "blk.{bid}.time_mix_lerp_r",
     MODEL_TENSOR.TIME_MIX_LERP_G:     "blk.{bid}.time_mix_lerp_g",
+    MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
     MODEL_TENSOR.TIME_MIX_LERP_W:     "blk.{bid}.time_mix_lerp_w",
     MODEL_TENSOR.TIME_MIX_FIRST:      "blk.{bid}.time_mix_first",
     MODEL_TENSOR.TIME_MIX_DECAY:      "blk.{bid}.time_mix_decay",
```
```diff
@@ -532,6 +577,22 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.ENC_OUTPUT_NORM:  "enc.output_norm",
     MODEL_TENSOR.CLS:              "cls",
     MODEL_TENSOR.CLS_OUT:          "cls.output",
+    MODEL_TENSOR.CONV1D:           "conv1d",
+    MODEL_TENSOR.CONVNEXT_DW:      "convnext.{bid}.dw",
+    MODEL_TENSOR.CONVNEXT_NORM:    "convnext.{bid}.norm",
+    MODEL_TENSOR.CONVNEXT_PW1:     "convnext.{bid}.pw1",
+    MODEL_TENSOR.CONVNEXT_PW2:     "convnext.{bid}.pw2",
+    MODEL_TENSOR.CONVNEXT_GAMMA:   "convnext.{bid}.gamma",
+    MODEL_TENSOR.POSNET_CONV1:     "posnet.{bid}.conv1",
+    MODEL_TENSOR.POSNET_CONV2:     "posnet.{bid}.conv2",
+    MODEL_TENSOR.POSNET_NORM:      "posnet.{bid}.norm",
+    MODEL_TENSOR.POSNET_NORM1:     "posnet.{bid}.norm1",
+    MODEL_TENSOR.POSNET_NORM2:     "posnet.{bid}.norm2",
+    MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
+    MODEL_TENSOR.POSNET_ATTN_Q:    "posnet.{bid}.attn_q",
+    MODEL_TENSOR.POSNET_ATTN_K:    "posnet.{bid}.attn_k",
+    MODEL_TENSOR.POSNET_ATTN_V:    "posnet.{bid}.attn_v",
+    MODEL_TENSOR.POSNET_ATTN_OUT:  "posnet.{bid}.attn_output",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
```
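Per-block tensor name templates carry a `{bid}` placeholder that is filled with the block index when tensors are written. A minimal sketch, assuming the bundled gguf-py is importable as `gguf`:

```python
from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

# The new DeepSeek expert-probability bias tensor for block 3:
print(TENSOR_NAMES[MODEL_TENSOR.FFN_EXP_PROBS_B].format(bid=3))  # blk.3.exp_probs_b

# The new WavTokenizer tensors follow the same pattern:
print(TENSOR_NAMES[MODEL_TENSOR.POSNET_ATTN_Q].format(bid=0))    # posnet.0.attn_q
```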
```diff
@@ -555,6 +616,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.GROK: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
```
```diff
@@ -865,6 +946,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PHIMOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.CODESHELL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.POS_EMBD,
```
```diff
@@ -1008,6 +1107,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.TIME_MIX_LERP_R,
         MODEL_TENSOR.TIME_MIX_LERP_G,
         MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
         MODEL_TENSOR.TIME_MIX_FIRST,
         MODEL_TENSOR.TIME_MIX_DECAY,
         MODEL_TENSOR.TIME_MIX_DECAY_W1,
```
```diff
@@ -1024,6 +1124,35 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
         MODEL_TENSOR.CHANNEL_MIX_VALUE,
     ],
+    MODEL_ARCH.RWKV6QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.TIME_MIX_W1,
+        MODEL_TENSOR.TIME_MIX_W2,
+        MODEL_TENSOR.TIME_MIX_LERP_X,
+        MODEL_TENSOR.TIME_MIX_LERP_K,
+        MODEL_TENSOR.TIME_MIX_LERP_V,
+        MODEL_TENSOR.TIME_MIX_LERP_R,
+        MODEL_TENSOR.TIME_MIX_LERP_G,
+        MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_LERP_FUSED,
+        MODEL_TENSOR.TIME_MIX_FIRST,
+        MODEL_TENSOR.TIME_MIX_DECAY,
+        MODEL_TENSOR.TIME_MIX_DECAY_W1,
+        MODEL_TENSOR.TIME_MIX_DECAY_W2,
+        MODEL_TENSOR.TIME_MIX_KEY,
+        MODEL_TENSOR.TIME_MIX_VALUE,
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+        MODEL_TENSOR.TIME_MIX_GATE,
+        MODEL_TENSOR.TIME_MIX_LN,
+        MODEL_TENSOR.TIME_MIX_OUTPUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.MAMBA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
```
```diff
@@ -1067,6 +1196,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ATTN_K_NORM,
         MODEL_TENSOR.ATTN_Q_NORM,
     ],
+    MODEL_ARCH.COHERE2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.DBRX: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
```
```diff
@@ -1158,6 +1299,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
```
```diff
@@ -1184,6 +1348,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
     MODEL_ARCH.CHATGLM : [
         MODEL_TENSOR.TOKEN_EMBD,
```
```diff
@@ -1192,6 +1357,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.FFN_NORM,
         MODEL_TENSOR.FFN_DOWN,
```
```diff
@@ -1347,6 +1515,28 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.WAVTOKENIZER_DEC: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.CONV1D,
+        MODEL_TENSOR.CONVNEXT_DW,
+        MODEL_TENSOR.CONVNEXT_NORM,
+        MODEL_TENSOR.CONVNEXT_PW1,
+        MODEL_TENSOR.CONVNEXT_PW2,
+        MODEL_TENSOR.CONVNEXT_GAMMA,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.POSNET_CONV1,
+        MODEL_TENSOR.POSNET_CONV2,
+        MODEL_TENSOR.POSNET_NORM,
+        MODEL_TENSOR.POSNET_NORM1,
+        MODEL_TENSOR.POSNET_NORM2,
+        MODEL_TENSOR.POSNET_ATTN_NORM,
+        MODEL_TENSOR.POSNET_ATTN_Q,
+        MODEL_TENSOR.POSNET_ATTN_K,
+        MODEL_TENSOR.POSNET_ATTN_V,
+        MODEL_TENSOR.POSNET_ATTN_OUT,
+    ],
     # TODO
 }
 
```
```diff
@@ -1356,6 +1546,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DECI: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.BAICHUAN: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
```
```diff
@@ -1380,6 +1574,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.DEEPSEEK: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
     MODEL_ARCH.DEEPSEEK2: [
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
```
```diff
@@ -1454,6 +1652,11 @@ class GGMLQuantizationType(IntEnum):
     TQ2_0 = 35
 
 
+class ExpertGatingFuncType(IntEnum):
+    SOFTMAX = 1
+    SIGMOID = 2
+
+
 # TODO: add GGMLFileType from ggml_ftype in ggml.h
 
 
```
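This enum lets MoE models record which activation their expert router uses (DeepSeek-V3-style checkpoints route with a sigmoid rather than a softmax). A hedged sketch of how a converter might map a checkpoint's config onto it; the hparams field name `scoring_func` is an assumption based on DeepSeek-style HF configs and is not shown in this diff:

```python
from gguf.constants import ExpertGatingFuncType

def gating_func_from_hparams(hparams: dict) -> ExpertGatingFuncType:
    # "scoring_func" is a hypothetical/assumed config key for illustration
    scoring = hparams.get("scoring_func", "softmax")
    if scoring == "softmax":
        return ExpertGatingFuncType.SOFTMAX
    if scoring == "sigmoid":
        return ExpertGatingFuncType.SIGMOID
    raise ValueError(f"unsupported scoring_func: {scoring}")
```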
```diff
@@ -1636,7 +1839,6 @@ KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID  = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID  = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID  = Keys.Tokenizer.PAD_ID
-KEY_TOKENIZER_CLS_ID  = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV    = Keys.Tokenizer.RWKV
```
bigdl/cpp/gguf-py/gguf/gguf_writer.py

```diff
@@ -26,6 +26,7 @@ from .constants import (
     RopeScalingType,
     PoolingType,
     TokenType,
+    ExpertGatingFuncType,
 )
 
 from .quants import quant_shape_from_byte_shape
```
```diff
@@ -631,6 +632,21 @@ class GGUFWriter:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
+    def add_features_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
+
+    def add_convnext_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_convnext_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
```
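A hedged usage sketch for the new writer methods. `GGUFWriter(path, arch)` matches the upstream gguf-py constructor; the file name and the concrete hyperparameter values below are illustrative, not taken from a real WavTokenizer checkpoint:

```python
from gguf import GGUFWriter

w = GGUFWriter("wavtokenizer.gguf", "wavtokenizer-dec")
w.add_features_length(512)           # wavtokenizer-dec.features_length
w.add_posnet_embedding_length(768)   # wavtokenizer-dec.posnet.embedding_length
w.add_posnet_block_count(6)
w.add_convnext_embedding_length(768)
w.add_convnext_block_count(12)
# ... add tensors here, then serialize in the usual order:
w.write_header_to_file()
w.write_kv_data_to_file()
w.write_tensors_to_file()
w.close()
```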
```diff
@@ -700,6 +716,12 @@ class GGUFWriter:
     def add_expert_weights_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
 
+    def add_expert_weights_norm(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
+
+    def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
+        self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
+
     def add_swin_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
 
```
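Note that `add_expert_gating_func` stores the enum's numeric value as a uint32, so `SIGMOID` is written as 2. A hedged sketch of how a DeepSeek-style MoE converter could record its routing settings (the arch string and values are illustrative):

```python
from gguf import GGUFWriter
from gguf.constants import ExpertGatingFuncType

w = GGUFWriter("model.gguf", "deepseek2")
w.add_expert_weights_norm(True)                         # {arch}.expert_weights_norm
w.add_expert_gating_func(ExpertGatingFuncType.SIGMOID)  # stored as uint32 2
```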
```diff
@@ -721,12 +743,21 @@ class GGUFWriter:
     def add_wkv_head_size(self, size: int) -> None:
         self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
 
+    def add_token_shift_count(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
 
     def add_layer_norm_rms_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
+    def add_group_norm_eps(self, value: float) -> None:
+        self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
+
+    def add_group_norm_groups(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
+
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
 
```
```diff
@@ -826,9 +857,6 @@ class GGUFWriter:
     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.PAD_ID, id)
 
-    def add_cls_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.CLS_ID, id)
-
     def add_mask_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.MASK_ID, id)
 
```
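Removing `add_cls_token_id` (together with `Keys.Tokenizer.CLS_ID` and `KEY_TOKENIZER_CLS_ID` above) is a breaking change for conversion scripts that still write `tokenizer.ggml.cls_token_id`. A hedged compatibility sketch for code that must run against both package versions; the helper name is illustrative:

```python
def maybe_add_cls_token_id(writer, cls_id: int) -> None:
    # Guard the call instead of assuming the method exists:
    add = getattr(writer, "add_cls_token_id", None)
    if add is not None:
        add(cls_id)  # older gguf-py: key is still supported
    # newer gguf-py: "tokenizer.ggml.cls_token_id" is no longer written
```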