bigdl-core-cpp 2.6.0b20250320__py3-none-win_amd64.whl → 2.6.0b20250322__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +687 -60
- bigdl/cpp/convert_hf_to_gguf_update.py +46 -41
- bigdl/cpp/convert_lora_to_gguf.py +33 -5
- bigdl/cpp/gguf-py/gguf/constants.py +306 -123
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +31 -3
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +122 -25
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +1 -1
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.6.0b20250322.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/WHEEL +1 -1
- bigdl_core_cpp-2.6.0b20250320.dist-info/RECORD +0 -57
- {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/top_level.txt +0 -0
@@ -90,6 +90,7 @@ class Keys:
|
|
90
90
|
VOCAB_SIZE = "{arch}.vocab_size"
|
91
91
|
CONTEXT_LENGTH = "{arch}.context_length"
|
92
92
|
EMBEDDING_LENGTH = "{arch}.embedding_length"
|
93
|
+
FEATURES_LENGTH = "{arch}.features_length"
|
93
94
|
BLOCK_COUNT = "{arch}.block_count"
|
94
95
|
LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
|
95
96
|
FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
|
@@ -101,6 +102,8 @@ class Keys:
|
|
101
102
|
EXPERT_USED_COUNT = "{arch}.expert_used_count"
|
102
103
|
EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
|
103
104
|
EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
|
105
|
+
EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
|
106
|
+
EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
|
104
107
|
POOLING_TYPE = "{arch}.pooling_type"
|
105
108
|
LOGIT_SCALE = "{arch}.logit_scale"
|
106
109
|
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
|
@@ -112,6 +115,7 @@ class Keys:
|
|
112
115
|
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
|
113
116
|
RESIDUAL_SCALE = "{arch}.residual_scale"
|
114
117
|
EMBEDDING_SCALE = "{arch}.embedding_scale"
|
118
|
+
TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
|
115
119
|
|
116
120
|
class Attention:
|
117
121
|
HEAD_COUNT = "{arch}.attention.head_count"
|
@@ -122,6 +126,8 @@ class Keys:
|
|
122
126
|
VALUE_LENGTH = "{arch}.attention.value_length"
|
123
127
|
LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
|
124
128
|
LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
|
129
|
+
GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
|
130
|
+
GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
|
125
131
|
CAUSAL = "{arch}.attention.causal"
|
126
132
|
Q_LORA_RANK = "{arch}.attention.q_lora_rank"
|
127
133
|
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
|
@@ -155,6 +161,14 @@ class Keys:
|
|
155
161
|
class WKV:
|
156
162
|
HEAD_SIZE = "{arch}.wkv.head_size"
|
157
163
|
|
164
|
+
class PosNet:
|
165
|
+
EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
|
166
|
+
BLOCK_COUNT = "{arch}.posnet.block_count"
|
167
|
+
|
168
|
+
class ConvNext:
|
169
|
+
EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
|
170
|
+
BLOCK_COUNT = "{arch}.convnext.block_count"
|
171
|
+
|
158
172
|
class Tokenizer:
|
159
173
|
MODEL = "tokenizer.ggml.model"
|
160
174
|
PRE = "tokenizer.ggml.pre"
|
@@ -170,7 +184,6 @@ class Keys:
|
|
170
184
|
UNK_ID = "tokenizer.ggml.unknown_token_id"
|
171
185
|
SEP_ID = "tokenizer.ggml.seperator_token_id"
|
172
186
|
PAD_ID = "tokenizer.ggml.padding_token_id"
|
173
|
-
CLS_ID = "tokenizer.ggml.cls_token_id"
|
174
187
|
MASK_ID = "tokenizer.ggml.mask_token_id"
|
175
188
|
ADD_BOS = "tokenizer.ggml.add_bos_token"
|
176
189
|
ADD_EOS = "tokenizer.ggml.add_eos_token"
|
@@ -209,58 +222,63 @@ class GGUFType:
|
|
209
222
|
|
210
223
|
|
211
224
|
class MODEL_ARCH(IntEnum):
|
212
|
-
LLAMA
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
225
|
+
LLAMA = auto()
|
226
|
+
DECI = auto()
|
227
|
+
FALCON = auto()
|
228
|
+
BAICHUAN = auto()
|
229
|
+
GROK = auto()
|
230
|
+
GPT2 = auto()
|
231
|
+
GPTJ = auto()
|
232
|
+
GPTNEOX = auto()
|
233
|
+
MPT = auto()
|
234
|
+
STARCODER = auto()
|
235
|
+
REFACT = auto()
|
236
|
+
BERT = auto()
|
237
|
+
NOMIC_BERT = auto()
|
238
|
+
JINA_BERT_V2 = auto()
|
239
|
+
BLOOM = auto()
|
240
|
+
STABLELM = auto()
|
241
|
+
QWEN = auto()
|
242
|
+
QWEN2 = auto()
|
243
|
+
QWEN2MOE = auto()
|
244
|
+
QWEN2VL = auto()
|
245
|
+
PHI2 = auto()
|
246
|
+
PHI3 = auto()
|
247
|
+
PHIMOE = auto()
|
248
|
+
PLAMO = auto()
|
249
|
+
CODESHELL = auto()
|
250
|
+
ORION = auto()
|
251
|
+
INTERNLM2 = auto()
|
252
|
+
MINICPM = auto()
|
253
|
+
MINICPM3 = auto()
|
254
|
+
GEMMA = auto()
|
255
|
+
GEMMA2 = auto()
|
256
|
+
STARCODER2 = auto()
|
257
|
+
RWKV6 = auto()
|
258
|
+
RWKV6QWEN2 = auto()
|
259
|
+
MAMBA = auto()
|
260
|
+
XVERSE = auto()
|
261
|
+
COMMAND_R = auto()
|
262
|
+
COHERE2 = auto()
|
263
|
+
DBRX = auto()
|
264
|
+
OLMO = auto()
|
265
|
+
OLMO2 = auto()
|
266
|
+
OLMOE = auto()
|
267
|
+
OPENELM = auto()
|
268
|
+
ARCTIC = auto()
|
269
|
+
DEEPSEEK = auto()
|
270
|
+
DEEPSEEK2 = auto()
|
271
|
+
CHATGLM = auto()
|
272
|
+
BITNET = auto()
|
273
|
+
T5 = auto()
|
274
|
+
T5ENCODER = auto()
|
275
|
+
JAIS = auto()
|
276
|
+
NEMOTRON = auto()
|
277
|
+
EXAONE = auto()
|
278
|
+
GRANITE = auto()
|
279
|
+
GRANITE_MOE = auto()
|
280
|
+
CHAMELEON = auto()
|
281
|
+
WAVTOKENIZER_DEC = auto()
|
264
282
|
|
265
283
|
|
266
284
|
class MODEL_TENSOR(IntEnum):
|
@@ -299,6 +317,7 @@ class MODEL_TENSOR(IntEnum):
|
|
299
317
|
FFN_GATE_SHEXP = auto()
|
300
318
|
FFN_DOWN_SHEXP = auto()
|
301
319
|
FFN_UP_SHEXP = auto()
|
320
|
+
FFN_EXP_PROBS_B = auto()
|
302
321
|
ATTN_Q_NORM = auto()
|
303
322
|
ATTN_K_NORM = auto()
|
304
323
|
LAYER_OUT_NORM = auto()
|
@@ -316,6 +335,7 @@ class MODEL_TENSOR(IntEnum):
|
|
316
335
|
TIME_MIX_LERP_V = auto()
|
317
336
|
TIME_MIX_LERP_R = auto()
|
318
337
|
TIME_MIX_LERP_G = auto()
|
338
|
+
TIME_MIX_LERP_FUSED = auto()
|
319
339
|
TIME_MIX_LERP_W = auto()
|
320
340
|
TIME_MIX_FIRST = auto()
|
321
341
|
TIME_MIX_DECAY = auto()
|
@@ -370,61 +390,82 @@ class MODEL_TENSOR(IntEnum):
|
|
370
390
|
ENC_OUTPUT_NORM = auto()
|
371
391
|
CLS = auto() # classifier
|
372
392
|
CLS_OUT = auto() # classifier output projection
|
393
|
+
CONV1D = auto()
|
394
|
+
CONVNEXT_DW = auto()
|
395
|
+
CONVNEXT_NORM = auto()
|
396
|
+
CONVNEXT_PW1 = auto()
|
397
|
+
CONVNEXT_PW2 = auto()
|
398
|
+
CONVNEXT_GAMMA = auto()
|
399
|
+
POSNET_CONV1 = auto()
|
400
|
+
POSNET_CONV2 = auto()
|
401
|
+
POSNET_NORM = auto()
|
402
|
+
POSNET_NORM1 = auto()
|
403
|
+
POSNET_NORM2 = auto()
|
404
|
+
POSNET_ATTN_NORM = auto()
|
405
|
+
POSNET_ATTN_Q = auto()
|
406
|
+
POSNET_ATTN_K = auto()
|
407
|
+
POSNET_ATTN_V = auto()
|
408
|
+
POSNET_ATTN_OUT = auto()
|
373
409
|
|
374
410
|
|
375
411
|
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
376
|
-
MODEL_ARCH.LLAMA:
|
377
|
-
MODEL_ARCH.
|
378
|
-
MODEL_ARCH.
|
379
|
-
MODEL_ARCH.
|
380
|
-
MODEL_ARCH.
|
381
|
-
MODEL_ARCH.
|
382
|
-
MODEL_ARCH.
|
383
|
-
MODEL_ARCH.
|
384
|
-
MODEL_ARCH.
|
385
|
-
MODEL_ARCH.
|
386
|
-
MODEL_ARCH.
|
387
|
-
MODEL_ARCH.
|
388
|
-
MODEL_ARCH.
|
389
|
-
MODEL_ARCH.
|
390
|
-
MODEL_ARCH.
|
391
|
-
MODEL_ARCH.
|
392
|
-
MODEL_ARCH.
|
393
|
-
MODEL_ARCH.
|
394
|
-
MODEL_ARCH.
|
395
|
-
MODEL_ARCH.
|
396
|
-
MODEL_ARCH.
|
397
|
-
MODEL_ARCH.
|
398
|
-
MODEL_ARCH.
|
399
|
-
MODEL_ARCH.
|
400
|
-
MODEL_ARCH.
|
401
|
-
MODEL_ARCH.
|
402
|
-
MODEL_ARCH.
|
403
|
-
MODEL_ARCH.
|
404
|
-
MODEL_ARCH.
|
405
|
-
MODEL_ARCH.
|
406
|
-
MODEL_ARCH.
|
407
|
-
MODEL_ARCH.
|
408
|
-
MODEL_ARCH.
|
409
|
-
MODEL_ARCH.
|
410
|
-
MODEL_ARCH.
|
411
|
-
MODEL_ARCH.
|
412
|
-
MODEL_ARCH.
|
413
|
-
MODEL_ARCH.
|
414
|
-
MODEL_ARCH.
|
415
|
-
MODEL_ARCH.
|
416
|
-
MODEL_ARCH.
|
417
|
-
MODEL_ARCH.
|
418
|
-
MODEL_ARCH.
|
419
|
-
MODEL_ARCH.
|
420
|
-
MODEL_ARCH.
|
421
|
-
MODEL_ARCH.
|
422
|
-
MODEL_ARCH.
|
423
|
-
MODEL_ARCH.
|
424
|
-
MODEL_ARCH.
|
425
|
-
MODEL_ARCH.
|
426
|
-
MODEL_ARCH.
|
427
|
-
MODEL_ARCH.
|
412
|
+
MODEL_ARCH.LLAMA: "llama",
|
413
|
+
MODEL_ARCH.DECI: "deci",
|
414
|
+
MODEL_ARCH.FALCON: "falcon",
|
415
|
+
MODEL_ARCH.BAICHUAN: "baichuan",
|
416
|
+
MODEL_ARCH.GROK: "grok",
|
417
|
+
MODEL_ARCH.GPT2: "gpt2",
|
418
|
+
MODEL_ARCH.GPTJ: "gptj",
|
419
|
+
MODEL_ARCH.GPTNEOX: "gptneox",
|
420
|
+
MODEL_ARCH.MPT: "mpt",
|
421
|
+
MODEL_ARCH.STARCODER: "starcoder",
|
422
|
+
MODEL_ARCH.REFACT: "refact",
|
423
|
+
MODEL_ARCH.BERT: "bert",
|
424
|
+
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
|
425
|
+
MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
|
426
|
+
MODEL_ARCH.BLOOM: "bloom",
|
427
|
+
MODEL_ARCH.STABLELM: "stablelm",
|
428
|
+
MODEL_ARCH.QWEN: "qwen",
|
429
|
+
MODEL_ARCH.QWEN2: "qwen2",
|
430
|
+
MODEL_ARCH.QWEN2MOE: "qwen2moe",
|
431
|
+
MODEL_ARCH.QWEN2VL: "qwen2vl",
|
432
|
+
MODEL_ARCH.PHI2: "phi2",
|
433
|
+
MODEL_ARCH.PHI3: "phi3",
|
434
|
+
MODEL_ARCH.PHIMOE: "phimoe",
|
435
|
+
MODEL_ARCH.PLAMO: "plamo",
|
436
|
+
MODEL_ARCH.CODESHELL: "codeshell",
|
437
|
+
MODEL_ARCH.ORION: "orion",
|
438
|
+
MODEL_ARCH.INTERNLM2: "internlm2",
|
439
|
+
MODEL_ARCH.MINICPM: "minicpm",
|
440
|
+
MODEL_ARCH.MINICPM3: "minicpm3",
|
441
|
+
MODEL_ARCH.GEMMA: "gemma",
|
442
|
+
MODEL_ARCH.GEMMA2: "gemma2",
|
443
|
+
MODEL_ARCH.STARCODER2: "starcoder2",
|
444
|
+
MODEL_ARCH.RWKV6: "rwkv6",
|
445
|
+
MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
|
446
|
+
MODEL_ARCH.MAMBA: "mamba",
|
447
|
+
MODEL_ARCH.XVERSE: "xverse",
|
448
|
+
MODEL_ARCH.COMMAND_R: "command-r",
|
449
|
+
MODEL_ARCH.COHERE2: "cohere2",
|
450
|
+
MODEL_ARCH.DBRX: "dbrx",
|
451
|
+
MODEL_ARCH.OLMO: "olmo",
|
452
|
+
MODEL_ARCH.OLMO2: "olmo2",
|
453
|
+
MODEL_ARCH.OLMOE: "olmoe",
|
454
|
+
MODEL_ARCH.OPENELM: "openelm",
|
455
|
+
MODEL_ARCH.ARCTIC: "arctic",
|
456
|
+
MODEL_ARCH.DEEPSEEK: "deepseek",
|
457
|
+
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
458
|
+
MODEL_ARCH.CHATGLM: "chatglm",
|
459
|
+
MODEL_ARCH.BITNET: "bitnet",
|
460
|
+
MODEL_ARCH.T5: "t5",
|
461
|
+
MODEL_ARCH.T5ENCODER: "t5encoder",
|
462
|
+
MODEL_ARCH.JAIS: "jais",
|
463
|
+
MODEL_ARCH.NEMOTRON: "nemotron",
|
464
|
+
MODEL_ARCH.EXAONE: "exaone",
|
465
|
+
MODEL_ARCH.GRANITE: "granite",
|
466
|
+
MODEL_ARCH.GRANITE_MOE: "granitemoe",
|
467
|
+
MODEL_ARCH.CHAMELEON: "chameleon",
|
468
|
+
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
|
428
469
|
}
|
429
470
|
|
430
471
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
@@ -465,6 +506,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
|
465
506
|
MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
|
466
507
|
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
|
467
508
|
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
|
509
|
+
MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
|
468
510
|
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
469
511
|
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
470
512
|
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
@@ -480,6 +522,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
|
480
522
|
MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
|
481
523
|
MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
|
482
524
|
MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
|
525
|
+
MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
|
483
526
|
MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
|
484
527
|
MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
|
485
528
|
MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
|
@@ -534,6 +577,22 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
|
534
577
|
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
|
535
578
|
MODEL_TENSOR.CLS: "cls",
|
536
579
|
MODEL_TENSOR.CLS_OUT: "cls.output",
|
580
|
+
MODEL_TENSOR.CONV1D: "conv1d",
|
581
|
+
MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
|
582
|
+
MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
|
583
|
+
MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
|
584
|
+
MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
|
585
|
+
MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
|
586
|
+
MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
|
587
|
+
MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
|
588
|
+
MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
|
589
|
+
MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
|
590
|
+
MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
|
591
|
+
MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
|
592
|
+
MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
|
593
|
+
MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
|
594
|
+
MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
|
595
|
+
MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
|
537
596
|
}
|
538
597
|
|
539
598
|
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
@@ -557,6 +616,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
557
616
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
558
617
|
MODEL_TENSOR.FFN_UP_EXP,
|
559
618
|
],
|
619
|
+
MODEL_ARCH.DECI: [
|
620
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
621
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
622
|
+
MODEL_TENSOR.OUTPUT,
|
623
|
+
MODEL_TENSOR.ROPE_FREQS,
|
624
|
+
MODEL_TENSOR.ATTN_NORM,
|
625
|
+
MODEL_TENSOR.ATTN_Q,
|
626
|
+
MODEL_TENSOR.ATTN_K,
|
627
|
+
MODEL_TENSOR.ATTN_V,
|
628
|
+
MODEL_TENSOR.ATTN_OUT,
|
629
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
630
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
631
|
+
MODEL_TENSOR.FFN_NORM,
|
632
|
+
MODEL_TENSOR.FFN_GATE,
|
633
|
+
MODEL_TENSOR.FFN_DOWN,
|
634
|
+
MODEL_TENSOR.FFN_UP,
|
635
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
636
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
637
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
638
|
+
],
|
560
639
|
MODEL_ARCH.GROK: [
|
561
640
|
MODEL_TENSOR.TOKEN_EMBD,
|
562
641
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -867,6 +946,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
867
946
|
MODEL_TENSOR.FFN_DOWN,
|
868
947
|
MODEL_TENSOR.FFN_UP,
|
869
948
|
],
|
949
|
+
MODEL_ARCH.PHIMOE: [
|
950
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
951
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
952
|
+
MODEL_TENSOR.OUTPUT,
|
953
|
+
MODEL_TENSOR.ROPE_FACTORS_LONG,
|
954
|
+
MODEL_TENSOR.ROPE_FACTORS_SHORT,
|
955
|
+
MODEL_TENSOR.ATTN_NORM,
|
956
|
+
MODEL_TENSOR.ATTN_QKV,
|
957
|
+
MODEL_TENSOR.ATTN_Q,
|
958
|
+
MODEL_TENSOR.ATTN_K,
|
959
|
+
MODEL_TENSOR.ATTN_V,
|
960
|
+
MODEL_TENSOR.ATTN_OUT,
|
961
|
+
MODEL_TENSOR.FFN_NORM,
|
962
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
963
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
964
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
965
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
966
|
+
],
|
870
967
|
MODEL_ARCH.CODESHELL: [
|
871
968
|
MODEL_TENSOR.TOKEN_EMBD,
|
872
969
|
MODEL_TENSOR.POS_EMBD,
|
@@ -980,23 +1077,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
980
1077
|
MODEL_TENSOR.FFN_PRE_NORM,
|
981
1078
|
MODEL_TENSOR.FFN_POST_NORM,
|
982
1079
|
],
|
983
|
-
MODEL_ARCH.GEMMA3: [
|
984
|
-
MODEL_TENSOR.TOKEN_EMBD,
|
985
|
-
MODEL_TENSOR.OUTPUT_NORM,
|
986
|
-
MODEL_TENSOR.ATTN_Q,
|
987
|
-
MODEL_TENSOR.ATTN_Q_NORM,
|
988
|
-
MODEL_TENSOR.ATTN_K,
|
989
|
-
MODEL_TENSOR.ATTN_K_NORM,
|
990
|
-
MODEL_TENSOR.ATTN_V,
|
991
|
-
MODEL_TENSOR.ATTN_OUT,
|
992
|
-
MODEL_TENSOR.FFN_GATE,
|
993
|
-
MODEL_TENSOR.FFN_DOWN,
|
994
|
-
MODEL_TENSOR.FFN_UP,
|
995
|
-
MODEL_TENSOR.ATTN_NORM,
|
996
|
-
MODEL_TENSOR.ATTN_POST_NORM,
|
997
|
-
MODEL_TENSOR.FFN_PRE_NORM,
|
998
|
-
MODEL_TENSOR.FFN_POST_NORM,
|
999
|
-
],
|
1000
1080
|
MODEL_ARCH.STARCODER2: [
|
1001
1081
|
MODEL_TENSOR.TOKEN_EMBD,
|
1002
1082
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -1027,6 +1107,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1027
1107
|
MODEL_TENSOR.TIME_MIX_LERP_R,
|
1028
1108
|
MODEL_TENSOR.TIME_MIX_LERP_G,
|
1029
1109
|
MODEL_TENSOR.TIME_MIX_LERP_W,
|
1110
|
+
MODEL_TENSOR.TIME_MIX_LERP_FUSED,
|
1030
1111
|
MODEL_TENSOR.TIME_MIX_FIRST,
|
1031
1112
|
MODEL_TENSOR.TIME_MIX_DECAY,
|
1032
1113
|
MODEL_TENSOR.TIME_MIX_DECAY_W1,
|
@@ -1043,6 +1124,35 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1043
1124
|
MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
|
1044
1125
|
MODEL_TENSOR.CHANNEL_MIX_VALUE,
|
1045
1126
|
],
|
1127
|
+
MODEL_ARCH.RWKV6QWEN2: [
|
1128
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1129
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1130
|
+
MODEL_TENSOR.OUTPUT,
|
1131
|
+
MODEL_TENSOR.ATTN_NORM,
|
1132
|
+
MODEL_TENSOR.TIME_MIX_W1,
|
1133
|
+
MODEL_TENSOR.TIME_MIX_W2,
|
1134
|
+
MODEL_TENSOR.TIME_MIX_LERP_X,
|
1135
|
+
MODEL_TENSOR.TIME_MIX_LERP_K,
|
1136
|
+
MODEL_TENSOR.TIME_MIX_LERP_V,
|
1137
|
+
MODEL_TENSOR.TIME_MIX_LERP_R,
|
1138
|
+
MODEL_TENSOR.TIME_MIX_LERP_G,
|
1139
|
+
MODEL_TENSOR.TIME_MIX_LERP_W,
|
1140
|
+
MODEL_TENSOR.TIME_MIX_LERP_FUSED,
|
1141
|
+
MODEL_TENSOR.TIME_MIX_FIRST,
|
1142
|
+
MODEL_TENSOR.TIME_MIX_DECAY,
|
1143
|
+
MODEL_TENSOR.TIME_MIX_DECAY_W1,
|
1144
|
+
MODEL_TENSOR.TIME_MIX_DECAY_W2,
|
1145
|
+
MODEL_TENSOR.TIME_MIX_KEY,
|
1146
|
+
MODEL_TENSOR.TIME_MIX_VALUE,
|
1147
|
+
MODEL_TENSOR.TIME_MIX_RECEPTANCE,
|
1148
|
+
MODEL_TENSOR.TIME_MIX_GATE,
|
1149
|
+
MODEL_TENSOR.TIME_MIX_LN,
|
1150
|
+
MODEL_TENSOR.TIME_MIX_OUTPUT,
|
1151
|
+
MODEL_TENSOR.FFN_NORM,
|
1152
|
+
MODEL_TENSOR.FFN_GATE,
|
1153
|
+
MODEL_TENSOR.FFN_DOWN,
|
1154
|
+
MODEL_TENSOR.FFN_UP,
|
1155
|
+
],
|
1046
1156
|
MODEL_ARCH.MAMBA: [
|
1047
1157
|
MODEL_TENSOR.TOKEN_EMBD,
|
1048
1158
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -1086,6 +1196,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1086
1196
|
MODEL_TENSOR.ATTN_K_NORM,
|
1087
1197
|
MODEL_TENSOR.ATTN_Q_NORM,
|
1088
1198
|
],
|
1199
|
+
MODEL_ARCH.COHERE2: [
|
1200
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1201
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1202
|
+
MODEL_TENSOR.ATTN_NORM,
|
1203
|
+
MODEL_TENSOR.ATTN_Q,
|
1204
|
+
MODEL_TENSOR.ATTN_K,
|
1205
|
+
MODEL_TENSOR.ATTN_V,
|
1206
|
+
MODEL_TENSOR.ATTN_OUT,
|
1207
|
+
MODEL_TENSOR.FFN_GATE,
|
1208
|
+
MODEL_TENSOR.FFN_DOWN,
|
1209
|
+
MODEL_TENSOR.FFN_UP,
|
1210
|
+
],
|
1089
1211
|
MODEL_ARCH.DBRX: [
|
1090
1212
|
MODEL_TENSOR.TOKEN_EMBD,
|
1091
1213
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -1177,6 +1299,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1177
1299
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
1178
1300
|
MODEL_TENSOR.FFN_UP_EXP,
|
1179
1301
|
],
|
1302
|
+
MODEL_ARCH.DEEPSEEK: [
|
1303
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1304
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1305
|
+
MODEL_TENSOR.OUTPUT,
|
1306
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1307
|
+
MODEL_TENSOR.ATTN_NORM,
|
1308
|
+
MODEL_TENSOR.ATTN_Q,
|
1309
|
+
MODEL_TENSOR.ATTN_K,
|
1310
|
+
MODEL_TENSOR.ATTN_V,
|
1311
|
+
MODEL_TENSOR.ATTN_OUT,
|
1312
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1313
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
1314
|
+
MODEL_TENSOR.FFN_NORM,
|
1315
|
+
MODEL_TENSOR.FFN_GATE,
|
1316
|
+
MODEL_TENSOR.FFN_DOWN,
|
1317
|
+
MODEL_TENSOR.FFN_UP,
|
1318
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
1319
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
1320
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
1321
|
+
MODEL_TENSOR.FFN_GATE_SHEXP,
|
1322
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
1323
|
+
MODEL_TENSOR.FFN_UP_SHEXP,
|
1324
|
+
],
|
1180
1325
|
MODEL_ARCH.DEEPSEEK2: [
|
1181
1326
|
MODEL_TENSOR.TOKEN_EMBD,
|
1182
1327
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -1203,6 +1348,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1203
1348
|
MODEL_TENSOR.FFN_GATE_SHEXP,
|
1204
1349
|
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
1205
1350
|
MODEL_TENSOR.FFN_UP_SHEXP,
|
1351
|
+
MODEL_TENSOR.FFN_EXP_PROBS_B,
|
1206
1352
|
],
|
1207
1353
|
MODEL_ARCH.CHATGLM : [
|
1208
1354
|
MODEL_TENSOR.TOKEN_EMBD,
|
@@ -1211,6 +1357,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1211
1357
|
MODEL_TENSOR.OUTPUT,
|
1212
1358
|
MODEL_TENSOR.ATTN_NORM,
|
1213
1359
|
MODEL_TENSOR.ATTN_QKV,
|
1360
|
+
MODEL_TENSOR.ATTN_Q,
|
1361
|
+
MODEL_TENSOR.ATTN_K,
|
1362
|
+
MODEL_TENSOR.ATTN_V,
|
1214
1363
|
MODEL_TENSOR.ATTN_OUT,
|
1215
1364
|
MODEL_TENSOR.FFN_NORM,
|
1216
1365
|
MODEL_TENSOR.FFN_DOWN,
|
@@ -1366,6 +1515,28 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1366
1515
|
MODEL_TENSOR.FFN_DOWN,
|
1367
1516
|
MODEL_TENSOR.FFN_UP,
|
1368
1517
|
],
|
1518
|
+
MODEL_ARCH.WAVTOKENIZER_DEC: [
|
1519
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1520
|
+
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
1521
|
+
MODEL_TENSOR.CONV1D,
|
1522
|
+
MODEL_TENSOR.CONVNEXT_DW,
|
1523
|
+
MODEL_TENSOR.CONVNEXT_NORM,
|
1524
|
+
MODEL_TENSOR.CONVNEXT_PW1,
|
1525
|
+
MODEL_TENSOR.CONVNEXT_PW2,
|
1526
|
+
MODEL_TENSOR.CONVNEXT_GAMMA,
|
1527
|
+
MODEL_TENSOR.OUTPUT,
|
1528
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1529
|
+
MODEL_TENSOR.POSNET_CONV1,
|
1530
|
+
MODEL_TENSOR.POSNET_CONV2,
|
1531
|
+
MODEL_TENSOR.POSNET_NORM,
|
1532
|
+
MODEL_TENSOR.POSNET_NORM1,
|
1533
|
+
MODEL_TENSOR.POSNET_NORM2,
|
1534
|
+
MODEL_TENSOR.POSNET_ATTN_NORM,
|
1535
|
+
MODEL_TENSOR.POSNET_ATTN_Q,
|
1536
|
+
MODEL_TENSOR.POSNET_ATTN_K,
|
1537
|
+
MODEL_TENSOR.POSNET_ATTN_V,
|
1538
|
+
MODEL_TENSOR.POSNET_ATTN_OUT,
|
1539
|
+
],
|
1369
1540
|
# TODO
|
1370
1541
|
}
|
1371
1542
|
|
@@ -1375,6 +1546,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1375
1546
|
MODEL_TENSOR.ROPE_FREQS,
|
1376
1547
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1377
1548
|
],
|
1549
|
+
MODEL_ARCH.DECI: [
|
1550
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1551
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1552
|
+
],
|
1378
1553
|
MODEL_ARCH.BAICHUAN: [
|
1379
1554
|
MODEL_TENSOR.ROPE_FREQS,
|
1380
1555
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
@@ -1399,6 +1574,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1399
1574
|
MODEL_TENSOR.ROPE_FREQS,
|
1400
1575
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1401
1576
|
],
|
1577
|
+
MODEL_ARCH.DEEPSEEK: [
|
1578
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1579
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1580
|
+
],
|
1402
1581
|
MODEL_ARCH.DEEPSEEK2: [
|
1403
1582
|
MODEL_TENSOR.ROPE_FREQS,
|
1404
1583
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
@@ -1473,6 +1652,11 @@ class GGMLQuantizationType(IntEnum):
|
|
1473
1652
|
TQ2_0 = 35
|
1474
1653
|
|
1475
1654
|
|
1655
|
+
class ExpertGatingFuncType(IntEnum):
|
1656
|
+
SOFTMAX = 1
|
1657
|
+
SIGMOID = 2
|
1658
|
+
|
1659
|
+
|
1476
1660
|
# TODO: add GGMLFileType from ggml_ftype in ggml.h
|
1477
1661
|
|
1478
1662
|
|
@@ -1655,7 +1839,6 @@ KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
|
|
1655
1839
|
KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
|
1656
1840
|
KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
|
1657
1841
|
KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
|
1658
|
-
KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
|
1659
1842
|
KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
|
1660
1843
|
KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
|
1661
1844
|
KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
|