bigdl-core-cpp 2.6.0b20250319__py3-none-win_amd64.whl → 2.6.0b20250321__py3-none-win_amd64.whl
This diff shows the published contents of two package versions as they appear in their respective public registries. It is provided for informational purposes only.
- bigdl/cpp/convert_hf_to_gguf.py +687 -60
- bigdl/cpp/convert_hf_to_gguf_update.py +46 -41
- bigdl/cpp/convert_lora_to_gguf.py +33 -5
- bigdl/cpp/gguf-py/gguf/constants.py +306 -123
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +31 -3
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +122 -25
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +1 -1
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.6.0b20250319.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/METADATA +2 -2
- bigdl_core_cpp-2.6.0b20250321.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.6.0b20250319.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/WHEEL +1 -1
- bigdl_core_cpp-2.6.0b20250319.dist-info/RECORD +0 -57
- {bigdl_core_cpp-2.6.0b20250319.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250319.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.6.0b20250319.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250319.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/gguf_writer.py
CHANGED
@@ -26,6 +26,7 @@ from .constants import (
     RopeScalingType,
     PoolingType,
     TokenType,
+    ExpertGatingFuncType,
 )
 
 from .quants import quant_shape_from_byte_shape
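The newly imported ExpertGatingFuncType is defined in constants.py, whose diff is not reproduced here. As a point of reference, upstream llama.cpp's gguf-py defines it as a small IntEnum along the lines below; the bigdl copy is assumed to mirror it.

    # Sketch of ExpertGatingFuncType as defined in upstream llama.cpp gguf-py
    # constants.py; assumed to match the copy bundled in this wheel.
    from enum import IntEnum

    class ExpertGatingFuncType(IntEnum):
        SOFTMAX = 1
        SIGMOID = 2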
@@ -631,6 +632,21 @@ class GGUFWriter:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
+    def add_features_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
+
+    def add_convnext_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_convnext_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
@@ -700,6 +716,12 @@ class GGUFWriter:
     def add_expert_weights_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
 
+    def add_expert_weights_norm(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
+
+    def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
+        self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
+
     def add_swin_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
 
@@ -721,12 +743,21 @@ class GGUFWriter:
     def add_wkv_head_size(self, size: int) -> None:
         self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
 
+    def add_token_shift_count(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
 
     def add_layer_norm_rms_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
+    def add_group_norm_eps(self, value: float) -> None:
+        self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
+
+    def add_group_norm_groups(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
+
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
 
@@ -826,9 +857,6 @@ class GGUFWriter:
     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.PAD_ID, id)
 
-    def add_cls_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.CLS_ID, id)
-
     def add_mask_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.MASK_ID, id)
 
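Net effect on gguf_writer.py: new metadata setters for the wavtokenizer-style PosNet/ConvNext sub-networks, group-norm parameters, an RWKV token-shift count, and MoE expert-gating options, plus removal of add_cls_token_id. The sketch below shows how a conversion script might call some of the new setters; the output file name, architecture string, and all values are illustrative assumptions, not taken from this diff.

    # Illustrative sketch only: exercises a few setters added in this release.
    # The architecture string and all values are invented for demonstration.
    from gguf import GGUFWriter
    from gguf.constants import ExpertGatingFuncType

    writer = GGUFWriter("example.gguf", arch="wavtokenizer-dec")
    writer.add_block_count(12)
    writer.add_features_length(512)            # new in this release
    writer.add_posnet_embedding_length(768)    # new
    writer.add_posnet_block_count(6)           # new
    writer.add_convnext_embedding_length(768)  # new
    writer.add_convnext_block_count(12)        # new
    writer.add_group_norm_eps(1e-6)            # new
    writer.add_group_norm_groups(32)           # new

    # For a MoE checkpoint the gating configuration could now be recorded too:
    # writer.add_expert_weights_norm(True)
    # writer.add_expert_gating_func(ExpertGatingFuncType.SIGMOID)

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()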
bigdl/cpp/gguf-py/gguf/tensor_mapping.py
CHANGED
@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings", # falcon
             "word_embeddings", # bloom
-            "model.embed_tokens", # llama-hf nemotron olmoe olmo2
+            "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2
             "tok_embeddings", # llama-pth
             "embeddings.word_embeddings", # bert nomic-bert
             "language_model.embedding.word_embeddings", # persimmon
@@ -42,6 +42,7 @@ class TensorNameMap:
             "emb_ln", # nomic-bert
             "transformer.norm", # openelm
             "rwkv.blocks.0.pre_ln", # rwkv
+            "backbone.norm", # wavtokenizer
         ),
 
         # Position embeddings
@@ -54,19 +55,20 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out", # gptneox
-            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
+            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
             "output", # llama-pth bloom internlm2
             "word_embeddings_for_head", # persimmon
             "lm_head.linear", # phi2
             "output_layer", # chatglm
             "head", # rwkv
+            "head.out", # wavtokenizer
         ),
 
         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm", # gptneox
             "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
-            "model.norm", # llama-hf baichuan internlm2 olmoe olmo2
+            "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
             "norm", # llama-pth
             "transformer.norm_f", # mpt dbrx
             "ln_f", # refact bloom qwen gpt2
@@ -80,6 +82,7 @@ class TensorNameMap:
             "transformer.norm", # openelm
             "model.norm", # nemotron
             "rwkv.ln_out", # rwkv
+            "backbone.final_layer_norm", # wavtokenizer
         ),
 
         # Rope frequencies
@@ -90,6 +93,13 @@ class TensorNameMap:
 
         MODEL_TENSOR.ROPE_FACTORS_LONG: (),
         MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
+
+        MODEL_TENSOR.CONV1D: (
+            "backbone.embed", # roberta
+        ),
+
+        MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+        MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
     }
 
     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
@@ -101,7 +111,7 @@ class TensorNameMap:
             "transformer.h.{bid}.input_layernorm", # falcon7b
             "h.{bid}.input_layernorm", # bloom
             "transformer.h.{bid}.ln_mlp", # falcon40b
-            "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe
+            "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
             "layers.{bid}.attention_norm", # llama-pth
             "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
             "model.layers.{bid}.ln1", # yi
@@ -145,7 +155,7 @@ class TensorNameMap:
 
         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2
+            "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2 phimoe
             "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
             "layers.{bid}.attention.wq", # llama-pth
             "encoder.layer.{bid}.attention.self.query", # bert
@@ -158,7 +168,7 @@ class TensorNameMap:
 
         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2
+            "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2 phimoe
             "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
             "layers.{bid}.attention.wk", # llama-pth
             "encoder.layer.{bid}.attention.self.key", # bert
@@ -172,7 +182,7 @@ class TensorNameMap:
 
         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2
+            "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
             "layers.{bid}.attention.wv", # llama-pth
             "encoder.layer.{bid}.attention.self.value", # bert
             "transformer.h.{bid}.attn.v_proj", # gpt-j
@@ -190,7 +200,8 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj", # mpt
             "transformer.h.{bid}.self_attention.dense", # falcon
             "h.{bid}.self_attention.dense", # bloom
-            "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2
+            "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.linear_attn", # deci
             "layers.{bid}.attention.wo", # llama-pth
             "encoder.layer.{bid}.attention.output.dense", # bert
             "transformer.h.{bid}.attn.out_proj", # gpt-j
@@ -234,7 +245,7 @@ class TensorNameMap:
             "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
             "h.{bid}.post_attention_layernorm", # bloom
             "transformer.blocks.{bid}.norm_2", # mpt
-            "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe
+            "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe phimoe
             "layers.{bid}.ffn_norm", # llama-pth
             "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
             "model.layers.{bid}.ln2", # yi
@@ -257,7 +268,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.FFN_GATE_INP: (
             "layers.{bid}.feed_forward.gate", # mixtral
-            "model.layers.{bid}.block_sparse_moe.gate", # mixtral
+            "model.layers.{bid}.block_sparse_moe.gate", # mixtral phimoe
             "model.layers.{bid}.mlp.gate", # qwen2moe olmoe
             "transformer.decoder_layer.{bid}.router", # Grok
             "transformer.blocks.{bid}.ffn.router.layer", # dbrx
@@ -268,6 +279,10 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
         ),
 
+        MODEL_TENSOR.FFN_EXP_PROBS_B: (
+            "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
+        ),
+
         # Feed-forward up
         MODEL_TENSOR.FFN_UP: (
             "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
@@ -298,15 +313,16 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
-            "layers.{bid}.feed_forward.experts.w3",
-            "transformer.decoder_layer.{bid}.moe.linear_v",
-            "transformer.blocks.{bid}.ffn.experts.mlp.v1",
-            "model.layers.{bid}.mlp.experts.up_proj",
+            "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
+            "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
         ),
 
         MODEL_TENSOR.FFN_UP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
         ),
 
         # AWQ-activation gate
@@ -330,15 +346,16 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
-            "layers.{bid}.feed_forward.experts.w1",
-            "transformer.decoder_layer.{bid}.moe.linear",
-            "transformer.blocks.{bid}.ffn.experts.mlp.w1",
-            "model.layers.{bid}.mlp.experts.gate_proj",
+            "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
+            "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
         ),
 
         MODEL_TENSOR.FFN_GATE_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
         ),
 
         # Feed-forward down
@@ -375,11 +392,12 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
             "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
             "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
+            "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
         ),
 
         MODEL_TENSOR.FFN_DOWN_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
@@ -449,34 +467,42 @@ class TensorNameMap:
 
         MODEL_TENSOR.TIME_MIX_W1: (
             "rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w1", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_W2: (
             "rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w2", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_LERP_X: (
             "rwkv.blocks.{bid}.attention.time_maa_x", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_x", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_LERP_K: (
             "rwkv.blocks.{bid}.attention.time_maa_k", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_k", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_LERP_V: (
             "rwkv.blocks.{bid}.attention.time_maa_v", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_v", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_LERP_R: (
             "rwkv.blocks.{bid}.attention.time_maa_r", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_r", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_LERP_G: (
             "rwkv.blocks.{bid}.attention.time_maa_g", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_g", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_LERP_W: (
             "rwkv.blocks.{bid}.attention.time_maa_w", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_FIRST: (
@@ -485,30 +511,37 @@ class TensorNameMap:
 
         MODEL_TENSOR.TIME_MIX_DECAY: (
             "rwkv.blocks.{bid}.attention.time_decay", # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_DECAY_W1: (
             "rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay_w1", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_DECAY_W2: (
             "rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay_w2", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_KEY: (
-            "rwkv.blocks.{bid}.attention.key",
+            "rwkv.blocks.{bid}.attention.key", # rwkv
+            "model.layers.{bid}.self_attn.k_proj", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_VALUE: (
-            "rwkv.blocks.{bid}.attention.value",
+            "rwkv.blocks.{bid}.attention.value", # rwkv
+            "model.layers.{bid}.self_attn.v_proj", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
             "rwkv.blocks.{bid}.attention.receptance", # rwkv
+            "model.layers.{bid}.self_attn.q_proj", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_GATE: (
-            "rwkv.blocks.{bid}.attention.gate",
+            "rwkv.blocks.{bid}.attention.gate", # rwkv
+            "model.layers.{bid}.self_attn.gate", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.TIME_MIX_LN: (
@@ -516,7 +549,8 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.TIME_MIX_OUTPUT: (
-            "rwkv.blocks.{bid}.attention.output",
+            "rwkv.blocks.{bid}.attention.output", # rwkv
+            "model.layers.{bid}.self_attn.o_proj", # rwkv6qwen2
         ),
 
         MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
@@ -681,6 +715,8 @@ class TensorNameMap:
             "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
         ),
 
+        ############################################################################
+        # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
         MODEL_TENSOR.ENC_OUTPUT_NORM: (
             "encoder.final_layer_norm", # t5
         ),
@@ -693,6 +729,67 @@ class TensorNameMap:
         MODEL_TENSOR.CLS_OUT: (
             "classifier.out_proj", # roberta
         ),
+        #############################################################################
+
+        MODEL_TENSOR.CONVNEXT_DW: (
+            "backbone.convnext.{bid}.dwconv", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_NORM: (
+            "backbone.convnext.{bid}.norm", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_PW1: (
+            "backbone.convnext.{bid}.pwconv1", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_PW2: (
+            "backbone.convnext.{bid}.pwconv2", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_GAMMA: (
+            "backbone.convnext.{bid}.gamma", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_CONV1: (
+            "backbone.posnet.{bid}.conv1", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_CONV2: (
+            "backbone.posnet.{bid}.conv2", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM: (
+            "backbone.posnet.{bid}.norm", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM1: (
+            "backbone.posnet.{bid}.norm1", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM2: (
+            "backbone.posnet.{bid}.norm2", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_NORM: (
+            "backbone.posnet.{bid}.norm", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_Q: (
+            "backbone.posnet.{bid}.q", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_K: (
+            "backbone.posnet.{bid}.k", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_V: (
+            "backbone.posnet.{bid}.v", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_OUT: (
+            "backbone.posnet.{bid}.proj_out", # wavtokenizer
+        ),
     }
 
     # architecture-specific block mappings
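The tensor_mapping.py changes above mostly extend existing mapping tables with new source names (phimoe, rwkv6qwen2, deci, deepseek-v3, wavtokenizer). A rough sketch of how these tables are consumed during conversion is shown below; it uses the get_tensor_name_map helper and MODEL_ARCH enum as they exist in upstream gguf-py, and the tensor name and block count are made-up examples.

    # Rough usage sketch (assumed API, matching upstream gguf-py): resolve a
    # Hugging Face tensor name to its GGUF name via the mapping tables above.
    from gguf.constants import MODEL_ARCH
    from gguf.tensor_mapping import get_tensor_name_map

    name_map = get_tensor_name_map(MODEL_ARCH.LLAMA, n_blocks=32)

    # "model.layers.0.self_attn.q_proj" appears under MODEL_TENSOR.ATTN_Q above,
    # so with the ".weight" suffix it should resolve to "blk.0.attn_q.weight".
    gguf_name = name_map.get_name("model.layers.0.self_attn.q_proj.weight",
                                  try_suffixes=(".weight", ".bias"))
    print(gguf_name)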
bigdl/cpp/gguf-py/gguf/utility.py
CHANGED
@@ -47,7 +47,7 @@ def size_label(total_params: int, shared_params: int, expert_params: int, expert
 
 
 def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
-    # Reference: https://github.com/
+    # Reference: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#gguf-naming-convention
 
     if base_name is not None:
        name = base_name.strip().replace(' ', '-').replace('/', '-')
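The utility.py change only restores the previously truncated reference URL for the GGUF naming convention that naming_convention implements. For context, a hedged example of the kind of call it supports follows; the argument values are invented and the exact output is only indicative.

    # Hypothetical call to utility.naming_convention; all arguments are invented.
    from gguf.utility import naming_convention

    name = naming_convention(
        model_name=None,
        base_name="Mixtral",
        finetune_string="Instruct",
        version_string="v0.1",
        size_label="8x7B",
        output_type="Q4_0",
    )
    # Expected to be something like "Mixtral-8x7B-Instruct-v0.1-Q4_0"
    print(name)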
bigdl/cpp/gguf-py/gguf/vocab.py
CHANGED
@@ -127,7 +127,7 @@ class SpecialVocab:
                 self.merges = merges
             elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
                 # New format since transformers 4.45 to support spaces in merges
-                # ref: https://github.com/
+                # ref: https://github.com/ggml-org/llama.cpp/issues/9692
                 # TODO: internally store as the new format instead of converting to old
                 if any(' ' in s for pair in merges for s in pair):
                     logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
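The vocab.py change likewise just restores the truncated issue link describing the newer tokenizer merges format (each merge stored as a two-element list, supported since transformers 4.45) that SpecialVocab converts back to the old space-joined form. A small sketch of that conversion, under the assumption that no merge piece itself contains a space (the code above handles that case separately by remapping spaces):

    # Sketch of the merges normalization described in the comment above: the new
    # transformers >= 4.45 format stores each merge as a two-element list, while
    # the old format stores it as a single "left right" string.
    new_format = [["Ġhello", "world"], ["h", "e"]]
    old_format = [f"{left} {right}" for left, right in new_format]
    assert old_format == ["Ġhello world", "h e"]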
bigdl/cpp/libs/common.lib
CHANGED
Binary file
bigdl/cpp/libs/ggml-base.dll
CHANGED
Binary file
bigdl/cpp/libs/ggml-cpu.dll
CHANGED
Binary file
bigdl/cpp/libs/ggml-sycl.dll
CHANGED
Binary file
bigdl/cpp/libs/ggml.dll
CHANGED
Binary file
bigdl/cpp/libs/llama-batched.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-bench.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-cli.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-embedding.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-gemma3-cli.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-gguf.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-llava-cli.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-lookup.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-ls-sycl-device.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-minicpmv-cli.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-perplexity.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-quantize.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-server.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-simple.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-speculative.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-tokenize.exe
CHANGED
Binary file
bigdl/cpp/libs/llama.dll
CHANGED
Binary file
bigdl/cpp/libs/llava_shared.dll
CHANGED
Binary file
bigdl/cpp/libs/ollama-ggml-base.dll
CHANGED
Binary file
bigdl/cpp/libs/ollama-ggml-cpu.dll
CHANGED
Binary file
bigdl/cpp/libs/ollama-ggml-sycl.dll
CHANGED
Binary file
bigdl/cpp/libs/ollama-lib.exe
CHANGED
Binary file
bigdl/cpp/libs/ollama.exe
CHANGED
Binary file
bigdl/cpp/libs/ollama_ggml.dll
CHANGED
Binary file
bigdl/cpp/libs/ollama_llama.dll
CHANGED
Binary file
bigdl/cpp/libs/ollama_llava_shared.dll
CHANGED
Binary file
bigdl_core_cpp-2.6.0b20250321.dist-info/RECORD
ADDED
@@ -0,0 +1,57 @@
+bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/convert_hf_to_gguf.py,sha256=GB6mGc_deGraPhQfUgU8i33odUb6WfMw0vVPcgZ_-ow,240529
+bigdl/cpp/convert_hf_to_gguf_update.py,sha256=1BFKEkj0BMDB90lUB5p_-iR9rSVcjgYPGWmEw28avB8,17721
+bigdl/cpp/convert_llama_ggml_to_gguf.py,sha256=0dKjRhmFzvWV4e-cuLmaeW14JrWUtZwerBmz8mYyMvI,19556
+bigdl/cpp/convert_lora_to_gguf.py,sha256=sHrcutdgzrDR5H7ZiLPOLoMnkJKg8uZ7OcFhAZhPrLo,19073
+bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/__init__.py,sha256=h5GWs6SMXYR8giWZ7MTZzAc3hYsIJF-HAkdxtgXLOPo,228
+bigdl/cpp/gguf-py/gguf/constants.py,sha256=CJ0LigNqlnEqYP8IhnJsKcst9fIm-huE4RccvkTYUbg,69188
+bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
+bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=PUrx08ZwaUOz1gLw5JQ459Hi7JIeCdlHgZX7wXcTqbI,12702
+bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=pFgnwrsDupKxI3SHNQbfiuz7dUopCOqj3ERBPuZMkMo,39955
+bigdl/cpp/gguf-py/gguf/lazy.py,sha256=YIYxGBWD-oKXU4HOvpHs9eiEn81HUgeSmt1mmHJlbdM,8814
+bigdl/cpp/gguf-py/gguf/metadata.py,sha256=oBTb4DXi_h1L_gYm8x_JRVuEPR4GHlVHuM-iN0OxWoY,33244
+bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/quants.py,sha256=2z6vcK-kBefqZbYNmSEVmdZF_tXHeVb5NC6jCbBdgKc,62040
+bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=w1JZkRIKHj2tKYADLDUvCOsQfVf8y7Y0ZwqmtmrtLBA,39549
+bigdl/cpp/gguf-py/gguf/utility.py,sha256=Mx4mqamXtatL15LCH04mG-7SNBwPzP2T75ts0uBnEuI,3002
+bigdl/cpp/gguf-py/gguf/vocab.py,sha256=QTdt4HZrn7onHqm2tpHTaEq2sL3yG07zbHqQR9iVfu8,20815
+bigdl/cpp/libs/common.lib,sha256=Vo5qx0PPK6ZyT0Z3TUoVqPhxI-A-zZR7IHWSeLYTIkM,8642856
+bigdl/cpp/libs/ggml-base.dll,sha256=IUkUVt54mNJzwdTXmwvyJ_nQLOqmWrUpIrpRCg7VPFE,577024
+bigdl/cpp/libs/ggml-cpu.dll,sha256=BTX-iKt3_8YgJcpRl0pXDrES2ZYTEy9ph6i4mMYwLqQ,1038336
+bigdl/cpp/libs/ggml-sycl.dll,sha256=aqQdnqwEY-JlKm4Dny8nhy3OgSySYaiDWGSXwIp2SHc,5560832
+bigdl/cpp/libs/ggml.dll,sha256=XjdK5jlGNhAxr7_IGiTWkvi4Z06ciDqiL5wbW9ZRuVc,118272
+bigdl/cpp/libs/libc++.dll,sha256=U0TVK2WfFQIJPP6Bz9SeJmgskm2iqZWJorx_DGdfKIw,1561600
+bigdl/cpp/libs/llama-batched.exe,sha256=v27oJ9gdolpQArt7Ih8m3qXCrIcEhWmgJrbVoxwpy0Q,1741824
+bigdl/cpp/libs/llama-bench.exe,sha256=4aZtBZ2Bs-Q5kXVTd86WrE1uF3LuJCWfGaJcfHTkAH8,279552
+bigdl/cpp/libs/llama-cli.exe,sha256=Sa6UWO5May8Ub1tgRiLI3gBJ1ayJbiZwQtshD9Ckdu0,1812480
+bigdl/cpp/libs/llama-embedding.exe,sha256=WGqbovh_2njmpN1OE-5FU5yH-6D8hRQQ_cDKj_GlXlQ,1765376
+bigdl/cpp/libs/llama-gemma3-cli.exe,sha256=94z47LKR7CrVSZ2w_tzuHJ66hJhw7v6BWrvbZIS1r_U,2033664
+bigdl/cpp/libs/llama-gguf.exe,sha256=DlBsPv7pL0Rsp5hafby51gxRnXmzsti_569wySHAQ-Q,59392
+bigdl/cpp/libs/llama-llava-cli.exe,sha256=DyoTXM5HOGMQANjfZbsYk50EsxZK5sBGVDuTfYX6Adg,2019840
+bigdl/cpp/libs/llama-lookup.exe,sha256=lievf9j2yNC5uujL0oMmrpBj7J14PZWwi2XT_DX6_3U,1801216
+bigdl/cpp/libs/llama-ls-sycl-device.exe,sha256=ZNfBEekcD4AiaNPkqafa1pzfo8WTp0DpS7I6Mw-RAwo,10240
+bigdl/cpp/libs/llama-minicpmv-cli.exe,sha256=sozeVIjDxfa4NssDmTPBOO-6vgK1nJ-Bs5HDQp7QBE8,2017792
+bigdl/cpp/libs/llama-perplexity.exe,sha256=vPzXHFomoFPgtLznM1fw1JQKDv_4q4bSqUuBb1qKEpI,1886208
+bigdl/cpp/libs/llama-quantize.exe,sha256=CV3wKKGunBiBy6E8fStqXeFefjAnBmJUmXGwbMPyPb0,122880
+bigdl/cpp/libs/llama-server.exe,sha256=haDGFy0qNNR_WUys-9CaDTB3RJwjvrBkCKBlhuewRBU,4130304
+bigdl/cpp/libs/llama-simple.exe,sha256=Wfq0JYyVA4itZXPKs20UAOs0z-Q59SWOwpSlRgUWUw0,62464
+bigdl/cpp/libs/llama-speculative.exe,sha256=rQG1WQizxNX0oj1nzvxxn0k3DCkkOkk0flLgw0_B2fs,1803776
+bigdl/cpp/libs/llama-tokenize.exe,sha256=LA0ZLlq2Akt_CXTz8lSXw6-S9aCUnBm8RhEubpVZaz8,89088
+bigdl/cpp/libs/llama.dll,sha256=W07-VmnRG1Hm7NkUdA9lFjrW9Wz81pQlKfZWF8yugRI,1470464
+bigdl/cpp/libs/llava_shared.dll,sha256=IY0l5XnYnL014k398AHe33wTqwuBhKc1wTgn_uMeciA,380416
+bigdl/cpp/libs/ollama-ggml-base.dll,sha256=ma9wJKTWKvr16EAlGJPwuNJN-dwUCx-gYPAacF0HO5U,459776
+bigdl/cpp/libs/ollama-ggml-cpu.dll,sha256=lW9Ssioxrjm60ii9-LiLBoYJiFCpva8naloJpeGrjnI,477184
+bigdl/cpp/libs/ollama-ggml-sycl.dll,sha256=I-Ksd6w8OGivjKsPwdNA0u7sGzPvyFwu1W5OvJi2IcA,5326336
+bigdl/cpp/libs/ollama-lib.exe,sha256=LAaaV9voEPaeGBSSLsF0Eb9T0_zibH4SIe7NBakYses,25916416
+bigdl/cpp/libs/ollama.exe,sha256=GceJtJEhtrlNDAY9n-7FMyWiLoHP6UqgwOrrM7RdUIk,207360
+bigdl/cpp/libs/ollama_ggml.dll,sha256=X8XsTQd6Uc-LgMOzjNvGpnemqevACWoTVYdx7ZK6Zbc,113152
+bigdl/cpp/libs/ollama_llama.dll,sha256=XKb_ypeqhlL9_lP_AcxNR6Z8xdQcr-3Inr6l-CzX1o4,1421312
+bigdl/cpp/libs/ollama_llava_shared.dll,sha256=LtLN0l-jXVF8BNSH8XZAaAuIZXQWrc9cssBlPnYCixQ,398336
+bigdl_core_cpp-2.6.0b20250321.data/scripts/init-llama-cpp.bat,sha256=U0h6RifZxL3GGJp-0dxdZapQIvXUATSj644CURJL-lg,751
+bigdl_core_cpp-2.6.0b20250321.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
+bigdl_core_cpp-2.6.0b20250321.data/scripts/init-ollama.bat,sha256=0I2iBOFv3kR9hvEySGMRUU52-qwVhE7oRZnyWz-2z_U,657
+bigdl_core_cpp-2.6.0b20250321.dist-info/METADATA,sha256=9I4TcjPb4JO3gGlDHDAXgFbUhCRqhuZp9iOyVFX2Apo,750
+bigdl_core_cpp-2.6.0b20250321.dist-info/WHEEL,sha256=pUQ3YzM9z7CMLK4Pdg7RxRLrm1NUy0aQs4ESywX3iFk,97
+bigdl_core_cpp-2.6.0b20250321.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
+bigdl_core_cpp-2.6.0b20250321.dist-info/RECORD,,
|