bigdl-core-cpp 2.6.0b20250320__py3-none-win_amd64.whl → 2.6.0b20250321__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. bigdl/cpp/convert_hf_to_gguf.py +687 -60
  2. bigdl/cpp/convert_hf_to_gguf_update.py +46 -41
  3. bigdl/cpp/convert_lora_to_gguf.py +33 -5
  4. bigdl/cpp/gguf-py/gguf/constants.py +306 -123
  5. bigdl/cpp/gguf-py/gguf/gguf_writer.py +31 -3
  6. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +122 -25
  7. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  8. bigdl/cpp/gguf-py/gguf/vocab.py +1 -1
  9. bigdl/cpp/libs/common.lib +0 -0
  10. bigdl/cpp/libs/ggml-base.dll +0 -0
  11. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  12. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  13. bigdl/cpp/libs/ggml.dll +0 -0
  14. bigdl/cpp/libs/llama-batched.exe +0 -0
  15. bigdl/cpp/libs/llama-bench.exe +0 -0
  16. bigdl/cpp/libs/llama-cli.exe +0 -0
  17. bigdl/cpp/libs/llama-embedding.exe +0 -0
  18. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  19. bigdl/cpp/libs/llama-gguf.exe +0 -0
  20. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  21. bigdl/cpp/libs/llama-lookup.exe +0 -0
  22. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  23. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  24. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  25. bigdl/cpp/libs/llama-quantize.exe +0 -0
  26. bigdl/cpp/libs/llama-server.exe +0 -0
  27. bigdl/cpp/libs/llama-simple.exe +0 -0
  28. bigdl/cpp/libs/llama-speculative.exe +0 -0
  29. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  30. bigdl/cpp/libs/llama.dll +0 -0
  31. bigdl/cpp/libs/llava_shared.dll +0 -0
  32. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  33. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  34. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  35. bigdl/cpp/libs/ollama-lib.exe +0 -0
  36. bigdl/cpp/libs/ollama.exe +0 -0
  37. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  38. bigdl/cpp/libs/ollama_llama.dll +0 -0
  39. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  40. {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/METADATA +1 -1
  41. bigdl_core_cpp-2.6.0b20250321.dist-info/RECORD +57 -0
  42. {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/WHEEL +1 -1
  43. bigdl_core_cpp-2.6.0b20250320.dist-info/RECORD +0 -57
  44. {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-llama-cpp.bat +0 -0
  45. {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-llama-cpp.ps1 +0 -0
  46. {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250321.data}/scripts/init-ollama.bat +0 -0
  47. {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/gguf_writer.py CHANGED
@@ -26,6 +26,7 @@ from .constants import (
     RopeScalingType,
     PoolingType,
     TokenType,
+    ExpertGatingFuncType,
 )
 
 from .quants import quant_shape_from_byte_shape
@@ -631,6 +632,21 @@ class GGUFWriter:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
+    def add_features_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
+
+    def add_convnext_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_convnext_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
@@ -700,6 +716,12 @@ class GGUFWriter:
     def add_expert_weights_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
 
+    def add_expert_weights_norm(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
+
+    def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
+        self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
+
     def add_swin_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
 
@@ -721,12 +743,21 @@ class GGUFWriter:
     def add_wkv_head_size(self, size: int) -> None:
         self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
 
+    def add_token_shift_count(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
 
     def add_layer_norm_rms_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
+    def add_group_norm_eps(self, value: float) -> None:
+        self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
+
+    def add_group_norm_groups(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
+
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
 
@@ -826,9 +857,6 @@ class GGUFWriter:
     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.PAD_ID, id)
 
-    def add_cls_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.CLS_ID, id)
-
     def add_mask_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.MASK_ID, id)
 
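The gguf_writer.py hunks above only add new metadata setters (plus one removal, add_cls_token_id) without changing existing behavior. Below is a minimal sketch of how a converter script might call the new methods; it assumes the bundled gguf-py directory is on sys.path, as bigdl/cpp/convert_hf_to_gguf.py arranges, and the arch string, output path, numeric values, and the ExpertGatingFuncType.SIGMOID member (expected from the matching constants.py change) are illustrative assumptions, not values taken from this diff.

import gguf

# Placeholder output path and architecture name.
writer = gguf.GGUFWriter("model.gguf", arch="deepseek2")

# MoE routing metadata introduced for DeepSeek-V3-style gating.
writer.add_expert_weights_norm(True)
writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)

# WavTokenizer-style feature / PosNet / ConvNeXt sizes.
writer.add_features_length(512)
writer.add_posnet_embedding_length(768)
writer.add_posnet_block_count(6)
writer.add_convnext_embedding_length(768)
writer.add_convnext_block_count(12)

# RWKV token-shift count and group-norm settings.
writer.add_token_shift_count(2)
writer.add_group_norm_eps(1e-6)
writer.add_group_norm_groups(32)
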
bigdl/cpp/gguf-py/gguf/tensor_mapping.py CHANGED
@@ -13,7 +13,7 @@ class TensorNameMap:
         "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
         "transformer.word_embeddings", # falcon
         "word_embeddings", # bloom
-        "model.embed_tokens", # llama-hf nemotron olmoe olmo2
+        "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2
         "tok_embeddings", # llama-pth
         "embeddings.word_embeddings", # bert nomic-bert
         "language_model.embedding.word_embeddings", # persimmon
@@ -42,6 +42,7 @@ class TensorNameMap:
         "emb_ln", # nomic-bert
         "transformer.norm", # openelm
         "rwkv.blocks.0.pre_ln", # rwkv
+        "backbone.norm", # wavtokenizer
     ),
 
     # Position embeddings
@@ -54,19 +55,20 @@ class TensorNameMap:
     # Output
     MODEL_TENSOR.OUTPUT: (
         "embed_out", # gptneox
-        "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
+        "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
         "output", # llama-pth bloom internlm2
         "word_embeddings_for_head", # persimmon
         "lm_head.linear", # phi2
         "output_layer", # chatglm
         "head", # rwkv
+        "head.out", # wavtokenizer
     ),
 
     # Output norm
     MODEL_TENSOR.OUTPUT_NORM: (
         "gpt_neox.final_layer_norm", # gptneox
         "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
-        "model.norm", # llama-hf baichuan internlm2 olmoe olmo2
+        "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
         "norm", # llama-pth
         "transformer.norm_f", # mpt dbrx
         "ln_f", # refact bloom qwen gpt2
@@ -80,6 +82,7 @@ class TensorNameMap:
         "transformer.norm", # openelm
         "model.norm", # nemotron
         "rwkv.ln_out", # rwkv
+        "backbone.final_layer_norm", # wavtokenizer
     ),
 
     # Rope frequencies
@@ -90,6 +93,13 @@ class TensorNameMap:
 
     MODEL_TENSOR.ROPE_FACTORS_LONG: (),
     MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
+
+    MODEL_TENSOR.CONV1D: (
+        "backbone.embed", # roberta
+    ),
+
+    MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+    MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
 }
 
 block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
@@ -101,7 +111,7 @@ class TensorNameMap:
         "transformer.h.{bid}.input_layernorm", # falcon7b
         "h.{bid}.input_layernorm", # bloom
         "transformer.h.{bid}.ln_mlp", # falcon40b
-        "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe
+        "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
         "layers.{bid}.attention_norm", # llama-pth
         "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
         "model.layers.{bid}.ln1", # yi
@@ -145,7 +155,7 @@ class TensorNameMap:
 
     # Attention query
     MODEL_TENSOR.ATTN_Q: (
-        "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2
+        "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2 phimoe
         "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
         "layers.{bid}.attention.wq", # llama-pth
         "encoder.layer.{bid}.attention.self.query", # bert
@@ -158,7 +168,7 @@ class TensorNameMap:
 
     # Attention key
     MODEL_TENSOR.ATTN_K: (
-        "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2
+        "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2 phimoe
         "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
         "layers.{bid}.attention.wk", # llama-pth
         "encoder.layer.{bid}.attention.self.key", # bert
@@ -172,7 +182,7 @@ class TensorNameMap:
 
     # Attention value
     MODEL_TENSOR.ATTN_V: (
-        "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2
+        "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
         "layers.{bid}.attention.wv", # llama-pth
         "encoder.layer.{bid}.attention.self.value", # bert
         "transformer.h.{bid}.attn.v_proj", # gpt-j
@@ -190,7 +200,8 @@ class TensorNameMap:
         "transformer.blocks.{bid}.attn.out_proj", # mpt
         "transformer.h.{bid}.self_attention.dense", # falcon
         "h.{bid}.self_attention.dense", # bloom
-        "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2
+        "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
+        "model.layers.{bid}.self_attn.linear_attn", # deci
         "layers.{bid}.attention.wo", # llama-pth
         "encoder.layer.{bid}.attention.output.dense", # bert
         "transformer.h.{bid}.attn.out_proj", # gpt-j
@@ -234,7 +245,7 @@ class TensorNameMap:
         "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
         "h.{bid}.post_attention_layernorm", # bloom
         "transformer.blocks.{bid}.norm_2", # mpt
-        "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe
+        "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe phimoe
         "layers.{bid}.ffn_norm", # llama-pth
         "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
         "model.layers.{bid}.ln2", # yi
@@ -257,7 +268,7 @@ class TensorNameMap:
 
     MODEL_TENSOR.FFN_GATE_INP: (
         "layers.{bid}.feed_forward.gate", # mixtral
-        "model.layers.{bid}.block_sparse_moe.gate", # mixtral
+        "model.layers.{bid}.block_sparse_moe.gate", # mixtral phimoe
         "model.layers.{bid}.mlp.gate", # qwen2moe olmoe
         "transformer.decoder_layer.{bid}.router", # Grok
         "transformer.blocks.{bid}.ffn.router.layer", # dbrx
@@ -268,6 +279,10 @@ class TensorNameMap:
         "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
     ),
 
+    MODEL_TENSOR.FFN_EXP_PROBS_B: (
+        "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
+    ),
+
     # Feed-forward up
     MODEL_TENSOR.FFN_UP: (
         "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
@@ -298,15 +313,16 @@ class TensorNameMap:
     ),
 
     MODEL_TENSOR.FFN_UP_EXP: (
-        "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
-        "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
-        "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
-        "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
+        "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
+        "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
+        "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
+        "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
+        "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
     ),
 
     MODEL_TENSOR.FFN_UP_SHEXP: (
         "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
-        "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
+        "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
     ),
 
     # AWQ-activation gate
@@ -330,15 +346,16 @@ class TensorNameMap:
     ),
 
     MODEL_TENSOR.FFN_GATE_EXP: (
-        "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
-        "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
-        "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
-        "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
+        "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
+        "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
+        "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
+        "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
+        "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
     ),
 
     MODEL_TENSOR.FFN_GATE_SHEXP: (
         "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
-        "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
+        "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
     ),
 
     # Feed-forward down
@@ -375,11 +392,12 @@ class TensorNameMap:
         "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
         "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
         "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
+        "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
     ),
 
     MODEL_TENSOR.FFN_DOWN_SHEXP: (
         "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
-        "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
+        "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
     ),
 
     MODEL_TENSOR.ATTN_Q_NORM: (
@@ -449,34 +467,42 @@ class TensorNameMap:
 
     MODEL_TENSOR.TIME_MIX_W1: (
         "rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv v6
+        "model.layers.{bid}.self_attn.time_maa_w1", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_W2: (
         "rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv v6
+        "model.layers.{bid}.self_attn.time_maa_w2", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_LERP_X: (
         "rwkv.blocks.{bid}.attention.time_maa_x", # rwkv v6
+        "model.layers.{bid}.self_attn.time_maa_x", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_LERP_K: (
         "rwkv.blocks.{bid}.attention.time_maa_k", # rwkv v6
+        "model.layers.{bid}.self_attn.time_maa_k", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_LERP_V: (
         "rwkv.blocks.{bid}.attention.time_maa_v", # rwkv v6
+        "model.layers.{bid}.self_attn.time_maa_v", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_LERP_R: (
         "rwkv.blocks.{bid}.attention.time_maa_r", # rwkv v6
+        "model.layers.{bid}.self_attn.time_maa_r", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_LERP_G: (
         "rwkv.blocks.{bid}.attention.time_maa_g", # rwkv v6
+        "model.layers.{bid}.self_attn.time_maa_g", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_LERP_W: (
         "rwkv.blocks.{bid}.attention.time_maa_w", # rwkv v6
+        "model.layers.{bid}.self_attn.time_maa_w", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_FIRST: (
@@ -485,30 +511,37 @@ class TensorNameMap:
 
     MODEL_TENSOR.TIME_MIX_DECAY: (
         "rwkv.blocks.{bid}.attention.time_decay", # rwkv v6
+        "model.layers.{bid}.self_attn.time_decay", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_DECAY_W1: (
         "rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv v6
+        "model.layers.{bid}.self_attn.time_decay_w1", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_DECAY_W2: (
         "rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv v6
+        "model.layers.{bid}.self_attn.time_decay_w2", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_KEY: (
-        "rwkv.blocks.{bid}.attention.key", # rwkv
+        "rwkv.blocks.{bid}.attention.key", # rwkv
+        "model.layers.{bid}.self_attn.k_proj", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_VALUE: (
-        "rwkv.blocks.{bid}.attention.value", # rwkv
+        "rwkv.blocks.{bid}.attention.value", # rwkv
+        "model.layers.{bid}.self_attn.v_proj", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
         "rwkv.blocks.{bid}.attention.receptance", # rwkv
+        "model.layers.{bid}.self_attn.q_proj", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_GATE: (
-        "rwkv.blocks.{bid}.attention.gate", # rwkv
+        "rwkv.blocks.{bid}.attention.gate", # rwkv
+        "model.layers.{bid}.self_attn.gate", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.TIME_MIX_LN: (
@@ -516,7 +549,8 @@ class TensorNameMap:
     ),
 
     MODEL_TENSOR.TIME_MIX_OUTPUT: (
-        "rwkv.blocks.{bid}.attention.output", # rwkv
+        "rwkv.blocks.{bid}.attention.output", # rwkv
+        "model.layers.{bid}.self_attn.o_proj", # rwkv6qwen2
     ),
 
     MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
@@ -681,6 +715,8 @@ class TensorNameMap:
         "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
     ),
 
+    ############################################################################
+    # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
     MODEL_TENSOR.ENC_OUTPUT_NORM: (
         "encoder.final_layer_norm", # t5
     ),
@@ -693,6 +729,67 @@ class TensorNameMap:
     MODEL_TENSOR.CLS_OUT: (
         "classifier.out_proj", # roberta
     ),
+    #############################################################################
+
+    MODEL_TENSOR.CONVNEXT_DW: (
+        "backbone.convnext.{bid}.dwconv", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.CONVNEXT_NORM: (
+        "backbone.convnext.{bid}.norm", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.CONVNEXT_PW1: (
+        "backbone.convnext.{bid}.pwconv1", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.CONVNEXT_PW2: (
+        "backbone.convnext.{bid}.pwconv2", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.CONVNEXT_GAMMA: (
+        "backbone.convnext.{bid}.gamma", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_CONV1: (
+        "backbone.posnet.{bid}.conv1", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_CONV2: (
+        "backbone.posnet.{bid}.conv2", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_NORM: (
+        "backbone.posnet.{bid}.norm", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_NORM1: (
+        "backbone.posnet.{bid}.norm1", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_NORM2: (
+        "backbone.posnet.{bid}.norm2", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_ATTN_NORM: (
+        "backbone.posnet.{bid}.norm", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_ATTN_Q: (
+        "backbone.posnet.{bid}.q", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_ATTN_K: (
+        "backbone.posnet.{bid}.k", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_ATTN_V: (
+        "backbone.posnet.{bid}.v", # wavtokenizer
+    ),
+
+    MODEL_TENSOR.POSNET_ATTN_OUT: (
+        "backbone.posnet.{bid}.proj_out", # wavtokenizer
+    ),
 }
 
 # architecture-specific block mappings
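The tensor_mapping.py hunks above extend the HF-name to GGUF-name tables for phimoe, deci, rwkv6qwen2, deepseek and wavtokenizer checkpoints. A short sketch of how these tables are consumed is below; it uses the existing gguf.get_tensor_name_map() / TensorNameMap.get_name() API, and it assumes MODEL_ARCH.PHIMOE is defined by the accompanying constants.py change (the block count and tensor name are made-up examples).

import gguf

# Build the lookup table for a hypothetical 32-block phimoe model.
name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.PHIMOE, 32)

# The new phimoe entry maps the merged expert tensor onto the standard MoE name,
# which should resolve to something like "blk.0.ffn_up_exps.weight".
print(name_map.get_name("model.layers.0.block_sparse_moe.experts.w3.weight",
                        try_suffixes=(".weight", ".bias")))
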
bigdl/cpp/gguf-py/gguf/utility.py CHANGED
@@ -47,7 +47,7 @@ def size_label(total_params: int, shared_params: int, expert_params: int, expert
 
 
 def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
-    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+    # Reference: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#gguf-naming-convention
 
     if base_name is not None:
         name = base_name.strip().replace(' ', '-').replace('/', '-')
bigdl/cpp/gguf-py/gguf/vocab.py CHANGED
@@ -127,7 +127,7 @@ class SpecialVocab:
             self.merges = merges
         elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
             # New format since transformers 4.45 to support spaces in merges
-            # ref: https://github.com/ggerganov/llama.cpp/issues/9692
+            # ref: https://github.com/ggml-org/llama.cpp/issues/9692
             # TODO: internally store as the new format instead of converting to old
             if any(' ' in s for pair in merges for s in pair):
                 logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
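The vocab.py hunk only updates an issue URL, but the code it sits in handles the merges format introduced in transformers 4.45, where each merge is a ["first", "second"] pair (so tokens may contain literal spaces) rather than a single "first second" string. The following stand-alone sketch illustrates that normalization; it is not the library's code and the helper name is made up.

def normalize_merges(merges):
    # Collapse pair-style merges back into the old single-string form.
    # Spaces inside a token are re-encoded as chr(ord(' ') + 256) ('Ġ') so the
    # joined "first second" string stays unambiguous, mirroring the warning
    # logged by SpecialVocab in the hunk above.
    if not merges or isinstance(merges[0], str):
        return merges  # already old-style "first second" strings
    out = []
    for first, second in merges:
        if ' ' in first or ' ' in second:
            first = first.replace(' ', chr(ord(' ') + 256))
            second = second.replace(' ', chr(ord(' ') + 256))
        out.append(f'{first} {second}')
    return out

print(normalize_merges([["a b", "c"], ["d", "e"]]))  # -> ['aĠb c', 'd e']
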
bigdl/cpp/libs/common.lib CHANGED
Binary file
bigdl/cpp/libs/ggml-base.dll CHANGED
Binary file
bigdl/cpp/libs/ggml-cpu.dll CHANGED
Binary file
bigdl/cpp/libs/ggml-sycl.dll CHANGED
Binary file
bigdl/cpp/libs/ggml.dll CHANGED
Binary file
bigdl/cpp/libs/llama-batched.exe CHANGED
Binary file
bigdl/cpp/libs/llama-bench.exe CHANGED
Binary file
bigdl/cpp/libs/llama-cli.exe CHANGED
Binary file
bigdl/cpp/libs/llama-embedding.exe CHANGED
Binary file
bigdl/cpp/libs/llama-gemma3-cli.exe CHANGED
Binary file
bigdl/cpp/libs/llama-gguf.exe CHANGED
Binary file
bigdl/cpp/libs/llama-llava-cli.exe CHANGED
Binary file
bigdl/cpp/libs/llama-lookup.exe CHANGED
Binary file
bigdl/cpp/libs/llama-ls-sycl-device.exe CHANGED
Binary file
bigdl/cpp/libs/llama-minicpmv-cli.exe CHANGED
Binary file
bigdl/cpp/libs/llama-perplexity.exe CHANGED
Binary file
bigdl/cpp/libs/llama-quantize.exe CHANGED
Binary file
bigdl/cpp/libs/llama-server.exe CHANGED
Binary file
bigdl/cpp/libs/llama-simple.exe CHANGED
Binary file
bigdl/cpp/libs/llama-speculative.exe CHANGED
Binary file
bigdl/cpp/libs/llama-tokenize.exe CHANGED
Binary file
bigdl/cpp/libs/llama.dll CHANGED
Binary file
bigdl/cpp/libs/llava_shared.dll CHANGED
Binary file
bigdl/cpp/libs/ollama-ggml-base.dll CHANGED
Binary file
bigdl/cpp/libs/ollama-ggml-cpu.dll CHANGED
Binary file
bigdl/cpp/libs/ollama-ggml-sycl.dll CHANGED
Binary file
bigdl/cpp/libs/ollama-lib.exe CHANGED
Binary file
bigdl/cpp/libs/ollama.exe CHANGED
Binary file
bigdl/cpp/libs/ollama_ggml.dll CHANGED
Binary file
bigdl/cpp/libs/ollama_llama.dll CHANGED
Binary file
bigdl/cpp/libs/ollama_llava_shared.dll CHANGED
Binary file
{bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bigdl-core-cpp
-Version: 2.6.0b20250320
+Version: 2.6.0b20250321
 Summary: Large Language Model Develop Toolkit
 Author: BigDL Authors
 License: Apache License, Version 2.0
bigdl_core_cpp-2.6.0b20250321.dist-info/RECORD ADDED
@@ -0,0 +1,57 @@
+bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/convert_hf_to_gguf.py,sha256=GB6mGc_deGraPhQfUgU8i33odUb6WfMw0vVPcgZ_-ow,240529
+bigdl/cpp/convert_hf_to_gguf_update.py,sha256=1BFKEkj0BMDB90lUB5p_-iR9rSVcjgYPGWmEw28avB8,17721
+bigdl/cpp/convert_llama_ggml_to_gguf.py,sha256=0dKjRhmFzvWV4e-cuLmaeW14JrWUtZwerBmz8mYyMvI,19556
+bigdl/cpp/convert_lora_to_gguf.py,sha256=sHrcutdgzrDR5H7ZiLPOLoMnkJKg8uZ7OcFhAZhPrLo,19073
+bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/__init__.py,sha256=h5GWs6SMXYR8giWZ7MTZzAc3hYsIJF-HAkdxtgXLOPo,228
+bigdl/cpp/gguf-py/gguf/constants.py,sha256=CJ0LigNqlnEqYP8IhnJsKcst9fIm-huE4RccvkTYUbg,69188
+bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
+bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=PUrx08ZwaUOz1gLw5JQ459Hi7JIeCdlHgZX7wXcTqbI,12702
+bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=pFgnwrsDupKxI3SHNQbfiuz7dUopCOqj3ERBPuZMkMo,39955
+bigdl/cpp/gguf-py/gguf/lazy.py,sha256=YIYxGBWD-oKXU4HOvpHs9eiEn81HUgeSmt1mmHJlbdM,8814
+bigdl/cpp/gguf-py/gguf/metadata.py,sha256=oBTb4DXi_h1L_gYm8x_JRVuEPR4GHlVHuM-iN0OxWoY,33244
+bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/quants.py,sha256=2z6vcK-kBefqZbYNmSEVmdZF_tXHeVb5NC6jCbBdgKc,62040
+bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=w1JZkRIKHj2tKYADLDUvCOsQfVf8y7Y0ZwqmtmrtLBA,39549
+bigdl/cpp/gguf-py/gguf/utility.py,sha256=Mx4mqamXtatL15LCH04mG-7SNBwPzP2T75ts0uBnEuI,3002
+bigdl/cpp/gguf-py/gguf/vocab.py,sha256=QTdt4HZrn7onHqm2tpHTaEq2sL3yG07zbHqQR9iVfu8,20815
+bigdl/cpp/libs/common.lib,sha256=Vo5qx0PPK6ZyT0Z3TUoVqPhxI-A-zZR7IHWSeLYTIkM,8642856
+bigdl/cpp/libs/ggml-base.dll,sha256=IUkUVt54mNJzwdTXmwvyJ_nQLOqmWrUpIrpRCg7VPFE,577024
+bigdl/cpp/libs/ggml-cpu.dll,sha256=BTX-iKt3_8YgJcpRl0pXDrES2ZYTEy9ph6i4mMYwLqQ,1038336
+bigdl/cpp/libs/ggml-sycl.dll,sha256=aqQdnqwEY-JlKm4Dny8nhy3OgSySYaiDWGSXwIp2SHc,5560832
+bigdl/cpp/libs/ggml.dll,sha256=XjdK5jlGNhAxr7_IGiTWkvi4Z06ciDqiL5wbW9ZRuVc,118272
+bigdl/cpp/libs/libc++.dll,sha256=U0TVK2WfFQIJPP6Bz9SeJmgskm2iqZWJorx_DGdfKIw,1561600
+bigdl/cpp/libs/llama-batched.exe,sha256=v27oJ9gdolpQArt7Ih8m3qXCrIcEhWmgJrbVoxwpy0Q,1741824
+bigdl/cpp/libs/llama-bench.exe,sha256=4aZtBZ2Bs-Q5kXVTd86WrE1uF3LuJCWfGaJcfHTkAH8,279552
+bigdl/cpp/libs/llama-cli.exe,sha256=Sa6UWO5May8Ub1tgRiLI3gBJ1ayJbiZwQtshD9Ckdu0,1812480
+bigdl/cpp/libs/llama-embedding.exe,sha256=WGqbovh_2njmpN1OE-5FU5yH-6D8hRQQ_cDKj_GlXlQ,1765376
+bigdl/cpp/libs/llama-gemma3-cli.exe,sha256=94z47LKR7CrVSZ2w_tzuHJ66hJhw7v6BWrvbZIS1r_U,2033664
+bigdl/cpp/libs/llama-gguf.exe,sha256=DlBsPv7pL0Rsp5hafby51gxRnXmzsti_569wySHAQ-Q,59392
+bigdl/cpp/libs/llama-llava-cli.exe,sha256=DyoTXM5HOGMQANjfZbsYk50EsxZK5sBGVDuTfYX6Adg,2019840
+bigdl/cpp/libs/llama-lookup.exe,sha256=lievf9j2yNC5uujL0oMmrpBj7J14PZWwi2XT_DX6_3U,1801216
+bigdl/cpp/libs/llama-ls-sycl-device.exe,sha256=ZNfBEekcD4AiaNPkqafa1pzfo8WTp0DpS7I6Mw-RAwo,10240
+bigdl/cpp/libs/llama-minicpmv-cli.exe,sha256=sozeVIjDxfa4NssDmTPBOO-6vgK1nJ-Bs5HDQp7QBE8,2017792
+bigdl/cpp/libs/llama-perplexity.exe,sha256=vPzXHFomoFPgtLznM1fw1JQKDv_4q4bSqUuBb1qKEpI,1886208
+bigdl/cpp/libs/llama-quantize.exe,sha256=CV3wKKGunBiBy6E8fStqXeFefjAnBmJUmXGwbMPyPb0,122880
+bigdl/cpp/libs/llama-server.exe,sha256=haDGFy0qNNR_WUys-9CaDTB3RJwjvrBkCKBlhuewRBU,4130304
+bigdl/cpp/libs/llama-simple.exe,sha256=Wfq0JYyVA4itZXPKs20UAOs0z-Q59SWOwpSlRgUWUw0,62464
+bigdl/cpp/libs/llama-speculative.exe,sha256=rQG1WQizxNX0oj1nzvxxn0k3DCkkOkk0flLgw0_B2fs,1803776
+bigdl/cpp/libs/llama-tokenize.exe,sha256=LA0ZLlq2Akt_CXTz8lSXw6-S9aCUnBm8RhEubpVZaz8,89088
+bigdl/cpp/libs/llama.dll,sha256=W07-VmnRG1Hm7NkUdA9lFjrW9Wz81pQlKfZWF8yugRI,1470464
+bigdl/cpp/libs/llava_shared.dll,sha256=IY0l5XnYnL014k398AHe33wTqwuBhKc1wTgn_uMeciA,380416
+bigdl/cpp/libs/ollama-ggml-base.dll,sha256=ma9wJKTWKvr16EAlGJPwuNJN-dwUCx-gYPAacF0HO5U,459776
+bigdl/cpp/libs/ollama-ggml-cpu.dll,sha256=lW9Ssioxrjm60ii9-LiLBoYJiFCpva8naloJpeGrjnI,477184
+bigdl/cpp/libs/ollama-ggml-sycl.dll,sha256=I-Ksd6w8OGivjKsPwdNA0u7sGzPvyFwu1W5OvJi2IcA,5326336
+bigdl/cpp/libs/ollama-lib.exe,sha256=LAaaV9voEPaeGBSSLsF0Eb9T0_zibH4SIe7NBakYses,25916416
+bigdl/cpp/libs/ollama.exe,sha256=GceJtJEhtrlNDAY9n-7FMyWiLoHP6UqgwOrrM7RdUIk,207360
+bigdl/cpp/libs/ollama_ggml.dll,sha256=X8XsTQd6Uc-LgMOzjNvGpnemqevACWoTVYdx7ZK6Zbc,113152
+bigdl/cpp/libs/ollama_llama.dll,sha256=XKb_ypeqhlL9_lP_AcxNR6Z8xdQcr-3Inr6l-CzX1o4,1421312
+bigdl/cpp/libs/ollama_llava_shared.dll,sha256=LtLN0l-jXVF8BNSH8XZAaAuIZXQWrc9cssBlPnYCixQ,398336
+bigdl_core_cpp-2.6.0b20250321.data/scripts/init-llama-cpp.bat,sha256=U0h6RifZxL3GGJp-0dxdZapQIvXUATSj644CURJL-lg,751
+bigdl_core_cpp-2.6.0b20250321.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
+bigdl_core_cpp-2.6.0b20250321.data/scripts/init-ollama.bat,sha256=0I2iBOFv3kR9hvEySGMRUU52-qwVhE7oRZnyWz-2z_U,657
+bigdl_core_cpp-2.6.0b20250321.dist-info/METADATA,sha256=9I4TcjPb4JO3gGlDHDAXgFbUhCRqhuZp9iOyVFX2Apo,750
+bigdl_core_cpp-2.6.0b20250321.dist-info/WHEEL,sha256=pUQ3YzM9z7CMLK4Pdg7RxRLrm1NUy0aQs4ESywX3iFk,97
+bigdl_core_cpp-2.6.0b20250321.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
+bigdl_core_cpp-2.6.0b20250321.dist-info/RECORD,,
{bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250321.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (77.0.1)
+Generator: setuptools (77.0.3)
 Root-Is-Purelib: true
 Tag: py3-none-win_amd64
 