bigdl-core-cpp 2.6.0b20241204__py3-none-win_amd64.whl → 2.6.0b20241211__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. bigdl/cpp/convert_hf_to_gguf.py +404 -37
  2. bigdl/cpp/convert_hf_to_gguf_update.py +25 -6
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +0 -4
  4. bigdl/cpp/convert_lora_to_gguf.py +11 -1
  5. bigdl/cpp/gguf-py/gguf/constants.py +276 -81
  6. bigdl/cpp/gguf-py/gguf/gguf_writer.py +25 -1
  7. bigdl/cpp/gguf-py/gguf/lazy.py +0 -1
  8. bigdl/cpp/gguf-py/gguf/quants.py +81 -0
  9. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +135 -23
  10. bigdl/cpp/libs/common.lib +0 -0
  11. bigdl/cpp/libs/ggml.dll +0 -0
  12. bigdl/cpp/libs/llama-batched.exe +0 -0
  13. bigdl/cpp/libs/llama-bench.exe +0 -0
  14. bigdl/cpp/libs/llama-cli.exe +0 -0
  15. bigdl/cpp/libs/llama-embedding.exe +0 -0
  16. bigdl/cpp/libs/llama-gguf.exe +0 -0
  17. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  18. bigdl/cpp/libs/llama-lookup.exe +0 -0
  19. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  20. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  21. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  22. bigdl/cpp/libs/llama-quantize.exe +0 -0
  23. bigdl/cpp/libs/llama-server.exe +0 -0
  24. bigdl/cpp/libs/llama-simple.exe +0 -0
  25. bigdl/cpp/libs/llama-speculative.exe +0 -0
  26. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  27. bigdl/cpp/libs/llama.dll +0 -0
  28. bigdl/cpp/libs/llava_shared.dll +0 -0
  29. bigdl/cpp/libs/ollama.exe +0 -0
  30. {bigdl_core_cpp-2.6.0b20241204.dist-info → bigdl_core_cpp-2.6.0b20241211.dist-info}/METADATA +1 -1
  31. bigdl_core_cpp-2.6.0b20241211.dist-info/RECORD +45 -0
  32. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
  33. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
  34. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
  35. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
  36. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
  37. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
  38. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
  39. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
  40. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
  41. bigdl_core_cpp-2.6.0b20241204.dist-info/RECORD +0 -54
  42. {bigdl_core_cpp-2.6.0b20241204.data → bigdl_core_cpp-2.6.0b20241211.data}/scripts/init-llama-cpp.bat +0 -0
  43. {bigdl_core_cpp-2.6.0b20241204.data → bigdl_core_cpp-2.6.0b20241211.data}/scripts/init-llama-cpp.ps1 +0 -0
  44. {bigdl_core_cpp-2.6.0b20241204.data → bigdl_core_cpp-2.6.0b20241211.data}/scripts/init-ollama.bat +0 -0
  45. {bigdl_core_cpp-2.6.0b20241204.dist-info → bigdl_core_cpp-2.6.0b20241211.dist-info}/WHEEL +0 -0
  46. {bigdl_core_cpp-2.6.0b20241204.dist-info → bigdl_core_cpp-2.6.0b20241211.dist-info}/top_level.txt +0 -0
@@ -574,6 +574,87 @@ class Q6_K(__Quant, qtype=GGMLQuantizationType.Q6_K):
574
574
  return (d * q).reshape((n_blocks, QK_K))
575
575
 
576
576
 
577
+ class TQ1_0(__Quant, qtype=GGMLQuantizationType.TQ1_0):
578
+ @classmethod
579
+ def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
580
+ n_blocks = blocks.shape[0]
581
+
582
+ d = abs(blocks).max(axis=-1, keepdims=True)
583
+ with np.errstate(divide="ignore"):
584
+ id = np.where(d == 0, 0, 1 / d)
585
+ qs = np_roundf(blocks * id)
586
+ qs = (qs.astype(np.int8) + np.int8(1)).astype(np.uint8)
587
+
588
+ qs0, qs1, qh = qs[..., :(32 * 5)], qs[..., (32 * 5):(48 * 5)], qs[..., (48 * 5):]
589
+ qs0 = qs0.reshape((n_blocks, -1, 5, 32)) * np.array([81, 27, 9, 3, 1], dtype=np.uint8).reshape((1, 1, 5, 1))
590
+ qs0 = np.sum(qs0, axis=-2).reshape((n_blocks, -1))
591
+ qs1 = qs1.reshape((n_blocks, -1, 5, 16)) * np.array([81, 27, 9, 3, 1], dtype=np.uint8).reshape((1, 1, 5, 1))
592
+ qs1 = np.sum(qs1, axis=-2).reshape((n_blocks, -1))
593
+ qh = qh.reshape((n_blocks, -1, 4, 4)) * np.array([81, 27, 9, 3], dtype=np.uint8).reshape((1, 1, 4, 1))
594
+ qh = np.sum(qh, axis=-2).reshape((n_blocks, -1))
595
+ qs = np.concatenate([qs0, qs1, qh], axis=-1)
596
+ qs = (qs.astype(np.uint16) * 256 + (243 - 1)) // 243
597
+
598
+ qs = qs.astype(np.uint8)
599
+ d = d.astype(np.float16).view(np.uint8)
600
+
601
+ return np.concatenate([qs, d], axis=-1)
602
+
603
+ @classmethod
604
+ def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
605
+ n_blocks = blocks.shape[0]
606
+
607
+ qs, rest = np.hsplit(blocks, [(QK_K - 4 * QK_K // 64) // 5])
608
+ qh, d = np.hsplit(rest, [QK_K // 64])
609
+
610
+ d = d.view(np.float16).astype(np.float32)
611
+
612
+ qs0, qs1 = qs[..., :32], qs[..., 32:]
613
+ qs0 = qs0.reshape((n_blocks, -1, 1, 32)) * np.array([1, 3, 9, 27, 81], dtype=np.uint8).reshape((1, 1, 5, 1))
614
+ qs0 = qs0.reshape((n_blocks, -1))
615
+ qs1 = qs1.reshape((n_blocks, -1, 1, 16)) * np.array([1, 3, 9, 27, 81], dtype=np.uint8).reshape((1, 1, 5, 1))
616
+ qs1 = qs1.reshape((n_blocks, -1))
617
+ qh = qh.reshape((n_blocks, -1, 1, 4)) * np.array([1, 3, 9, 27], dtype=np.uint8).reshape((1, 1, 4, 1))
618
+ qh = qh.reshape((n_blocks, -1))
619
+ qs = np.concatenate([qs0, qs1, qh], axis=-1)
620
+ qs = ((qs.astype(np.uint16) * 3) >> 8).astype(np.int8) - np.int8(1)
621
+
622
+ return (d * qs.astype(np.float32))
623
+
624
+
625
+ class TQ2_0(__Quant, qtype=GGMLQuantizationType.TQ2_0):
626
+ @classmethod
627
+ def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
628
+ n_blocks = blocks.shape[0]
629
+
630
+ d = abs(blocks).max(axis=-1, keepdims=True)
631
+ with np.errstate(divide="ignore"):
632
+ id = np.where(d == 0, 0, 1 / d)
633
+ qs = np_roundf(blocks * id)
634
+ qs = (qs.astype(np.int8) + np.int8(1)).astype(np.uint8)
635
+
636
+ qs = qs.reshape((n_blocks, -1, 4, 32)) << np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
637
+ qs = qs[..., 0, :] | qs[..., 1, :] | qs[..., 2, :] | qs[..., 3, :]
638
+ qs = qs.reshape((n_blocks, -1))
639
+
640
+ d = d.astype(np.float16).view(np.uint8)
641
+
642
+ return np.concatenate([qs, d], axis=-1)
643
+
644
+ @classmethod
645
+ def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
646
+ n_blocks = blocks.shape[0]
647
+
648
+ qs, d = np.hsplit(blocks, [QK_K // 4])
649
+
650
+ d = d.view(np.float16).astype(np.float32)
651
+
652
+ qs = qs.reshape((n_blocks, -1, 1, 32)) >> np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
653
+ qs = (qs & 0x03).reshape((n_blocks, -1)).astype(np.int8) - np.int8(1)
654
+
655
+ return (d * qs.astype(np.float32))
656
+
657
+
577
658
  class IQ2_XXS(__Quant, qtype=GGMLQuantizationType.IQ2_XXS):
578
659
  ksigns: bytes = (
579
660
  b"\x00\x81\x82\x03\x84\x05\x06\x87\x88\x09\x0a\x8b\x0c\x8d\x8e\x0f"
@@ -13,7 +13,7 @@ class TensorNameMap:
13
13
  "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
14
14
  "transformer.word_embeddings", # falcon
15
15
  "word_embeddings", # bloom
16
- "model.embed_tokens", # llama-hf nemotron
16
+ "model.embed_tokens", # llama-hf nemotron olmoe
17
17
  "tok_embeddings", # llama-pth
18
18
  "embeddings.word_embeddings", # bert nomic-bert
19
19
  "language_model.embedding.word_embeddings", # persimmon
@@ -27,6 +27,7 @@ class TensorNameMap:
27
27
  "embedding.word_embeddings", # chatglm
28
28
  "transformer.token_embeddings", # openelm
29
29
  "shared", # t5
30
+ "rwkv.embeddings", # rwkv
30
31
  ),
31
32
 
32
33
  # Token type embeddings
@@ -40,6 +41,7 @@ class TensorNameMap:
40
41
  "embeddings.LayerNorm", # bert
41
42
  "emb_ln", # nomic-bert
42
43
  "transformer.norm", # openelm
44
+ "rwkv.blocks.0.pre_ln", # rwkv
43
45
  ),
44
46
 
45
47
  # Position embeddings
@@ -52,18 +54,19 @@ class TensorNameMap:
52
54
  # Output
53
55
  MODEL_TENSOR.OUTPUT: (
54
56
  "embed_out", # gptneox
55
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone
57
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe
56
58
  "output", # llama-pth bloom internlm2
57
59
  "word_embeddings_for_head", # persimmon
58
60
  "lm_head.linear", # phi2
59
61
  "output_layer", # chatglm
62
+ "head", # rwkv
60
63
  ),
61
64
 
62
65
  # Output norm
63
66
  MODEL_TENSOR.OUTPUT_NORM: (
64
67
  "gpt_neox.final_layer_norm", # gptneox
65
68
  "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
66
- "model.norm", # llama-hf baichuan internlm2
69
+ "model.norm", # llama-hf baichuan internlm2 olmoe
67
70
  "norm", # llama-pth
68
71
  "transformer.norm_f", # mpt dbrx
69
72
  "ln_f", # refact bloom qwen gpt2
@@ -76,6 +79,7 @@ class TensorNameMap:
76
79
  "encoder.final_layernorm", # chatglm
77
80
  "transformer.norm", # openelm
78
81
  "model.norm", # nemotron
82
+ "rwkv.ln_out", # rwkv
79
83
  ),
80
84
 
81
85
  # Rope frequencies
@@ -83,6 +87,9 @@ class TensorNameMap:
83
87
  "rope.freqs", # llama-pth
84
88
  "rotary_pos_emb.inv_freq", # chatglm
85
89
  ),
90
+
91
+ MODEL_TENSOR.ROPE_FACTORS_LONG: (),
92
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
86
93
  }
87
94
 
88
95
  block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
@@ -94,7 +101,7 @@ class TensorNameMap:
94
101
  "transformer.h.{bid}.input_layernorm", # falcon7b
95
102
  "h.{bid}.input_layernorm", # bloom
96
103
  "transformer.h.{bid}.ln_mlp", # falcon40b
97
- "model.layers.{bid}.input_layernorm", # llama-hf nemotron
104
+ "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe
98
105
  "layers.{bid}.attention_norm", # llama-pth
99
106
  "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
100
107
  "model.layers.{bid}.ln1", # yi
@@ -108,12 +115,14 @@ class TensorNameMap:
108
115
  "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
109
116
  "encoder.layers.{bid}.input_layernorm", # chatglm
110
117
  "transformer.layers.{bid}.attn_norm", # openelm
118
+ "rwkv.blocks.{bid}.ln1", # rwkv
111
119
  ),
112
120
 
113
121
  # Attention norm 2
114
122
  MODEL_TENSOR.ATTN_NORM_2: (
115
- "transformer.h.{bid}.ln_attn", # falcon40b
123
+ "transformer.h.{bid}.ln_attn", # falcon40b
116
124
  "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
125
+ "rwkv.blocks.{bid}.ln2", # rwkv
117
126
  ),
118
127
 
119
128
  # Attention query-key-value
@@ -136,7 +145,7 @@ class TensorNameMap:
136
145
 
137
146
  # Attention query
138
147
  MODEL_TENSOR.ATTN_Q: (
139
- "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron
148
+ "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe
140
149
  "layers.{bid}.attention.wq", # llama-pth
141
150
  "encoder.layer.{bid}.attention.self.query", # bert
142
151
  "transformer.h.{bid}.attn.q_proj", # gpt-j
@@ -148,7 +157,7 @@ class TensorNameMap:
148
157
 
149
158
  # Attention key
150
159
  MODEL_TENSOR.ATTN_K: (
151
- "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron
160
+ "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe
152
161
  "layers.{bid}.attention.wk", # llama-pth
153
162
  "encoder.layer.{bid}.attention.self.key", # bert
154
163
  "transformer.h.{bid}.attn.k_proj", # gpt-j
@@ -161,7 +170,7 @@ class TensorNameMap:
161
170
 
162
171
  # Attention value
163
172
  MODEL_TENSOR.ATTN_V: (
164
- "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron
173
+ "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe
165
174
  "layers.{bid}.attention.wv", # llama-pth
166
175
  "encoder.layer.{bid}.attention.self.value", # bert
167
176
  "transformer.h.{bid}.attn.v_proj", # gpt-j
@@ -179,7 +188,7 @@ class TensorNameMap:
179
188
  "transformer.blocks.{bid}.attn.out_proj", # mpt
180
189
  "transformer.h.{bid}.self_attention.dense", # falcon
181
190
  "h.{bid}.self_attention.dense", # bloom
182
- "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron
191
+ "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe
183
192
  "layers.{bid}.attention.wo", # llama-pth
184
193
  "encoder.layer.{bid}.attention.output.dense", # bert
185
194
  "transformer.h.{bid}.attn.out_proj", # gpt-j
@@ -223,7 +232,7 @@ class TensorNameMap:
223
232
  "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
224
233
  "h.{bid}.post_attention_layernorm", # bloom
225
234
  "transformer.blocks.{bid}.norm_2", # mpt
226
- "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron
235
+ "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe
227
236
  "layers.{bid}.ffn_norm", # llama-pth
228
237
  "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
229
238
  "model.layers.{bid}.ln2", # yi
@@ -245,11 +254,12 @@ class TensorNameMap:
245
254
  ),
246
255
 
247
256
  MODEL_TENSOR.FFN_GATE_INP: (
248
- "layers.{bid}.feed_forward.gate", # mixtral
249
- "model.layers.{bid}.block_sparse_moe.gate", # mixtral
250
- "model.layers.{bid}.mlp.gate", # qwen2moe
251
- "transformer.decoder_layer.{bid}.router", # Grok
252
- "transformer.blocks.{bid}.ffn.router.layer", # dbrx
257
+ "layers.{bid}.feed_forward.gate", # mixtral
258
+ "model.layers.{bid}.block_sparse_moe.gate", # mixtral
259
+ "model.layers.{bid}.mlp.gate", # qwen2moe olmoe
260
+ "transformer.decoder_layer.{bid}.router", # Grok
261
+ "transformer.blocks.{bid}.ffn.router.layer", # dbrx
262
+ "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
253
263
  ),
254
264
 
255
265
  MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
@@ -289,7 +299,7 @@ class TensorNameMap:
289
299
  "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
290
300
  "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
291
301
  "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
292
- "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
302
+ "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
293
303
  ),
294
304
 
295
305
  MODEL_TENSOR.FFN_UP_SHEXP: (
@@ -321,7 +331,7 @@ class TensorNameMap:
321
331
  "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
322
332
  "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
323
333
  "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
324
- "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
334
+ "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
325
335
  ),
326
336
 
327
337
  MODEL_TENSOR.FFN_GATE_SHEXP: (
@@ -358,10 +368,11 @@ class TensorNameMap:
358
368
  ),
359
369
 
360
370
  MODEL_TENSOR.FFN_DOWN_EXP: (
361
- "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
362
- "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
363
- "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
364
- "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
371
+ "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
372
+ "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
373
+ "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
374
+ "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
375
+ "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
365
376
  ),
366
377
 
367
378
  MODEL_TENSOR.FFN_DOWN_SHEXP: (
@@ -372,7 +383,7 @@ class TensorNameMap:
372
383
  MODEL_TENSOR.ATTN_Q_NORM: (
373
384
  "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
374
385
  "model.layers.{bid}.self_attn.q_layernorm", # persimmon
375
- "model.layers.{bid}.self_attn.q_norm", # cohere
386
+ "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon
376
387
  "transformer.blocks.{bid}.attn.q_ln", # sea-lion
377
388
  "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
378
389
  "transformer.layers.{bid}.attn.q_norm", # openelm
@@ -381,7 +392,7 @@ class TensorNameMap:
381
392
  MODEL_TENSOR.ATTN_K_NORM: (
382
393
  "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
383
394
  "model.layers.{bid}.self_attn.k_layernorm", # persimmon
384
- "model.layers.{bid}.self_attn.k_norm", # cohere
395
+ "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon
385
396
  "transformer.blocks.{bid}.attn.k_ln", # sea-lion
386
397
  "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
387
398
  "transformer.layers.{bid}.attn.k_norm", # openelm
@@ -434,6 +445,98 @@ class TensorNameMap:
434
445
  "backbone.layers.{bid}.mixer.out_proj",
435
446
  ),
436
447
 
448
+ MODEL_TENSOR.TIME_MIX_W1: (
449
+ "rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv v6
450
+ ),
451
+
452
+ MODEL_TENSOR.TIME_MIX_W2: (
453
+ "rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv v6
454
+ ),
455
+
456
+ MODEL_TENSOR.TIME_MIX_LERP_X: (
457
+ "rwkv.blocks.{bid}.attention.time_maa_x", # rwkv v6
458
+ ),
459
+
460
+ MODEL_TENSOR.TIME_MIX_LERP_K: (
461
+ "rwkv.blocks.{bid}.attention.time_maa_k", # rwkv v6
462
+ ),
463
+
464
+ MODEL_TENSOR.TIME_MIX_LERP_V: (
465
+ "rwkv.blocks.{bid}.attention.time_maa_v", # rwkv v6
466
+ ),
467
+
468
+ MODEL_TENSOR.TIME_MIX_LERP_R: (
469
+ "rwkv.blocks.{bid}.attention.time_maa_r", # rwkv v6
470
+ ),
471
+
472
+ MODEL_TENSOR.TIME_MIX_LERP_G: (
473
+ "rwkv.blocks.{bid}.attention.time_maa_g", # rwkv v6
474
+ ),
475
+
476
+ MODEL_TENSOR.TIME_MIX_LERP_W: (
477
+ "rwkv.blocks.{bid}.attention.time_maa_w", # rwkv v6
478
+ ),
479
+
480
+ MODEL_TENSOR.TIME_MIX_FIRST: (
481
+ "rwkv.blocks.{bid}.attention.time_faaaa", # rwkv v6
482
+ ),
483
+
484
+ MODEL_TENSOR.TIME_MIX_DECAY: (
485
+ "rwkv.blocks.{bid}.attention.time_decay", # rwkv v6
486
+ ),
487
+
488
+ MODEL_TENSOR.TIME_MIX_DECAY_W1: (
489
+ "rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv v6
490
+ ),
491
+
492
+ MODEL_TENSOR.TIME_MIX_DECAY_W2: (
493
+ "rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv v6
494
+ ),
495
+
496
+ MODEL_TENSOR.TIME_MIX_KEY: (
497
+ "rwkv.blocks.{bid}.attention.key", # rwkv
498
+ ),
499
+
500
+ MODEL_TENSOR.TIME_MIX_VALUE: (
501
+ "rwkv.blocks.{bid}.attention.value", # rwkv
502
+ ),
503
+
504
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
505
+ "rwkv.blocks.{bid}.attention.receptance", # rwkv
506
+ ),
507
+
508
+ MODEL_TENSOR.TIME_MIX_GATE: (
509
+ "rwkv.blocks.{bid}.attention.gate", # rwkv
510
+ ),
511
+
512
+ MODEL_TENSOR.TIME_MIX_LN: (
513
+ "rwkv.blocks.{bid}.attention.ln_x", # rwkv
514
+ ),
515
+
516
+ MODEL_TENSOR.TIME_MIX_OUTPUT: (
517
+ "rwkv.blocks.{bid}.attention.output", # rwkv
518
+ ),
519
+
520
+ MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
521
+ "rwkv.blocks.{bid}.feed_forward.time_maa_k", # rwkv v6
522
+ ),
523
+
524
+ MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
525
+ "rwkv.blocks.{bid}.feed_forward.time_maa_r", # rwkv v6
526
+ ),
527
+
528
+ MODEL_TENSOR.CHANNEL_MIX_KEY: (
529
+ "rwkv.blocks.{bid}.feed_forward.key", # rwkv
530
+ ),
531
+
532
+ MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
533
+ "rwkv.blocks.{bid}.feed_forward.receptance", # rwkv
534
+ ),
535
+
536
+ MODEL_TENSOR.CHANNEL_MIX_VALUE: (
537
+ "rwkv.blocks.{bid}.feed_forward.value", # rwkv
538
+ ),
539
+
437
540
  MODEL_TENSOR.ATTN_Q_A: (
438
541
  "model.layers.{bid}.self_attn.q_a_proj", # deepseek2
439
542
  ),
@@ -579,6 +682,15 @@ class TensorNameMap:
579
682
  MODEL_TENSOR.ENC_OUTPUT_NORM: (
580
683
  "encoder.final_layer_norm", # t5
581
684
  ),
685
+
686
+ MODEL_TENSOR.CLS: (
687
+ "classifier", # jina
688
+ "classifier.dense", # roberta
689
+ ),
690
+
691
+ MODEL_TENSOR.CLS_OUT: (
692
+ "classifier.out_proj", # roberta
693
+ ),
582
694
  }
583
695
 
584
696
  # architecture-specific block mappings
bigdl/cpp/libs/common.lib CHANGED
Binary file
bigdl/cpp/libs/ggml.dll CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/llama.dll CHANGED
Binary file
Binary file
bigdl/cpp/libs/ollama.exe CHANGED
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bigdl-core-cpp
3
- Version: 2.6.0b20241204
3
+ Version: 2.6.0b20241211
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Author: BigDL Authors
6
6
  License: Apache License, Version 2.0
@@ -0,0 +1,45 @@
1
+ bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ bigdl/cpp/convert_hf_to_gguf.py,sha256=QcBpqyIsrqLlLFwCp0Z8M3IzMobHygjQY0ZgvFoF_u0,207430
3
+ bigdl/cpp/convert_hf_to_gguf_update.py,sha256=O1NH13YPWT9Af778goJOg8pccbrc5cOgwYcPOIOqYq0,16612
4
+ bigdl/cpp/convert_llama_ggml_to_gguf.py,sha256=0dKjRhmFzvWV4e-cuLmaeW14JrWUtZwerBmz8mYyMvI,19556
5
+ bigdl/cpp/convert_lora_to_gguf.py,sha256=qBJSMA_w3cIN_Mi5pNsi4zI1P5GYIeRi4nZPTpAs8QQ,15461
6
+ bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ bigdl/cpp/gguf-py/gguf/__init__.py,sha256=h5GWs6SMXYR8giWZ7MTZzAc3hYsIJF-HAkdxtgXLOPo,228
8
+ bigdl/cpp/gguf-py/gguf/constants.py,sha256=8_u4WadRGm7XeN0hxaIzDUgGajyUdHB4XsbslumYS2U,58733
9
+ bigdl/cpp/gguf-py/gguf/gguf.py,sha256=V5jY968TEJn6GJHVdjzH0_aIkZ1QC967vPdHDKDoxZw,491
10
+ bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=N3LnQQ30t-S0U85-EvZZzIBfHzo0XuyFVUltdg7Sj3c,12680
11
+ bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=tHtbF0ogWwfclKCvO7VyGKoJuCieFrrBx-vVpnruoQA,37092
12
+ bigdl/cpp/gguf-py/gguf/lazy.py,sha256=YIYxGBWD-oKXU4HOvpHs9eiEn81HUgeSmt1mmHJlbdM,8814
13
+ bigdl/cpp/gguf-py/gguf/metadata.py,sha256=wtquhynkyH8R7m3zxgeSUe2bnaekJi6HoCMiYJfJBmk,26232
14
+ bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ bigdl/cpp/gguf-py/gguf/quants.py,sha256=2z6vcK-kBefqZbYNmSEVmdZF_tXHeVb5NC6jCbBdgKc,62040
16
+ bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=oW7E5hnCWy7IeiQeN0v7xoLWeSatDUgEmKq_ZYlLV8s,35299
17
+ bigdl/cpp/gguf-py/gguf/utility.py,sha256=LAwLstUlosYQ1oX9LlJZ-1uCmwyNtOFcJfXpkLnju0k,3003
18
+ bigdl/cpp/gguf-py/gguf/vocab.py,sha256=FtNcm8M5aX9RIr6rRR6UXsUlKMagRUC2xnIWb-xu6rI,19511
19
+ bigdl/cpp/libs/common.lib,sha256=KiEclGjWr4Re-koAlu_EJ0dx_eJWOZ_-cxynqjEhiMs,4205058
20
+ bigdl/cpp/libs/ggml.dll,sha256=ZYnnmLc1Ncyn3rDEJrW6LwWYLHU_Obz80O3TGukLZIs,6219776
21
+ bigdl/cpp/libs/llama-batched.exe,sha256=YiRTTJYU5HzIBRSABWfUsouCYZbUO8A33BJ-sZMN7NM,838656
22
+ bigdl/cpp/libs/llama-bench.exe,sha256=sCqTPsHULyNLaelX2zsJf5eytVUFTJ0CGIQqhCjbKxA,290304
23
+ bigdl/cpp/libs/llama-cli.exe,sha256=CfOwSxxDB9Mnjmv3f--NcQ8o8mnJNn35oIpAiMboAwI,929280
24
+ bigdl/cpp/libs/llama-embedding.exe,sha256=25gJuxi33L9YGz6rqJaB-RwNrPoKJV7TmqkpiFD5D3w,860672
25
+ bigdl/cpp/libs/llama-gguf.exe,sha256=i9_oh1DcUMvQeYo6vJVxha_clodUD2cmGiLiEajco5w,66560
26
+ bigdl/cpp/libs/llama-llava-cli.exe,sha256=Qz63VFbVJos_YcBOJ9lZmz4ls4_WOkb-Ja4Zbxl5qwg,1091072
27
+ bigdl/cpp/libs/llama-lookup.exe,sha256=Xu6BHMpzrXd0KCqbIsxd2v1wYG0tVYl3BQMzCedus74,893952
28
+ bigdl/cpp/libs/llama-ls-sycl-device.exe,sha256=n3r_ttydRhyAfwo9pXZC3Cx81ffJZJg5Iu9R9ObkM-w,10240
29
+ bigdl/cpp/libs/llama-minicpmv-cli.exe,sha256=nzshnx0yvRmS8vf4JQcdllN5gnE55yXEKcDvRlVZoc4,1089024
30
+ bigdl/cpp/libs/llama-perplexity.exe,sha256=2PImsgokgDW2koGU61FADV5BnUhlBAbzZ153bde7aZ4,997376
31
+ bigdl/cpp/libs/llama-quantize.exe,sha256=Zx5Nofwfm9ITX-b1X2ka9VB8oEeTDcq9FLIUKAB9A6I,139264
32
+ bigdl/cpp/libs/llama-server.exe,sha256=_DEZ1za1cqW7lg9FqeTf7UMUc-QOVmuvAKaYxyMkDJY,1960960
33
+ bigdl/cpp/libs/llama-simple.exe,sha256=9CI4Y7tfoXn0zOkUGtNJXcouLpxqqyYdyYKkypxMPUU,834048
34
+ bigdl/cpp/libs/llama-speculative.exe,sha256=yAFNeNEpxzSEbrEqRZlG9b2h975tGyYWlwOO3GvAmew,896512
35
+ bigdl/cpp/libs/llama-tokenize.exe,sha256=445caGydC-Tof4OX9xfyYUItyKrCE6dTZJ8mWpb8CFc,102400
36
+ bigdl/cpp/libs/llama.dll,sha256=na9ZxrYLqG3CmutTL7SCFF__kt2MbbCOQOG-YkWwo6M,2471936
37
+ bigdl/cpp/libs/llava_shared.dll,sha256=Rxxf-Mq63MDfas2omN-A8Y4Fjk06GmmcaGc1uD69a3g,369152
38
+ bigdl/cpp/libs/ollama.exe,sha256=8IcEJkzPcoE0r3K2sAacTX6aVfvKI-xu-1ysEOQ8yJA,64879199
39
+ bigdl_core_cpp-2.6.0b20241211.data/scripts/init-llama-cpp.bat,sha256=U0h6RifZxL3GGJp-0dxdZapQIvXUATSj644CURJL-lg,751
40
+ bigdl_core_cpp-2.6.0b20241211.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
41
+ bigdl_core_cpp-2.6.0b20241211.data/scripts/init-ollama.bat,sha256=2rScaP2-_yWuAnc86NyX-i6pg7BxjvLc96Kt7HuvQVM,603
42
+ bigdl_core_cpp-2.6.0b20241211.dist-info/METADATA,sha256=eNkNqvxgOYJelp2IlQ17j-t19SmJf1pcKGmQfhyXaBw,652
43
+ bigdl_core_cpp-2.6.0b20241211.dist-info/WHEEL,sha256=z8gukVdnGwjcwo0VnsfJMrhPu5QJT68VcMWmAgvAufw,97
44
+ bigdl_core_cpp-2.6.0b20241211.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
45
+ bigdl_core_cpp-2.6.0b20241211.dist-info/RECORD,,
@@ -1,54 +0,0 @@
1
- bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- bigdl/cpp/convert_hf_to_gguf.py,sha256=5DxO33LLwJX4aYpWMwTTTvnZd2JinTWatBwnW54A8iQ,189773
3
- bigdl/cpp/convert_hf_to_gguf_update.py,sha256=pKKPaDe8Dhsvcu_ofSPVEgZ6Ojgk8P9bmDIFi1Hm7lo,15503
4
- bigdl/cpp/convert_llama_ggml_to_gguf.py,sha256=VyHM3jMYwzM5uQByh-W2DKHEXiwQDk8RBonpdbBL5l8,19734
5
- bigdl/cpp/convert_lora_to_gguf.py,sha256=bc-D5-lINVo6SXrt-Lws8wdWeIfwcdA7GBptP3MttcM,14775
6
- bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- bigdl/cpp/gguf-py/gguf/__init__.py,sha256=h5GWs6SMXYR8giWZ7MTZzAc3hYsIJF-HAkdxtgXLOPo,228
8
- bigdl/cpp/gguf-py/gguf/constants.py,sha256=J1dMukNMfNKF_yEbjvOSQpYODfifY05TwdiTQqHw27E,50556
9
- bigdl/cpp/gguf-py/gguf/gguf.py,sha256=V5jY968TEJn6GJHVdjzH0_aIkZ1QC967vPdHDKDoxZw,491
10
- bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=N3LnQQ30t-S0U85-EvZZzIBfHzo0XuyFVUltdg7Sj3c,12680
11
- bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=VZneSoXRxmxCFP55CMIqBws4XgboeC8sJeGNoMHy6Uc,35976
12
- bigdl/cpp/gguf-py/gguf/lazy.py,sha256=kckbqp8tj7NMkDNwePxwI_1WxK6qH5pMk9p7Lu8cj6A,8816
13
- bigdl/cpp/gguf-py/gguf/metadata.py,sha256=wtquhynkyH8R7m3zxgeSUe2bnaekJi6HoCMiYJfJBmk,26232
14
- bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- bigdl/cpp/gguf-py/gguf/quants.py,sha256=8broI4A53_Zbd8nQ-a6Qcw2TY4T8XgqHhHFq-J6E9eY,58524
16
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=V2UiBqnPVfFeOK_sg9JY6mXqOLBhCsHGJy7kPHn5jMQ,31548
17
- bigdl/cpp/gguf-py/gguf/utility.py,sha256=LAwLstUlosYQ1oX9LlJZ-1uCmwyNtOFcJfXpkLnju0k,3003
18
- bigdl/cpp/gguf-py/gguf/vocab.py,sha256=FtNcm8M5aX9RIr6rRR6UXsUlKMagRUC2xnIWb-xu6rI,19511
19
- bigdl/cpp/libs/common.lib,sha256=fu4v0lhqD5X44oleXPd2r_Hb7vM6oxu7GWgHZEN5wBA,4057932
20
- bigdl/cpp/libs/ggml.dll,sha256=Yqvqkmai6yfxHPNj9tQIGQy5UpDxQr3ovxWN6dXNPjo,5747200
21
- bigdl/cpp/libs/llama-batched.exe,sha256=Gmapigvr9e5mRnTKQQoMiOPM0MQ2Oo-Jbv_Aj3C7uBI,771072
22
- bigdl/cpp/libs/llama-bench.exe,sha256=bPgiGEdSVK1UTDaTu1UjAx09k4rU9ddnWd5MLFI0G6U,336896
23
- bigdl/cpp/libs/llama-cli.exe,sha256=zOZkZowVzARtz_uGGMcZjgN2HWYiPNy-r-hYLnH-S9o,906752
24
- bigdl/cpp/libs/llama-embedding.exe,sha256=d4Z12U85skGuHWP2OnsLZb3GWrq7Imf0jlXEFe67aGo,785408
25
- bigdl/cpp/libs/llama-gguf.exe,sha256=_SFeDN1R9l-6JlAD4cqDpHvJJQPz9umomskj4WDV1EU,66560
26
- bigdl/cpp/libs/llama-llava-cli.exe,sha256=fgf_QoWWvEJUE1jivwfNv-4kNTKlUUpknBWrVO2x41M,1091072
27
- bigdl/cpp/libs/llama-lookup.exe,sha256=jk7gecgU4-JMPQzvuZ_5Ua_DADcREsZJJKJDaODv3S4,851968
28
- bigdl/cpp/libs/llama-ls-sycl-device.exe,sha256=0T3nyCsh3ipx3MAg2jnKJ9s8zpFcHqUp2VVTKgPFWPU,10240
29
- bigdl/cpp/libs/llama-minicpmv-cli.exe,sha256=kAXSj0uQPKQTyilHF9v8O3uzEd_3Fz682UKJQEPBx4Y,1082880
30
- bigdl/cpp/libs/llama-perplexity.exe,sha256=QSbQlsT097fzly9acGyuRrFbj-WRPHnoH1aSnfpNuUg,915456
31
- bigdl/cpp/libs/llama-quantize.exe,sha256=CMhHE0XPHcX3yGf1dDnRVEsYu6A5p_u9Ehm_2xnwfJk,227840
32
- bigdl/cpp/libs/llama-server.exe,sha256=vzkcVuu80vwff2jTbnAbh5oqtCgftNDzhX5H1yaIHss,2143744
33
- bigdl/cpp/libs/llama-simple.exe,sha256=dfsypF6HfCj3mzLxGGgb-MNdlb0doLcqJfzTITYKi7o,763392
34
- bigdl/cpp/libs/llama-speculative.exe,sha256=Z-QEL9Ica-cigbvXJV-z0ouAzzySSY5QbbGAvJU1IEU,849920
35
- bigdl/cpp/libs/llama-tokenize.exe,sha256=qd8XY05zRbbmu9S2YXE-NCjZjYg8HRUkG_fJM0iPz9I,200704
36
- bigdl/cpp/libs/llama.dll,sha256=sjV8oupuQX40hruymke6Gr8hpBmbBUnp1_F3XtHevQg,2217984
37
- bigdl/cpp/libs/llava_shared.dll,sha256=1gxdRqpsm18_PDOqj6WYhf9QXAG49nz0bqyc-L2LkgE,404480
38
- bigdl/cpp/libs/ollama.exe,sha256=kRJZ_YpcUh8dAwmRNX9T_GdPB2Wqkt2B5aknoCm4Gn8,65494851
39
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll,sha256=EcgDjYmlAZ1y2onVuQaVzFwmVKORQbAYapdQFrEi77E,5703680
40
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll,sha256=ccXu6uypObtwy7EOlsGBoFp6RCWqjfnwziCgT_rcfZk,2217984
41
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe,sha256=djHmEslhDOBSliETcM6xB3fzkyaC_O46w-s3kZOVVEk,1376768
42
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll,sha256=X8NcVQRPqasgn6EgagaN3rLshBRZTWB1SeLamJsXXEc,5703680
43
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll,sha256=-9vr01rKVrFo3g4nvTEOCFeVOj7RMlJed2rLrWaZMV8,2217984
44
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe,sha256=1gl5SaFXe0W4kq0CJ14ffSl3abx4ux9wiqXfgkxk7lc,1376768
45
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll,sha256=08UQw5mL9lA2QtfOV-Io9cLFhShGk5U6umJkQroSCPU,5703680
46
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll,sha256=iqPp4mL-l_v_Bi2rRHxdqraqW89vPyfgL6rR3C18djI,2217984
47
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe,sha256=uFHFQrpYs6a-nYPbotrkX1Soum3T-gyPiR549E1ukiI,1376768
48
- bigdl_core_cpp-2.6.0b20241204.data/scripts/init-llama-cpp.bat,sha256=U0h6RifZxL3GGJp-0dxdZapQIvXUATSj644CURJL-lg,751
49
- bigdl_core_cpp-2.6.0b20241204.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
50
- bigdl_core_cpp-2.6.0b20241204.data/scripts/init-ollama.bat,sha256=2rScaP2-_yWuAnc86NyX-i6pg7BxjvLc96Kt7HuvQVM,603
51
- bigdl_core_cpp-2.6.0b20241204.dist-info/METADATA,sha256=zybu715duUbDtl0TbBZfoY1k1Rv1FGsHGLd0son1f8g,652
52
- bigdl_core_cpp-2.6.0b20241204.dist-info/WHEEL,sha256=z8gukVdnGwjcwo0VnsfJMrhPu5QJT68VcMWmAgvAufw,97
53
- bigdl_core_cpp-2.6.0b20241204.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
54
- bigdl_core_cpp-2.6.0b20241204.dist-info/RECORD,,