bigdl-core-cpp 2.5.0b20240827__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. bigdl/cpp/convert_hf_to_gguf.py +1196 -147
  2. bigdl/cpp/convert_hf_to_gguf_update.py +69 -42
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +0 -4
  4. bigdl/cpp/convert_lora_to_gguf.py +82 -14
  5. bigdl/cpp/gguf-py/gguf/constants.py +645 -187
  6. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  7. bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
  8. bigdl/cpp/gguf-py/gguf/gguf_writer.py +92 -16
  9. bigdl/cpp/gguf-py/gguf/lazy.py +0 -1
  10. bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
  11. bigdl/cpp/gguf-py/gguf/quants.py +81 -0
  12. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +249 -38
  13. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  14. bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
  15. bigdl/cpp/libs/common.lib +0 -0
  16. bigdl/cpp/libs/ggml-base.dll +0 -0
  17. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  18. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  19. bigdl/cpp/libs/ggml.dll +0 -0
  20. bigdl/cpp/libs/libc++.dll +0 -0
  21. bigdl/cpp/libs/llama-batched.exe +0 -0
  22. bigdl/cpp/libs/llama-bench.exe +0 -0
  23. bigdl/cpp/libs/llama-cli.exe +0 -0
  24. bigdl/cpp/libs/llama-embedding.exe +0 -0
  25. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  26. bigdl/cpp/libs/llama-gguf.exe +0 -0
  27. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  28. bigdl/cpp/libs/llama-lookup.exe +0 -0
  29. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  30. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  31. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  32. bigdl/cpp/libs/llama-quantize.exe +0 -0
  33. bigdl/cpp/libs/llama-server.exe +0 -0
  34. bigdl/cpp/libs/llama-simple.exe +0 -0
  35. bigdl/cpp/libs/llama-speculative.exe +0 -0
  36. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  37. bigdl/cpp/libs/llama.dll +0 -0
  38. bigdl/cpp/libs/llava_shared.dll +0 -0
  39. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  40. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  41. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  42. bigdl/cpp/libs/ollama-lib.exe +0 -0
  43. bigdl/cpp/libs/ollama.exe +0 -0
  44. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  45. bigdl/cpp/libs/ollama_llama.dll +0 -0
  46. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  47. bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
  48. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
  49. bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
  50. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
  51. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
  52. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
  53. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
  54. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
  55. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
  56. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
  57. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
  58. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
  59. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
  60. bigdl_core_cpp-2.5.0b20240827.data/scripts/init-ollama.bat +0 -19
  61. bigdl_core_cpp-2.5.0b20240827.dist-info/RECORD +0 -54
  62. {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +0 -0
  63. {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
  64. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py CHANGED
@@ -13,7 +13,7 @@ class TensorNameMap:
  "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
  "transformer.word_embeddings", # falcon
  "word_embeddings", # bloom
- "model.embed_tokens", # llama-hf nemotron
+ "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2
  "tok_embeddings", # llama-pth
  "embeddings.word_embeddings", # bert nomic-bert
  "language_model.embedding.word_embeddings", # persimmon
@@ -27,6 +27,7 @@ class TensorNameMap:
  "embedding.word_embeddings", # chatglm
  "transformer.token_embeddings", # openelm
  "shared", # t5
+ "rwkv.embeddings", # rwkv
  ),

  # Token type embeddings
@@ -40,6 +41,8 @@ class TensorNameMap:
  "embeddings.LayerNorm", # bert
  "emb_ln", # nomic-bert
  "transformer.norm", # openelm
+ "rwkv.blocks.0.pre_ln", # rwkv
+ "backbone.norm", # wavtokenizer
  ),

  # Position embeddings
@@ -52,18 +55,20 @@ class TensorNameMap:
  # Output
  MODEL_TENSOR.OUTPUT: (
  "embed_out", # gptneox
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
  "output", # llama-pth bloom internlm2
  "word_embeddings_for_head", # persimmon
  "lm_head.linear", # phi2
  "output_layer", # chatglm
+ "head", # rwkv
+ "head.out", # wavtokenizer
  ),

  # Output norm
  MODEL_TENSOR.OUTPUT_NORM: (
  "gpt_neox.final_layer_norm", # gptneox
  "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
- "model.norm", # llama-hf baichuan internlm2
+ "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
  "norm", # llama-pth
  "transformer.norm_f", # mpt dbrx
  "ln_f", # refact bloom qwen gpt2
@@ -76,6 +81,8 @@ class TensorNameMap:
  "encoder.final_layernorm", # chatglm
  "transformer.norm", # openelm
  "model.norm", # nemotron
+ "rwkv.ln_out", # rwkv
+ "backbone.final_layer_norm", # wavtokenizer
  ),

  # Rope frequencies
@@ -83,6 +90,16 @@ class TensorNameMap:
  "rope.freqs", # llama-pth
  "rotary_pos_emb.inv_freq", # chatglm
  ),
+
+ MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
+
+ MODEL_TENSOR.CONV1D: (
+ "backbone.embed", # roberta
+ ),
+
+ MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
  }

  block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
@@ -94,7 +111,7 @@ class TensorNameMap:
  "transformer.h.{bid}.input_layernorm", # falcon7b
  "h.{bid}.input_layernorm", # bloom
  "transformer.h.{bid}.ln_mlp", # falcon40b
- "model.layers.{bid}.input_layernorm", # llama-hf nemotron
+ "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
  "layers.{bid}.attention_norm", # llama-pth
  "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
  "model.layers.{bid}.ln1", # yi
@@ -108,12 +125,14 @@ class TensorNameMap:
  "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
  "encoder.layers.{bid}.input_layernorm", # chatglm
  "transformer.layers.{bid}.attn_norm", # openelm
+ "rwkv.blocks.{bid}.ln1", # rwkv
  ),

  # Attention norm 2
  MODEL_TENSOR.ATTN_NORM_2: (
- "transformer.h.{bid}.ln_attn", # falcon40b
+ "transformer.h.{bid}.ln_attn", # falcon40b
  "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
+ "rwkv.blocks.{bid}.ln2", # rwkv
  ),

  # Attention query-key-value
@@ -136,7 +155,8 @@ class TensorNameMap:

  # Attention query
  MODEL_TENSOR.ATTN_Q: (
- "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron
+ "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2 phimoe
+ "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
  "layers.{bid}.attention.wq", # llama-pth
  "encoder.layer.{bid}.attention.self.query", # bert
  "transformer.h.{bid}.attn.q_proj", # gpt-j
@@ -148,7 +168,8 @@ class TensorNameMap:

  # Attention key
  MODEL_TENSOR.ATTN_K: (
- "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron
+ "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2 phimoe
+ "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
  "layers.{bid}.attention.wk", # llama-pth
  "encoder.layer.{bid}.attention.self.key", # bert
  "transformer.h.{bid}.attn.k_proj", # gpt-j
@@ -161,7 +182,7 @@ class TensorNameMap:

  # Attention value
  MODEL_TENSOR.ATTN_V: (
- "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron
+ "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
  "layers.{bid}.attention.wv", # llama-pth
  "encoder.layer.{bid}.attention.self.value", # bert
  "transformer.h.{bid}.attn.v_proj", # gpt-j
@@ -179,7 +200,8 @@ class TensorNameMap:
  "transformer.blocks.{bid}.attn.out_proj", # mpt
  "transformer.h.{bid}.self_attention.dense", # falcon
  "h.{bid}.self_attention.dense", # bloom
- "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron
+ "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
+ "model.layers.{bid}.self_attn.linear_attn", # deci
  "layers.{bid}.attention.wo", # llama-pth
  "encoder.layer.{bid}.attention.output.dense", # bert
  "transformer.h.{bid}.attn.out_proj", # gpt-j
@@ -206,7 +228,7 @@ class TensorNameMap:
  ),

  MODEL_TENSOR.ATTN_POST_NORM: (
- "model.layers.{bid}.post_attention_layernorm", # gemma2
+ "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
  ),

  # Rotary embeddings
@@ -223,7 +245,7 @@ class TensorNameMap:
  "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
  "h.{bid}.post_attention_layernorm", # bloom
  "transformer.blocks.{bid}.norm_2", # mpt
- "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron
+ "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe phimoe
  "layers.{bid}.ffn_norm", # llama-pth
  "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
  "model.layers.{bid}.ln2", # yi
@@ -241,21 +263,26 @@ class TensorNameMap:

  # Post feed-forward norm
  MODEL_TENSOR.FFN_POST_NORM: (
- "model.layers.{bid}.post_feedforward_layernorm", # gemma2
+ "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
  ),

  MODEL_TENSOR.FFN_GATE_INP: (
- "layers.{bid}.feed_forward.gate", # mixtral
- "model.layers.{bid}.block_sparse_moe.gate", # mixtral
- "model.layers.{bid}.mlp.gate", # qwen2moe
- "transformer.decoder_layer.{bid}.router", # Grok
- "transformer.blocks.{bid}.ffn.router.layer", # dbrx
+ "layers.{bid}.feed_forward.gate", # mixtral
+ "model.layers.{bid}.block_sparse_moe.gate", # mixtral phimoe
+ "model.layers.{bid}.mlp.gate", # qwen2moe olmoe
+ "transformer.decoder_layer.{bid}.router", # Grok
+ "transformer.blocks.{bid}.ffn.router.layer", # dbrx
+ "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
  ),

  MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
  "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
  ),

+ MODEL_TENSOR.FFN_EXP_PROBS_B: (
+ "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
+ ),
+
  # Feed-forward up
  MODEL_TENSOR.FFN_UP: (
  "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
@@ -263,7 +290,7 @@ class TensorNameMap:
  "transformer.blocks.{bid}.ffn.up_proj", # mpt
  "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
  "h.{bid}.mlp.dense_h_to_4h", # bloom
- "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron
+ "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
  "layers.{bid}.feed_forward.w3", # llama-pth
  "encoder.layer.{bid}.intermediate.dense", # bert
  "transformer.h.{bid}.mlp.fc_in", # gpt-j
@@ -286,15 +313,16 @@ class TensorNameMap:
  ),

  MODEL_TENSOR.FFN_UP_EXP: (
- "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
- "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
+ "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
+ "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
+ "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
  ),

  MODEL_TENSOR.FFN_UP_SHEXP: (
  "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
- "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
+ "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
  ),

  # AWQ-activation gate
@@ -304,7 +332,7 @@ class TensorNameMap:

  # Feed-forward gate
  MODEL_TENSOR.FFN_GATE: (
- "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
+ "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
  "layers.{bid}.feed_forward.w1", # llama-pth
  "transformer.h.{bid}.mlp.w2", # qwen
  "transformer.h.{bid}.mlp.c_fc2", # jais
@@ -318,15 +346,16 @@ class TensorNameMap:
  ),

  MODEL_TENSOR.FFN_GATE_EXP: (
- "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
- "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
+ "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
+ "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
+ "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
  ),

  MODEL_TENSOR.FFN_GATE_SHEXP: (
  "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
- "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
+ "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
  ),

  # Feed-forward down
@@ -336,7 +365,7 @@ class TensorNameMap:
  "transformer.blocks.{bid}.ffn.down_proj", # mpt
  "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
  "h.{bid}.mlp.dense_4h_to_h", # bloom
- "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron
+ "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
  "layers.{bid}.feed_forward.w2", # llama-pth
  "encoder.layer.{bid}.output.dense", # bert
  "transformer.h.{bid}.mlp.fc_out", # gpt-j
@@ -358,21 +387,23 @@ class TensorNameMap:
  ),

  MODEL_TENSOR.FFN_DOWN_EXP: (
- "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
- "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
+ "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
+ "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
+ "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
+ "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
  ),

  MODEL_TENSOR.FFN_DOWN_SHEXP: (
  "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
- "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
+ "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
  ),

  MODEL_TENSOR.ATTN_Q_NORM: (
  "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
  "model.layers.{bid}.self_attn.q_layernorm", # persimmon
- "model.layers.{bid}.self_attn.q_norm", # cohere
+ "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
  "transformer.blocks.{bid}.attn.q_ln", # sea-lion
  "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
  "transformer.layers.{bid}.attn.q_norm", # openelm
@@ -381,7 +412,7 @@ class TensorNameMap:
  MODEL_TENSOR.ATTN_K_NORM: (
  "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
  "model.layers.{bid}.self_attn.k_layernorm", # persimmon
- "model.layers.{bid}.self_attn.k_norm", # cohere
+ "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
  "transformer.blocks.{bid}.attn.k_ln", # sea-lion
  "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
  "transformer.layers.{bid}.attn.k_norm", # openelm
@@ -434,6 +465,114 @@ class TensorNameMap:
  "backbone.layers.{bid}.mixer.out_proj",
  ),

+ MODEL_TENSOR.TIME_MIX_W1: (
+ "rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_w1", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_W2: (
+ "rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_w2", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_X: (
+ "rwkv.blocks.{bid}.attention.time_maa_x", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_x", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_K: (
+ "rwkv.blocks.{bid}.attention.time_maa_k", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_k", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_V: (
+ "rwkv.blocks.{bid}.attention.time_maa_v", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_v", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_R: (
+ "rwkv.blocks.{bid}.attention.time_maa_r", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_r", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_G: (
+ "rwkv.blocks.{bid}.attention.time_maa_g", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_g", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_W: (
+ "rwkv.blocks.{bid}.attention.time_maa_w", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_w", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_FIRST: (
+ "rwkv.blocks.{bid}.attention.time_faaaa", # rwkv v6
+ ),
+
+ MODEL_TENSOR.TIME_MIX_DECAY: (
+ "rwkv.blocks.{bid}.attention.time_decay", # rwkv v6
+ "model.layers.{bid}.self_attn.time_decay", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_DECAY_W1: (
+ "rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv v6
+ "model.layers.{bid}.self_attn.time_decay_w1", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_DECAY_W2: (
+ "rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv v6
+ "model.layers.{bid}.self_attn.time_decay_w2", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_KEY: (
+ "rwkv.blocks.{bid}.attention.key", # rwkv
+ "model.layers.{bid}.self_attn.k_proj", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_VALUE: (
+ "rwkv.blocks.{bid}.attention.value", # rwkv
+ "model.layers.{bid}.self_attn.v_proj", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
+ "rwkv.blocks.{bid}.attention.receptance", # rwkv
+ "model.layers.{bid}.self_attn.q_proj", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_GATE: (
+ "rwkv.blocks.{bid}.attention.gate", # rwkv
+ "model.layers.{bid}.self_attn.gate", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LN: (
+ "rwkv.blocks.{bid}.attention.ln_x", # rwkv
+ ),
+
+ MODEL_TENSOR.TIME_MIX_OUTPUT: (
+ "rwkv.blocks.{bid}.attention.output", # rwkv
+ "model.layers.{bid}.self_attn.o_proj", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
+ "rwkv.blocks.{bid}.feed_forward.time_maa_k", # rwkv v6
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
+ "rwkv.blocks.{bid}.feed_forward.time_maa_r", # rwkv v6
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_KEY: (
+ "rwkv.blocks.{bid}.feed_forward.key", # rwkv
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
+ "rwkv.blocks.{bid}.feed_forward.receptance", # rwkv
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_VALUE: (
+ "rwkv.blocks.{bid}.feed_forward.value", # rwkv
+ ),
+
  MODEL_TENSOR.ATTN_Q_A: (
  "model.layers.{bid}.self_attn.q_a_proj", # deepseek2
  ),
@@ -576,9 +715,81 @@ class TensorNameMap:
  "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
  ),

+ ############################################################################
+ # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
  MODEL_TENSOR.ENC_OUTPUT_NORM: (
  "encoder.final_layer_norm", # t5
  ),
+
+ MODEL_TENSOR.CLS: (
+ "classifier", # jina
+ "classifier.dense", # roberta
+ ),
+
+ MODEL_TENSOR.CLS_OUT: (
+ "classifier.out_proj", # roberta
+ ),
+ #############################################################################
+
+ MODEL_TENSOR.CONVNEXT_DW: (
+ "backbone.convnext.{bid}.dwconv", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.CONVNEXT_NORM: (
+ "backbone.convnext.{bid}.norm", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.CONVNEXT_PW1: (
+ "backbone.convnext.{bid}.pwconv1", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.CONVNEXT_PW2: (
+ "backbone.convnext.{bid}.pwconv2", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.CONVNEXT_GAMMA: (
+ "backbone.convnext.{bid}.gamma", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_CONV1: (
+ "backbone.posnet.{bid}.conv1", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_CONV2: (
+ "backbone.posnet.{bid}.conv2", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_NORM: (
+ "backbone.posnet.{bid}.norm", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_NORM1: (
+ "backbone.posnet.{bid}.norm1", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_NORM2: (
+ "backbone.posnet.{bid}.norm2", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_NORM: (
+ "backbone.posnet.{bid}.norm", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_Q: (
+ "backbone.posnet.{bid}.q", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_K: (
+ "backbone.posnet.{bid}.k", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_V: (
+ "backbone.posnet.{bid}.v", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_OUT: (
+ "backbone.posnet.{bid}.proj_out", # wavtokenizer
+ ),
  }

  # architecture-specific block mappings
@@ -654,4 +865,4 @@ class TensorNameMap:


  def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
- return TensorNameMap(arch, n_blocks)
+ return TensorNameMap(arch, n_blocks)
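For context, the mapping tables above are consumed through get_tensor_name_map() at the end of the file. Below is a minimal sketch (not part of the package) of how a convert script resolves a Hugging Face tensor name to its GGUF name; it assumes the bundled gguf-py package is importable as gguf and that TensorNameMap.get_name() accepts a try_suffixes tuple, as in the upstream convert scripts.

    import gguf

    # Build the name map for a LLaMA-style checkpoint with 32 blocks.
    tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, 32)

    # "model.layers.{bid}.self_attn.q_proj" is listed under MODEL_TENSOR.ATTN_Q above,
    # so the lookup should resolve to the GGUF base name "blk.0.attn_q" plus the matched suffix.
    name = tensor_map.get_name("model.layers.0.self_attn.q_proj.weight",
                               try_suffixes=(".weight", ".bias"))
    print(name)  # expected: blk.0.attn_q.weight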
bigdl/cpp/gguf-py/gguf/utility.py CHANGED
@@ -47,7 +47,7 @@ def size_label(total_params: int, shared_params: int, expert_params: int, expert


  def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
- # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+ # Reference: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#gguf-naming-convention

  if base_name is not None:
  name = base_name.strip().replace(' ', '-').replace('/', '-')
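The naming_convention() helper shown above builds the default output filename stem from the model metadata, following the GGUF naming convention it references. A rough usage sketch, assuming gguf-py re-exports the helper at the top-level gguf package as the convert scripts use it; the exact result depends on which fields are supplied.

    import gguf

    stem = gguf.naming_convention(
        model_name=None,
        base_name="Mixtral",
        finetune_string="Instruct",
        version_string="v0.1",
        size_label="8x7B",
        output_type="Q4_K_M",
    )
    # Expected to be something like "Mixtral-8x7B-Instruct-v0.1-Q4_K_M";
    # the converter appends the ".gguf" extension itself.
    print(stem)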
bigdl/cpp/gguf-py/gguf/vocab.py CHANGED
@@ -122,8 +122,30 @@ class SpecialVocab:
  tokenizer = json.load(f)
  if self.load_merges:
  merges = tokenizer.get('model', {}).get('merges')
- if isinstance(merges, list) and merges and isinstance(merges[0], str):
- self.merges = merges
+ if isinstance(merges, list) and merges:
+ if isinstance(merges[0], str):
+ self.merges = merges
+ elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
+ # New format since transformers 4.45 to support spaces in merges
+ # ref: https://github.com/ggml-org/llama.cpp/issues/9692
+ # TODO: internally store as the new format instead of converting to old
+ if any(' ' in s for pair in merges for s in pair):
+ logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
+ self.merges = [
+ ' '.join(
+ [
+ # ensure the spaces are properly encoded
+ ''.join(
+ chr(ord(c) + 256) if c == ' ' else c
+ for c in part
+ )
+ for part in pair
+ ]
+ )
+ for pair in merges
+ ]
+ else:
+ raise ValueError("Unknown tokenizer merges format")
  added_tokens = tokenizer.get('added_tokens', {})
  else:
  added_tokens = {}
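The new branch in vocab.py above handles the list-of-pairs merges format introduced in transformers 4.45 by re-encoding literal spaces inside a merge part before joining each pair back into the legacy single-string form. A small self-contained sketch of that transform, mirroring the logic added above; the sample merges list is made up.

    # Spaces inside a merge part are remapped to chr(ord(' ') + 256), i.e. 'Ġ' (U+0120),
    # so each pair can still be stored as one space-separated string.
    merges = [["hello world", "foo"], ["a", "b"]]  # hypothetical new-style merges

    legacy = [
        ' '.join(
            ''.join(chr(ord(c) + 256) if c == ' ' else c for c in part)
            for part in pair
        )
        for pair in merges
    ]
    print(legacy)  # ['helloĠworld foo', 'a b']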
bigdl/cpp/libs/common.lib CHANGED
Binary file
bigdl/cpp/libs/ggml.dll CHANGED
Binary file
bigdl/cpp/libs/llama.dll CHANGED
Binary file
bigdl/cpp/libs/ollama.exe CHANGED
Binary file
bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat ADDED
@@ -0,0 +1,16 @@
+ @echo off
+ for /f "delims=" %%i in ('python -c "import bigdl.cpp; print(bigdl.cpp.__file__)"') do set "cpp_file=%%i"
+ for %%a in ("%cpp_file%") do set "cpp_dir=%%~dpa"
+
+ set "cpp_dir=%cpp_dir:~0,-1%"
+ set "lib_dir=%cpp_dir%\libs"
+
+ :: Create symlinks for DLLs and EXE
+ for %%f in (ollama.exe ollama-lib.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll ollama-ggml-base.dll ollama-ggml-cpu.dll ollama-ggml-sycl.dll libc++.dll) do (
+     if exist "%cd%\%%f" del /f "%cd%\%%f"
+     mklink "%cd%\%%f" "%lib_dir%\%%f"
+ )
+
+ :: Create symlink for dist directory
+ if exist "%cd%\dist" rmdir /s /q "%cd%\dist"
+ mklink /D "%cd%\dist" "%lib_dir%\dist"
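For reference, the first lines of init-ollama.bat locate the installed package's libs directory and then symlink its contents into the current working directory, so the script is meant to be run from the directory where the ollama entry points should appear (typically an elevated prompt or one with Developer Mode enabled, since mklink creates symlinks). The equivalent lookup in Python, as a sketch rather than part of the package:

    import os
    import bigdl.cpp

    # Same resolution as the batch script: the directory of bigdl.cpp plus "libs".
    lib_dir = os.path.join(os.path.dirname(bigdl.cpp.__file__), "libs")
    print(lib_dir)  # e.g. ...\site-packages\bigdl\cpp\libs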