bigdl-core-cpp 2.5.0b20240827__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +1196 -147
- bigdl/cpp/convert_hf_to_gguf_update.py +69 -42
- bigdl/cpp/convert_llama_ggml_to_gguf.py +0 -4
- bigdl/cpp/convert_lora_to_gguf.py +82 -14
- bigdl/cpp/gguf-py/gguf/constants.py +645 -187
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +92 -16
- bigdl/cpp/gguf-py/gguf/lazy.py +0 -1
- bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
- bigdl/cpp/gguf-py/gguf/quants.py +81 -0
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +249 -38
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/libc++.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
- bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl_core_cpp-2.5.0b20240827.data/scripts/init-ollama.bat +0 -19
- bigdl_core_cpp-2.5.0b20240827.dist-info/RECORD +0 -54
- {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py
CHANGED
@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",  # falcon
             "word_embeddings",  # bloom
-            "model.embed_tokens",  # llama-hf nemotron
+            "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2
             "tok_embeddings",  # llama-pth
             "embeddings.word_embeddings",  # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -27,6 +27,7 @@ class TensorNameMap:
             "embedding.word_embeddings",  # chatglm
             "transformer.token_embeddings",  # openelm
             "shared",  # t5
+            "rwkv.embeddings",  # rwkv
         ),

         # Token type embeddings
@@ -40,6 +41,8 @@ class TensorNameMap:
             "embeddings.LayerNorm",  # bert
             "emb_ln",  # nomic-bert
             "transformer.norm",  # openelm
+            "rwkv.blocks.0.pre_ln",  # rwkv
+            "backbone.norm",  # wavtokenizer
         ),

         # Position embeddings
@@ -52,18 +55,20 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",  # gptneox
-            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone
+            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
             "output",  # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",  # phi2
             "output_layer",  # chatglm
+            "head",  # rwkv
+            "head.out",  # wavtokenizer
         ),

         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",  # gptneox
             "transformer.ln_f",  # gpt2 gpt-j falcon jais exaone
-            "model.norm",  # llama-hf baichuan internlm2
+            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe
             "norm",  # llama-pth
             "transformer.norm_f",  # mpt dbrx
             "ln_f",  # refact bloom qwen gpt2
@@ -76,6 +81,8 @@ class TensorNameMap:
             "encoder.final_layernorm",  # chatglm
             "transformer.norm",  # openelm
             "model.norm",  # nemotron
+            "rwkv.ln_out",  # rwkv
+            "backbone.final_layer_norm",  # wavtokenizer
         ),

         # Rope frequencies
@@ -83,6 +90,16 @@ class TensorNameMap:
             "rope.freqs",  # llama-pth
             "rotary_pos_emb.inv_freq",  # chatglm
         ),
+
+        MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+        MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
+
+        MODEL_TENSOR.CONV1D: (
+            "backbone.embed",  # roberta
+        ),
+
+        MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+        MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
     }

     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
@@ -94,7 +111,7 @@ class TensorNameMap:
             "transformer.h.{bid}.input_layernorm",  # falcon7b
             "h.{bid}.input_layernorm",  # bloom
             "transformer.h.{bid}.ln_mlp",  # falcon40b
-            "model.layers.{bid}.input_layernorm",  # llama-hf nemotron
+            "model.layers.{bid}.input_layernorm",  # llama-hf nemotron olmoe phimoe
             "layers.{bid}.attention_norm",  # llama-pth
             "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
             "model.layers.{bid}.ln1",  # yi
@@ -108,12 +125,14 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_attn_norm.norm_1",  # dbrx
             "encoder.layers.{bid}.input_layernorm",  # chatglm
             "transformer.layers.{bid}.attn_norm",  # openelm
+            "rwkv.blocks.{bid}.ln1",  # rwkv
         ),

         # Attention norm 2
         MODEL_TENSOR.ATTN_NORM_2: (
-            "transformer.h.{bid}.ln_attn",
+            "transformer.h.{bid}.ln_attn",  # falcon40b
             "encoder.layer.{bid}.layer_norm_1",  # jina-v2-code
+            "rwkv.blocks.{bid}.ln2",  # rwkv
         ),

         # Attention query-key-value
@@ -136,7 +155,8 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron
+            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wq",  # llama-pth
             "encoder.layer.{bid}.attention.self.query",  # bert
             "transformer.h.{bid}.attn.q_proj",  # gpt-j
@@ -148,7 +168,8 @@ class TensorNameMap:

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron
+            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wk",  # llama-pth
             "encoder.layer.{bid}.attention.self.key",  # bert
             "transformer.h.{bid}.attn.k_proj",  # gpt-j
@@ -161,7 +182,7 @@ class TensorNameMap:

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron
+            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe
             "layers.{bid}.attention.wv",  # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.h.{bid}.attn.v_proj",  # gpt-j
@@ -179,7 +200,8 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",  # mpt
             "transformer.h.{bid}.self_attention.dense",  # falcon
             "h.{bid}.self_attention.dense",  # bloom
-            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron
+            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.linear_attn",  # deci
             "layers.{bid}.attention.wo",  # llama-pth
             "encoder.layer.{bid}.attention.output.dense",  # bert
             "transformer.h.{bid}.attn.out_proj",  # gpt-j
@@ -206,7 +228,7 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm",  # gemma2
+            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2
         ),

         # Rotary embeddings
@@ -223,7 +245,7 @@ class TensorNameMap:
             "transformer.h.{bid}.ln_2",  # gpt2 refact qwen jais exaone
             "h.{bid}.post_attention_layernorm",  # bloom
             "transformer.blocks.{bid}.norm_2",  # mpt
-            "model.layers.{bid}.post_attention_layernorm",  # llama-hf nemotron
+            "model.layers.{bid}.post_attention_layernorm",  # llama-hf nemotron olmoe phimoe
             "layers.{bid}.ffn_norm",  # llama-pth
             "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
             "model.layers.{bid}.ln2",  # yi
@@ -241,21 +263,26 @@ class TensorNameMap:

         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2
+            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
         ),

         MODEL_TENSOR.FFN_GATE_INP: (
-            "layers.{bid}.feed_forward.gate",
-            "model.layers.{bid}.block_sparse_moe.gate",
-            "model.layers.{bid}.mlp.gate",
-            "transformer.decoder_layer.{bid}.router",
-            "transformer.blocks.{bid}.ffn.router.layer",
+            "layers.{bid}.feed_forward.gate",  # mixtral
+            "model.layers.{bid}.block_sparse_moe.gate",  # mixtral phimoe
+            "model.layers.{bid}.mlp.gate",  # qwen2moe olmoe
+            "transformer.decoder_layer.{bid}.router",  # Grok
+            "transformer.blocks.{bid}.ffn.router.layer",  # dbrx
+            "model.layers.{bid}.block_sparse_moe.router.layer",  # granitemoe
         ),

         MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert_gate",  # qwen2moe
         ),

+        MODEL_TENSOR.FFN_EXP_PROBS_B: (
+            "model.layers.{bid}.mlp.gate.e_score_correction",  # deepseek-v3
+        ),
+
         # Feed-forward up
         MODEL_TENSOR.FFN_UP: (
             "gpt_neox.layers.{bid}.mlp.dense_h_to_4h",  # gptneox
@@ -263,7 +290,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
             "h.{bid}.mlp.dense_h_to_4h",  # bloom
-            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron
+            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron olmo2
             "layers.{bid}.feed_forward.w3",  # llama-pth
             "encoder.layer.{bid}.intermediate.dense",  # bert
             "transformer.h.{bid}.mlp.fc_in",  # gpt-j
@@ -286,15 +313,16 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
-            "layers.{bid}.feed_forward.experts.w3",
-            "transformer.decoder_layer.{bid}.moe.linear_v",
-            "transformer.blocks.{bid}.ffn.experts.mlp.v1",
-            "model.layers.{bid}.mlp.experts.up_proj",
+            "layers.{bid}.feed_forward.experts.w3",  # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_v",  # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.v1",  # dbrx
+            "model.layers.{bid}.mlp.experts.up_proj",  # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.experts.w3",  # phimoe (merged)
         ),

         MODEL_TENSOR.FFN_UP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.up_proj",  # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.up_proj",  # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.up_proj",  # deepseek deepseek2
         ),

         # AWQ-activation gate
@@ -304,7 +332,7 @@ class TensorNameMap:

         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
             "layers.{bid}.feed_forward.w1",  # llama-pth
             "transformer.h.{bid}.mlp.w2",  # qwen
             "transformer.h.{bid}.mlp.c_fc2",  # jais
@@ -318,15 +346,16 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
-            "layers.{bid}.feed_forward.experts.w1",
-            "transformer.decoder_layer.{bid}.moe.linear",
-            "transformer.blocks.{bid}.ffn.experts.mlp.w1",
-            "model.layers.{bid}.mlp.experts.gate_proj",
+            "layers.{bid}.feed_forward.experts.w1",  # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear",  # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w1",  # dbrx
+            "model.layers.{bid}.mlp.experts.gate_proj",  # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.experts.w1",  # phimoe (merged)
         ),

         MODEL_TENSOR.FFN_GATE_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.gate_proj",  # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.gate_proj",  # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.gate_proj",  # deepseek deepseek2
         ),

         # Feed-forward down
@@ -336,7 +365,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
             "h.{bid}.mlp.dense_4h_to_h",  # bloom
-            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron
+            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron olmo2
             "layers.{bid}.feed_forward.w2",  # llama-pth
             "encoder.layer.{bid}.output.dense",  # bert
             "transformer.h.{bid}.mlp.fc_out",  # gpt-j
@@ -358,21 +387,23 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
-            "layers.{bid}.feed_forward.experts.w2",
-            "transformer.decoder_layer.{bid}.moe.linear_1",
-            "transformer.blocks.{bid}.ffn.experts.mlp.w2",
-            "model.layers.{bid}.mlp.experts.down_proj",
+            "layers.{bid}.feed_forward.experts.w2",  # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_1",  # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w2",  # dbrx
+            "model.layers.{bid}.mlp.experts.down_proj",  # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.output_linear",  # granitemoe
+            "model.layers.{bid}.block_sparse_moe.experts.w2",  # phimoe (merged)
         ),

         MODEL_TENSOR.FFN_DOWN_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.down_proj",  # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.down_proj",  # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.down_proj",  # deepseek deepseek2
         ),

         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
-            "model.layers.{bid}.self_attn.q_norm",  # cohere
+            "model.layers.{bid}.self_attn.q_norm",  # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.q_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",  # openelm
@@ -381,7 +412,7 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
-            "model.layers.{bid}.self_attn.k_norm",  # cohere
+            "model.layers.{bid}.self_attn.k_norm",  # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.k_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",  # openelm
@@ -434,6 +465,114 @@ class TensorNameMap:
             "backbone.layers.{bid}.mixer.out_proj",
         ),

+        MODEL_TENSOR.TIME_MIX_W1: (
+            "rwkv.blocks.{bid}.attention.time_maa_w1",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w1",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_W2: (
+            "rwkv.blocks.{bid}.attention.time_maa_w2",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w2",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_X: (
+            "rwkv.blocks.{bid}.attention.time_maa_x",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_x",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_K: (
+            "rwkv.blocks.{bid}.attention.time_maa_k",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_k",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_V: (
+            "rwkv.blocks.{bid}.attention.time_maa_v",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_v",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_R: (
+            "rwkv.blocks.{bid}.attention.time_maa_r",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_r",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_G: (
+            "rwkv.blocks.{bid}.attention.time_maa_g",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_g",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_W: (
+            "rwkv.blocks.{bid}.attention.time_maa_w",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_FIRST: (
+            "rwkv.blocks.{bid}.attention.time_faaaa",  # rwkv v6
+        ),
+
+        MODEL_TENSOR.TIME_MIX_DECAY: (
+            "rwkv.blocks.{bid}.attention.time_decay",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_DECAY_W1: (
+            "rwkv.blocks.{bid}.attention.time_decay_w1",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay_w1",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_DECAY_W2: (
+            "rwkv.blocks.{bid}.attention.time_decay_w2",  # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay_w2",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_KEY: (
+            "rwkv.blocks.{bid}.attention.key",  # rwkv
+            "model.layers.{bid}.self_attn.k_proj",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_VALUE: (
+            "rwkv.blocks.{bid}.attention.value",  # rwkv
+            "model.layers.{bid}.self_attn.v_proj",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
+            "rwkv.blocks.{bid}.attention.receptance",  # rwkv
+            "model.layers.{bid}.self_attn.q_proj",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_GATE: (
+            "rwkv.blocks.{bid}.attention.gate",  # rwkv
+            "model.layers.{bid}.self_attn.gate",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LN: (
+            "rwkv.blocks.{bid}.attention.ln_x",  # rwkv
+        ),
+
+        MODEL_TENSOR.TIME_MIX_OUTPUT: (
+            "rwkv.blocks.{bid}.attention.output",  # rwkv
+            "model.layers.{bid}.self_attn.o_proj",  # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
+            "rwkv.blocks.{bid}.feed_forward.time_maa_k",  # rwkv v6
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
+            "rwkv.blocks.{bid}.feed_forward.time_maa_r",  # rwkv v6
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_KEY: (
+            "rwkv.blocks.{bid}.feed_forward.key",  # rwkv
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
+            "rwkv.blocks.{bid}.feed_forward.receptance",  # rwkv
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_VALUE: (
+            "rwkv.blocks.{bid}.feed_forward.value",  # rwkv
+        ),
+
         MODEL_TENSOR.ATTN_Q_A: (
             "model.layers.{bid}.self_attn.q_a_proj",  # deepseek2
         ),
@@ -576,9 +715,81 @@ class TensorNameMap:
             "encoder.block.{bid}.layer.1.DenseReluDense.wo",  # t5
         ),

+        ############################################################################
+        # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
         MODEL_TENSOR.ENC_OUTPUT_NORM: (
             "encoder.final_layer_norm",  # t5
         ),
+
+        MODEL_TENSOR.CLS: (
+            "classifier",  # jina
+            "classifier.dense",  # roberta
+        ),
+
+        MODEL_TENSOR.CLS_OUT: (
+            "classifier.out_proj",  # roberta
+        ),
+        #############################################################################
+
+        MODEL_TENSOR.CONVNEXT_DW: (
+            "backbone.convnext.{bid}.dwconv",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_NORM: (
+            "backbone.convnext.{bid}.norm",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_PW1: (
+            "backbone.convnext.{bid}.pwconv1",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_PW2: (
+            "backbone.convnext.{bid}.pwconv2",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_GAMMA: (
+            "backbone.convnext.{bid}.gamma",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_CONV1: (
+            "backbone.posnet.{bid}.conv1",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_CONV2: (
+            "backbone.posnet.{bid}.conv2",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM: (
+            "backbone.posnet.{bid}.norm",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM1: (
+            "backbone.posnet.{bid}.norm1",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM2: (
+            "backbone.posnet.{bid}.norm2",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_NORM: (
+            "backbone.posnet.{bid}.norm",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_Q: (
+            "backbone.posnet.{bid}.q",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_K: (
+            "backbone.posnet.{bid}.k",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_V: (
+            "backbone.posnet.{bid}.v",  # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_OUT: (
+            "backbone.posnet.{bid}.proj_out",  # wavtokenizer
+        ),
     }

     # architecture-specific block mappings
@@ -654,4 +865,4 @@ class TensorNameMap:


 def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
-    return TensorNameMap(arch, n_blocks)
+    return TensorNameMap(arch, n_blocks)
bigdl/cpp/gguf-py/gguf/utility.py
CHANGED
@@ -47,7 +47,7 @@ def size_label(total_params: int, shared_params: int, expert_params: int, expert


 def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
-    # Reference: https://github.com/
+    # Reference: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#gguf-naming-convention
 
     if base_name is not None:
         name = base_name.strip().replace(' ', '-').replace('/', '-')
bigdl/cpp/gguf-py/gguf/vocab.py
CHANGED
@@ -122,8 +122,30 @@ class SpecialVocab:
                 tokenizer = json.load(f)
             if self.load_merges:
                 merges = tokenizer.get('model', {}).get('merges')
-                if isinstance(merges, list) and merges and isinstance(merges[0], str):
-                    self.merges = merges
+                if isinstance(merges, list) and merges:
+                    if isinstance(merges[0], str):
+                        self.merges = merges
+                    elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
+                        # New format since transformers 4.45 to support spaces in merges
+                        # ref: https://github.com/ggml-org/llama.cpp/issues/9692
+                        # TODO: internally store as the new format instead of converting to old
+                        if any(' ' in s for pair in merges for s in pair):
+                            logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
+                        self.merges = [
+                            ' '.join(
+                                [
+                                    # ensure the spaces are properly encoded
+                                    ''.join(
+                                        chr(ord(c) + 256) if c == ' ' else c
+                                        for c in part
+                                    )
+                                    for part in pair
+                                ]
+                            )
+                            for pair in merges
+                        ]
+                    else:
+                        raise ValueError("Unknown tokenizer merges format")
             added_tokens = tokenizer.get('added_tokens', {})
         else:
             added_tokens = {}
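The added branch handles the pair-style merges written by newer transformers tokenizers by re-encoding them into the old single-string form, shifting literal spaces up by 256 code points so the space separator stays unambiguous. A standalone sketch of that transformation, with logic mirroring the hunk above and an invented sample merges list:

# Invented new-format sample: each merge is a [left, right] pair that may contain spaces.
merges = [["h e", "llo"], ["hel", "lo"]]

encoded = [
    ' '.join(
        # spaces inside a merge part become chr(ord(' ') + 256), i.e. 'Ġ'
        ''.join(chr(ord(c) + 256) if c == ' ' else c for c in part)
        for part in pair
    )
    for pair in merges
]
print(encoded)  # -> ['hĠe llo', 'hel lo']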
bigdl/cpp/libs/common.lib
CHANGED
Binary file
bigdl/cpp/libs/ggml.dll
CHANGED
Binary file
bigdl/cpp/libs/llama-batched.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-bench.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-cli.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-gguf.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-lookup.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-server.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-simple.exe
CHANGED
Binary file
bigdl/cpp/libs/llama.dll
CHANGED
Binary file
bigdl/cpp/libs/llava_shared.dll
CHANGED
Binary file
bigdl/cpp/libs/ollama.exe
CHANGED
Binary file
bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat
ADDED
@@ -0,0 +1,16 @@
+@echo off
+for /f "delims=" %%i in ('python -c "import bigdl.cpp; print(bigdl.cpp.__file__)"') do set "cpp_file=%%i"
+for %%a in ("%cpp_file%") do set "cpp_dir=%%~dpa"
+
+set "cpp_dir=%cpp_dir:~0,-1%"
+set "lib_dir=%cpp_dir%\libs"
+
+:: Create symlinks for DLLs and EXE
+for %%f in (ollama.exe ollama-lib.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll ollama-ggml-base.dll ollama-ggml-cpu.dll ollama-ggml-sycl.dll libc++.dll) do (
+    if exist "%cd%\%%f" del /f "%cd%\%%f"
+    mklink "%cd%\%%f" "%lib_dir%\%%f"
+)
+
+:: Create symlink for dist directory
+if exist "%cd%\dist" rmdir /s /q "%cd%\dist"
+mklink /D "%cd%\dist" "%lib_dir%\dist"