bigdl-core-cpp 2.5.0rc1 → 2.6.0 (py3-none-win_amd64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/{convert-hf-to-gguf.py → convert_hf_to_gguf.py} +1673 -278
- bigdl/cpp/convert_hf_to_gguf_update.py +381 -0
- bigdl/cpp/convert_llama_ggml_to_gguf.py +450 -0
- bigdl/cpp/convert_lora_to_gguf.py +461 -0
- bigdl/cpp/gguf-py/gguf/__init__.py +1 -1
- bigdl/cpp/gguf-py/gguf/constants.py +698 -171
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +108 -17
- bigdl/cpp/gguf-py/gguf/lazy.py +3 -1
- bigdl/cpp/gguf-py/gguf/metadata.py +195 -76
- bigdl/cpp/gguf-py/gguf/quants.py +1210 -64
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +262 -43
- bigdl/cpp/gguf-py/gguf/utility.py +2 -2
- bigdl/cpp/gguf-py/gguf/vocab.py +325 -3
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/libc++.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +7 -2
- bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
- bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
- bigdl/cpp/convert.py +0 -1714
- bigdl/cpp/libs/baby-llama.exe +0 -0
- bigdl/cpp/libs/batched-bench.exe +0 -0
- bigdl/cpp/libs/batched.exe +0 -0
- bigdl/cpp/libs/beam-search.exe +0 -0
- bigdl/cpp/libs/benchmark.exe +0 -0
- bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/embedding.exe +0 -0
- bigdl/cpp/libs/export-lora.exe +0 -0
- bigdl/cpp/libs/finetune.exe +0 -0
- bigdl/cpp/libs/ggml_shared.dll +0 -0
- bigdl/cpp/libs/gguf.exe +0 -0
- bigdl/cpp/libs/gritlm.exe +0 -0
- bigdl/cpp/libs/imatrix.exe +0 -0
- bigdl/cpp/libs/infill.exe +0 -0
- bigdl/cpp/libs/llava-cli.exe +0 -0
- bigdl/cpp/libs/lookahead.exe +0 -0
- bigdl/cpp/libs/lookup.exe +0 -0
- bigdl/cpp/libs/ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/main.exe +0 -0
- bigdl/cpp/libs/parallel.exe +0 -0
- bigdl/cpp/libs/passkey.exe +0 -0
- bigdl/cpp/libs/perplexity.exe +0 -0
- bigdl/cpp/libs/q8dot.exe +0 -0
- bigdl/cpp/libs/quantize-stats.exe +0 -0
- bigdl/cpp/libs/quantize.exe +0 -0
- bigdl/cpp/libs/save-load-state.exe +0 -0
- bigdl/cpp/libs/server.exe +0 -0
- bigdl/cpp/libs/simple.exe +0 -0
- bigdl/cpp/libs/speculative.exe +0 -0
- bigdl/cpp/libs/tokenize.exe +0 -0
- bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
- bigdl/cpp/libs/vdot.exe +0 -0
- bigdl_core_cpp-2.5.0rc1.data/scripts/init-ollama.bat +0 -13
- bigdl_core_cpp-2.5.0rc1.dist-info/RECORD +0 -63
- {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
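The listing above shows two renames worth noting: the old un-prefixed llama.cpp executables (main.exe, quantize.exe, server.exe, ...) are gone and llama-prefixed ones ship instead (llama-cli.exe, llama-quantize.exe, llama-server.exe, ...), and the dash-named convert-hf-to-gguf.py becomes the underscore-named, importable convert_hf_to_gguf.py. Below is a minimal sketch for inspecting which binaries an installed wheel actually ships; it assumes bigdl.cpp resolves as an importable package, which the listing itself does not state.

# Minimal sketch: enumerate the bundled binaries of an installed bigdl-core-cpp wheel.
# Assumes `bigdl.cpp` is an importable package; the libs/ layout follows the listing above.
from importlib.resources import files

libs = files("bigdl.cpp") / "libs"
exes = sorted(e.name for e in libs.iterdir() if e.name.endswith(".exe"))
dlls = sorted(e.name for e in libs.iterdir() if e.name.endswith(".dll"))
print("executables:", ", ".join(exes))  # e.g. llama-bench.exe, llama-cli.exe, ollama.exe, ...
print("libraries:", ", ".join(dlls))    # e.g. ggml.dll, llama.dll, ollama_llama.dll, ...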
bigdl/cpp/gguf-py/gguf/tensor_mapping.py

@@ -10,10 +10,10 @@ class TensorNameMap:
         # Token embeddings
         MODEL_TENSOR.TOKEN_EMBD: (
             "gpt_neox.embed_in", # gptneox
-            "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
+            "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings", # falcon
             "word_embeddings", # bloom
-            "model.embed_tokens", # llama-hf
+            "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2
             "tok_embeddings", # llama-pth
             "embeddings.word_embeddings", # bert nomic-bert
             "language_model.embedding.word_embeddings", # persimmon
@@ -27,6 +27,7 @@ class TensorNameMap:
             "embedding.word_embeddings", # chatglm
             "transformer.token_embeddings", # openelm
             "shared", # t5
+            "rwkv.embeddings", # rwkv
         ),
 
         # Token type embeddings
@@ -40,6 +41,8 @@ class TensorNameMap:
             "embeddings.LayerNorm", # bert
             "emb_ln", # nomic-bert
             "transformer.norm", # openelm
+            "rwkv.blocks.0.pre_ln", # rwkv
+            "backbone.norm", # wavtokenizer
         ),
 
         # Position embeddings
@@ -52,18 +55,20 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out", # gptneox
-            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
+            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
             "output", # llama-pth bloom internlm2
             "word_embeddings_for_head", # persimmon
             "lm_head.linear", # phi2
             "output_layer", # chatglm
+            "head", # rwkv
+            "head.out", # wavtokenizer
         ),
 
         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm", # gptneox
-            "transformer.ln_f", # gpt2 gpt-j falcon jais
-            "model.norm", # llama-hf baichuan internlm2
+            "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
+            "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
             "norm", # llama-pth
             "transformer.norm_f", # mpt dbrx
             "ln_f", # refact bloom qwen gpt2
@@ -75,6 +80,9 @@ class TensorNameMap:
             "transformer.rms_norm", # Grok
             "encoder.final_layernorm", # chatglm
             "transformer.norm", # openelm
+            "model.norm", # nemotron
+            "rwkv.ln_out", # rwkv
+            "backbone.final_layer_norm", # wavtokenizer
         ),
 
         # Rope frequencies
@@ -82,18 +90,28 @@ class TensorNameMap:
             "rope.freqs", # llama-pth
             "rotary_pos_emb.inv_freq", # chatglm
         ),
+
+        MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+        MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
+
+        MODEL_TENSOR.CONV1D: (
+            "backbone.embed", # roberta
+        ),
+
+        MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+        MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
     }
 
     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
         # Attention norm
         MODEL_TENSOR.ATTN_NORM: (
             "gpt_neox.layers.{bid}.input_layernorm", # gptneox
-            "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
+            "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais exaone
             "transformer.blocks.{bid}.norm_1", # mpt
             "transformer.h.{bid}.input_layernorm", # falcon7b
             "h.{bid}.input_layernorm", # bloom
             "transformer.h.{bid}.ln_mlp", # falcon40b
-            "model.layers.{bid}.input_layernorm", # llama-hf
+            "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
             "layers.{bid}.attention_norm", # llama-pth
             "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
             "model.layers.{bid}.ln1", # yi
@@ -107,12 +125,14 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
             "encoder.layers.{bid}.input_layernorm", # chatglm
             "transformer.layers.{bid}.attn_norm", # openelm
+            "rwkv.blocks.{bid}.ln1", # rwkv
         ),
 
         # Attention norm 2
         MODEL_TENSOR.ATTN_NORM_2: (
-            "transformer.h.{bid}.ln_attn",
+            "transformer.h.{bid}.ln_attn", # falcon40b
             "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
+            "rwkv.blocks.{bid}.ln2", # rwkv
         ),
 
         # Attention query-key-value
@@ -135,18 +155,21 @@ class TensorNameMap:
 
         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj", # llama-hf
+            "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
             "layers.{bid}.attention.wq", # llama-pth
             "encoder.layer.{bid}.attention.self.query", # bert
             "transformer.h.{bid}.attn.q_proj", # gpt-j
             "model.layers.layers.{bid}.self_attn.q_proj", # plamo
             "model.layers.{bid}.attention.wq", # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
+            "transformer.h.{bid}.attn.attention.q_proj", # exaone
         ),
 
         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj", # llama-hf
+            "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
             "layers.{bid}.attention.wk", # llama-pth
             "encoder.layer.{bid}.attention.self.key", # bert
             "transformer.h.{bid}.attn.k_proj", # gpt-j
@@ -154,18 +177,20 @@ class TensorNameMap:
             "model.layers.layers.{bid}.self_attn.k_proj", # plamo
             "model.layers.{bid}.attention.wk", # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
+            "transformer.h.{bid}.attn.attention.k_proj", # exaone
         ),
 
         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj", # llama-hf
+            "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
             "layers.{bid}.attention.wv", # llama-pth
             "encoder.layer.{bid}.attention.self.value", # bert
             "transformer.h.{bid}.attn.v_proj", # gpt-j
             "transformer.h.{bid}.attn.v", # refact
             "model.layers.layers.{bid}.self_attn.v_proj", # plamo
             "model.layers.{bid}.attention.wv", # internlm2
-            "transformer.decoder_layer.{bid}.multi_head_attention.value"
+            "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
+            "transformer.h.{bid}.attn.attention.v_proj", # exaone
         ),
 
         # Attention output
@@ -175,7 +200,8 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj", # mpt
             "transformer.h.{bid}.self_attention.dense", # falcon
             "h.{bid}.self_attention.dense", # bloom
-            "model.layers.{bid}.self_attn.o_proj", # llama-hf
+            "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.linear_attn", # deci
             "layers.{bid}.attention.wo", # llama-pth
             "encoder.layer.{bid}.attention.output.dense", # bert
             "transformer.h.{bid}.attn.out_proj", # gpt-j
@@ -190,6 +216,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
             "encoder.layers.{bid}.self_attention.dense", # chatglm
             "transformer.layers.{bid}.attn.out_proj", # openelm
+            "transformer.h.{bid}.attn.attention.out_proj", # exaone
         ),
 
         # Attention output norm
@@ -201,7 +228,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm", # gemma2
+            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
         ),
 
         # Rotary embeddings
@@ -215,10 +242,10 @@ class TensorNameMap:
         # Feed-forward norm
         MODEL_TENSOR.FFN_NORM: (
             "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
-            "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
+            "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
             "h.{bid}.post_attention_layernorm", # bloom
             "transformer.blocks.{bid}.norm_2", # mpt
-            "model.layers.{bid}.post_attention_layernorm", # llama-hf
+            "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe phimoe
             "layers.{bid}.ffn_norm", # llama-pth
             "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
             "model.layers.{bid}.ln2", # yi
@@ -236,21 +263,26 @@ class TensorNameMap:
 
         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm", # gemma2
+            "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
        ),
 
         MODEL_TENSOR.FFN_GATE_INP: (
-            "layers.{bid}.feed_forward.gate",
-            "model.layers.{bid}.block_sparse_moe.gate",
-            "model.layers.{bid}.mlp.gate",
-            "transformer.decoder_layer.{bid}.router",
-            "transformer.blocks.{bid}.ffn.router.layer",
+            "layers.{bid}.feed_forward.gate", # mixtral
+            "model.layers.{bid}.block_sparse_moe.gate", # mixtral phimoe
+            "model.layers.{bid}.mlp.gate", # qwen2moe olmoe
+            "transformer.decoder_layer.{bid}.router", # Grok
+            "transformer.blocks.{bid}.ffn.router.layer", # dbrx
+            "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
         ),
 
         MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
         ),
 
+        MODEL_TENSOR.FFN_EXP_PROBS_B: (
+            "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
+        ),
+
         # Feed-forward up
         MODEL_TENSOR.FFN_UP: (
             "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
@@ -258,7 +290,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj", # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
             "h.{bid}.mlp.dense_h_to_4h", # bloom
-            "model.layers.{bid}.mlp.up_proj", # llama-hf refact
+            "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
             "layers.{bid}.feed_forward.w3", # llama-pth
             "encoder.layer.{bid}.intermediate.dense", # bert
             "transformer.h.{bid}.mlp.fc_in", # gpt-j
@@ -277,18 +309,20 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
             "model.layers.{bid}.residual_mlp.w3", # arctic
             "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
+            "transformer.h.{bid}.mlp.c_fc_1", # exaone
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
-            "layers.{bid}.feed_forward.experts.w3",
-            "transformer.decoder_layer.{bid}.moe.linear_v",
-            "transformer.blocks.{bid}.ffn.experts.mlp.v1",
-            "model.layers.{bid}.mlp.experts.up_proj",
+            "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
+            "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
         ),
 
         MODEL_TENSOR.FFN_UP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
         ),
 
         # AWQ-activation gate
@@ -298,7 +332,7 @@ class TensorNameMap:
 
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
+            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
             "layers.{bid}.feed_forward.w1", # llama-pth
             "transformer.h.{bid}.mlp.w2", # qwen
             "transformer.h.{bid}.mlp.c_fc2", # jais
@@ -308,18 +342,20 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
             "transformer.h.{bid}.mlp.linear_1", # refact
             "model.layers.{bid}.residual_mlp.w1", # arctic
+            "transformer.h.{bid}.mlp.c_fc_0", # exaone
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
-            "layers.{bid}.feed_forward.experts.w1",
-            "transformer.decoder_layer.{bid}.moe.linear",
-            "transformer.blocks.{bid}.ffn.experts.mlp.w1",
-            "model.layers.{bid}.mlp.experts.gate_proj",
+            "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
+            "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
         ),
 
         MODEL_TENSOR.FFN_GATE_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
         ),
 
         # Feed-forward down
@@ -329,7 +365,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj", # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
             "h.{bid}.mlp.dense_4h_to_h", # bloom
-            "model.layers.{bid}.mlp.down_proj", # llama-hf
+            "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
             "layers.{bid}.feed_forward.w2", # llama-pth
             "encoder.layer.{bid}.output.dense", # bert
             "transformer.h.{bid}.mlp.fc_out", # gpt-j
@@ -347,24 +383,27 @@ class TensorNameMap:
             "model.layers.{bid}.residual_mlp.w2", # arctic
             "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
             "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
+            "model.layers.h.{bid}.mlp.c_proj", # exaone
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
-            "layers.{bid}.feed_forward.experts.w2",
-            "transformer.decoder_layer.{bid}.moe.linear_1",
-            "transformer.blocks.{bid}.ffn.experts.mlp.w2",
-            "model.layers.{bid}.mlp.experts.down_proj",
+            "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
+            "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
+            "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
         ),
 
         MODEL_TENSOR.FFN_DOWN_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
-            "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
+            "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
-            "model.layers.{bid}.self_attn.q_norm", # cohere
+            "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.q_ln", # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm", # openelm
@@ -373,7 +412,7 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
-            "model.layers.{bid}.self_attn.k_norm", # cohere
+            "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.k_ln", # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm", # openelm
@@ -426,6 +465,114 @@ class TensorNameMap:
             "backbone.layers.{bid}.mixer.out_proj",
         ),
 
+        MODEL_TENSOR.TIME_MIX_W1: (
+            "rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w1", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_W2: (
+            "rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w2", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_X: (
+            "rwkv.blocks.{bid}.attention.time_maa_x", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_x", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_K: (
+            "rwkv.blocks.{bid}.attention.time_maa_k", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_k", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_V: (
+            "rwkv.blocks.{bid}.attention.time_maa_v", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_v", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_R: (
+            "rwkv.blocks.{bid}.attention.time_maa_r", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_r", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_G: (
+            "rwkv.blocks.{bid}.attention.time_maa_g", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_g", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LERP_W: (
+            "rwkv.blocks.{bid}.attention.time_maa_w", # rwkv v6
+            "model.layers.{bid}.self_attn.time_maa_w", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_FIRST: (
+            "rwkv.blocks.{bid}.attention.time_faaaa", # rwkv v6
+        ),
+
+        MODEL_TENSOR.TIME_MIX_DECAY: (
+            "rwkv.blocks.{bid}.attention.time_decay", # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_DECAY_W1: (
+            "rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay_w1", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_DECAY_W2: (
+            "rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv v6
+            "model.layers.{bid}.self_attn.time_decay_w2", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_KEY: (
+            "rwkv.blocks.{bid}.attention.key", # rwkv
+            "model.layers.{bid}.self_attn.k_proj", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_VALUE: (
+            "rwkv.blocks.{bid}.attention.value", # rwkv
+            "model.layers.{bid}.self_attn.v_proj", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
+            "rwkv.blocks.{bid}.attention.receptance", # rwkv
+            "model.layers.{bid}.self_attn.q_proj", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_GATE: (
+            "rwkv.blocks.{bid}.attention.gate", # rwkv
+            "model.layers.{bid}.self_attn.gate", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.TIME_MIX_LN: (
+            "rwkv.blocks.{bid}.attention.ln_x", # rwkv
+        ),
+
+        MODEL_TENSOR.TIME_MIX_OUTPUT: (
+            "rwkv.blocks.{bid}.attention.output", # rwkv
+            "model.layers.{bid}.self_attn.o_proj", # rwkv6qwen2
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
+            "rwkv.blocks.{bid}.feed_forward.time_maa_k", # rwkv v6
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
+            "rwkv.blocks.{bid}.feed_forward.time_maa_r", # rwkv v6
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_KEY: (
+            "rwkv.blocks.{bid}.feed_forward.key", # rwkv
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
+            "rwkv.blocks.{bid}.feed_forward.receptance", # rwkv
+        ),
+
+        MODEL_TENSOR.CHANNEL_MIX_VALUE: (
+            "rwkv.blocks.{bid}.feed_forward.value", # rwkv
+        ),
+
         MODEL_TENSOR.ATTN_Q_A: (
             "model.layers.{bid}.self_attn.q_a_proj", # deepseek2
         ),
@@ -568,9 +715,81 @@ class TensorNameMap:
             "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
         ),
 
+        ############################################################################
+        # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
         MODEL_TENSOR.ENC_OUTPUT_NORM: (
             "encoder.final_layer_norm", # t5
         ),
+
+        MODEL_TENSOR.CLS: (
+            "classifier", # jina
+            "classifier.dense", # roberta
+        ),
+
+        MODEL_TENSOR.CLS_OUT: (
+            "classifier.out_proj", # roberta
+        ),
+        #############################################################################
+
+        MODEL_TENSOR.CONVNEXT_DW: (
+            "backbone.convnext.{bid}.dwconv", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_NORM: (
+            "backbone.convnext.{bid}.norm", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_PW1: (
+            "backbone.convnext.{bid}.pwconv1", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_PW2: (
+            "backbone.convnext.{bid}.pwconv2", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.CONVNEXT_GAMMA: (
+            "backbone.convnext.{bid}.gamma", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_CONV1: (
+            "backbone.posnet.{bid}.conv1", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_CONV2: (
+            "backbone.posnet.{bid}.conv2", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM: (
+            "backbone.posnet.{bid}.norm", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM1: (
+            "backbone.posnet.{bid}.norm1", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_NORM2: (
+            "backbone.posnet.{bid}.norm2", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_NORM: (
+            "backbone.posnet.{bid}.norm", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_Q: (
+            "backbone.posnet.{bid}.q", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_K: (
+            "backbone.posnet.{bid}.k", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_V: (
+            "backbone.posnet.{bid}.v", # wavtokenizer
+        ),
+
+        MODEL_TENSOR.POSNET_ATTN_OUT: (
+            "backbone.posnet.{bid}.proj_out", # wavtokenizer
+        ),
     }
 
     # architecture-specific block mappings
@@ -646,4 +865,4 @@ class TensorNameMap:
 
 
 def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
-    return TensorNameMap(arch, n_blocks)
+    return TensorNameMap(arch, n_blocks)
bigdl/cpp/gguf-py/gguf/utility.py

@@ -47,7 +47,7 @@ def size_label(total_params: int, shared_params: int, expert_params: int, expert
 
 
 def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
-    # Reference: https://github.com/
+    # Reference: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#gguf-naming-convention
 
     if base_name is not None:
         name = base_name.strip().replace(' ', '-').replace('/', '-')
@@ -66,4 +66,4 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st
 
     kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
 
-    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
+    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
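
The utility.py hunks above update the reference link inside naming_convention() and its end-of-file handling; the function still assembles the GGUF file-name stem from its arguments as shown in its return statement. A hedged usage sketch with illustrative values (same import caveat as above):

# Sketch of naming_convention() as defined in the diff above; argument values are
# illustrative only. Assumes the bundled gguf-py directory imports as `gguf`.
from gguf.utility import naming_convention

stem = naming_convention(
    model_name=None,
    base_name="Mixtral",
    finetune_string="Instruct",
    version_string="v0.1",
    size_label="8x7B",
    output_type="Q4_0",
)
print(stem)  # -> Mixtral-8x7B-Instruct-v0.1-Q4_0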