bigdl-core-cpp 2.5.0rc1__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. bigdl/cpp/{convert-hf-to-gguf.py → convert_hf_to_gguf.py} +1673 -278
  2. bigdl/cpp/convert_hf_to_gguf_update.py +381 -0
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +450 -0
  4. bigdl/cpp/convert_lora_to_gguf.py +461 -0
  5. bigdl/cpp/gguf-py/gguf/__init__.py +1 -1
  6. bigdl/cpp/gguf-py/gguf/constants.py +698 -171
  7. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  8. bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
  9. bigdl/cpp/gguf-py/gguf/gguf_writer.py +108 -17
  10. bigdl/cpp/gguf-py/gguf/lazy.py +3 -1
  11. bigdl/cpp/gguf-py/gguf/metadata.py +195 -76
  12. bigdl/cpp/gguf-py/gguf/quants.py +1210 -64
  13. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +262 -43
  14. bigdl/cpp/gguf-py/gguf/utility.py +2 -2
  15. bigdl/cpp/gguf-py/gguf/vocab.py +325 -3
  16. bigdl/cpp/libs/common.lib +0 -0
  17. bigdl/cpp/libs/ggml-base.dll +0 -0
  18. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  19. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  20. bigdl/cpp/libs/ggml.dll +0 -0
  21. bigdl/cpp/libs/libc++.dll +0 -0
  22. bigdl/cpp/libs/llama-batched.exe +0 -0
  23. bigdl/cpp/libs/llama-bench.exe +0 -0
  24. bigdl/cpp/libs/llama-cli.exe +0 -0
  25. bigdl/cpp/libs/llama-embedding.exe +0 -0
  26. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  27. bigdl/cpp/libs/llama-gguf.exe +0 -0
  28. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  29. bigdl/cpp/libs/llama-lookup.exe +0 -0
  30. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  31. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  32. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  33. bigdl/cpp/libs/llama-quantize.exe +0 -0
  34. bigdl/cpp/libs/llama-server.exe +0 -0
  35. bigdl/cpp/libs/llama-simple.exe +0 -0
  36. bigdl/cpp/libs/llama-speculative.exe +0 -0
  37. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  38. bigdl/cpp/libs/llama.dll +0 -0
  39. bigdl/cpp/libs/llava_shared.dll +0 -0
  40. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  41. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  42. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  43. bigdl/cpp/libs/ollama-lib.exe +0 -0
  44. bigdl/cpp/libs/ollama.exe +0 -0
  45. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  46. bigdl/cpp/libs/ollama_llama.dll +0 -0
  47. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  48. {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +7 -2
  49. bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
  50. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
  51. bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
  52. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
  53. bigdl/cpp/convert.py +0 -1714
  54. bigdl/cpp/libs/baby-llama.exe +0 -0
  55. bigdl/cpp/libs/batched-bench.exe +0 -0
  56. bigdl/cpp/libs/batched.exe +0 -0
  57. bigdl/cpp/libs/beam-search.exe +0 -0
  58. bigdl/cpp/libs/benchmark.exe +0 -0
  59. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  60. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  61. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  62. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  63. bigdl/cpp/libs/embedding.exe +0 -0
  64. bigdl/cpp/libs/export-lora.exe +0 -0
  65. bigdl/cpp/libs/finetune.exe +0 -0
  66. bigdl/cpp/libs/ggml_shared.dll +0 -0
  67. bigdl/cpp/libs/gguf.exe +0 -0
  68. bigdl/cpp/libs/gritlm.exe +0 -0
  69. bigdl/cpp/libs/imatrix.exe +0 -0
  70. bigdl/cpp/libs/infill.exe +0 -0
  71. bigdl/cpp/libs/llava-cli.exe +0 -0
  72. bigdl/cpp/libs/lookahead.exe +0 -0
  73. bigdl/cpp/libs/lookup.exe +0 -0
  74. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  75. bigdl/cpp/libs/main.exe +0 -0
  76. bigdl/cpp/libs/parallel.exe +0 -0
  77. bigdl/cpp/libs/passkey.exe +0 -0
  78. bigdl/cpp/libs/perplexity.exe +0 -0
  79. bigdl/cpp/libs/q8dot.exe +0 -0
  80. bigdl/cpp/libs/quantize-stats.exe +0 -0
  81. bigdl/cpp/libs/quantize.exe +0 -0
  82. bigdl/cpp/libs/save-load-state.exe +0 -0
  83. bigdl/cpp/libs/server.exe +0 -0
  84. bigdl/cpp/libs/simple.exe +0 -0
  85. bigdl/cpp/libs/speculative.exe +0 -0
  86. bigdl/cpp/libs/tokenize.exe +0 -0
  87. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  88. bigdl/cpp/libs/vdot.exe +0 -0
  89. bigdl_core_cpp-2.5.0rc1.data/scripts/init-ollama.bat +0 -13
  90. bigdl_core_cpp-2.5.0rc1.dist-info/RECORD +0 -63
  91. {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
  92. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py
@@ -10,10 +10,10 @@ class TensorNameMap:
  # Token embeddings
  MODEL_TENSOR.TOKEN_EMBD: (
  "gpt_neox.embed_in", # gptneox
- "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
+ "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
  "transformer.word_embeddings", # falcon
  "word_embeddings", # bloom
- "model.embed_tokens", # llama-hf
+ "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2
  "tok_embeddings", # llama-pth
  "embeddings.word_embeddings", # bert nomic-bert
  "language_model.embedding.word_embeddings", # persimmon
@@ -27,6 +27,7 @@ class TensorNameMap:
  "embedding.word_embeddings", # chatglm
  "transformer.token_embeddings", # openelm
  "shared", # t5
+ "rwkv.embeddings", # rwkv
  ),

  # Token type embeddings
@@ -40,6 +41,8 @@ class TensorNameMap:
  "embeddings.LayerNorm", # bert
  "emb_ln", # nomic-bert
  "transformer.norm", # openelm
+ "rwkv.blocks.0.pre_ln", # rwkv
+ "backbone.norm", # wavtokenizer
  ),

  # Position embeddings
@@ -52,18 +55,20 @@ class TensorNameMap:
  # Output
  MODEL_TENSOR.OUTPUT: (
  "embed_out", # gptneox
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
  "output", # llama-pth bloom internlm2
  "word_embeddings_for_head", # persimmon
  "lm_head.linear", # phi2
  "output_layer", # chatglm
+ "head", # rwkv
+ "head.out", # wavtokenizer
  ),

  # Output norm
  MODEL_TENSOR.OUTPUT_NORM: (
  "gpt_neox.final_layer_norm", # gptneox
- "transformer.ln_f", # gpt2 gpt-j falcon jais
- "model.norm", # llama-hf baichuan internlm2
+ "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
+ "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
  "norm", # llama-pth
  "transformer.norm_f", # mpt dbrx
  "ln_f", # refact bloom qwen gpt2
@@ -75,6 +80,9 @@ class TensorNameMap:
  "transformer.rms_norm", # Grok
  "encoder.final_layernorm", # chatglm
  "transformer.norm", # openelm
+ "model.norm", # nemotron
+ "rwkv.ln_out", # rwkv
+ "backbone.final_layer_norm", # wavtokenizer
  ),

  # Rope frequencies
@@ -82,18 +90,28 @@ class TensorNameMap:
  "rope.freqs", # llama-pth
  "rotary_pos_emb.inv_freq", # chatglm
  ),
+
+ MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
+
+ MODEL_TENSOR.CONV1D: (
+ "backbone.embed", # roberta
+ ),
+
+ MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
  }

  block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
  # Attention norm
  MODEL_TENSOR.ATTN_NORM: (
  "gpt_neox.layers.{bid}.input_layernorm", # gptneox
- "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
+ "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais exaone
  "transformer.blocks.{bid}.norm_1", # mpt
  "transformer.h.{bid}.input_layernorm", # falcon7b
  "h.{bid}.input_layernorm", # bloom
  "transformer.h.{bid}.ln_mlp", # falcon40b
- "model.layers.{bid}.input_layernorm", # llama-hf
+ "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe
  "layers.{bid}.attention_norm", # llama-pth
  "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
  "model.layers.{bid}.ln1", # yi
@@ -107,12 +125,14 @@ class TensorNameMap:
  "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
  "encoder.layers.{bid}.input_layernorm", # chatglm
  "transformer.layers.{bid}.attn_norm", # openelm
+ "rwkv.blocks.{bid}.ln1", # rwkv
  ),

  # Attention norm 2
  MODEL_TENSOR.ATTN_NORM_2: (
- "transformer.h.{bid}.ln_attn", # falcon40b
+ "transformer.h.{bid}.ln_attn", # falcon40b
  "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
+ "rwkv.blocks.{bid}.ln2", # rwkv
  ),

  # Attention query-key-value
@@ -135,18 +155,21 @@ class TensorNameMap:

  # Attention query
  MODEL_TENSOR.ATTN_Q: (
- "model.layers.{bid}.self_attn.q_proj", # llama-hf
+ "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2 phimoe
+ "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
  "layers.{bid}.attention.wq", # llama-pth
  "encoder.layer.{bid}.attention.self.query", # bert
  "transformer.h.{bid}.attn.q_proj", # gpt-j
  "model.layers.layers.{bid}.self_attn.q_proj", # plamo
  "model.layers.{bid}.attention.wq", # internlm2
  "transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
+ "transformer.h.{bid}.attn.attention.q_proj", # exaone
  ),

  # Attention key
  MODEL_TENSOR.ATTN_K: (
- "model.layers.{bid}.self_attn.k_proj", # llama-hf
+ "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2 phimoe
+ "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
  "layers.{bid}.attention.wk", # llama-pth
  "encoder.layer.{bid}.attention.self.key", # bert
  "transformer.h.{bid}.attn.k_proj", # gpt-j
@@ -154,18 +177,20 @@ class TensorNameMap:
  "model.layers.layers.{bid}.self_attn.k_proj", # plamo
  "model.layers.{bid}.attention.wk", # internlm2
  "transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
+ "transformer.h.{bid}.attn.attention.k_proj", # exaone
  ),

  # Attention value
  MODEL_TENSOR.ATTN_V: (
- "model.layers.{bid}.self_attn.v_proj", # llama-hf
+ "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
  "layers.{bid}.attention.wv", # llama-pth
  "encoder.layer.{bid}.attention.self.value", # bert
  "transformer.h.{bid}.attn.v_proj", # gpt-j
  "transformer.h.{bid}.attn.v", # refact
  "model.layers.layers.{bid}.self_attn.v_proj", # plamo
  "model.layers.{bid}.attention.wv", # internlm2
- "transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok
+ "transformer.decoder_layer.{bid}.multi_head_attention.value",# Grok
+ "transformer.h.{bid}.attn.attention.v_proj", # exaone
  ),

  # Attention output
@@ -175,7 +200,8 @@ class TensorNameMap:
  "transformer.blocks.{bid}.attn.out_proj", # mpt
  "transformer.h.{bid}.self_attention.dense", # falcon
  "h.{bid}.self_attention.dense", # bloom
- "model.layers.{bid}.self_attn.o_proj", # llama-hf
+ "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
+ "model.layers.{bid}.self_attn.linear_attn", # deci
  "layers.{bid}.attention.wo", # llama-pth
  "encoder.layer.{bid}.attention.output.dense", # bert
  "transformer.h.{bid}.attn.out_proj", # gpt-j
@@ -190,6 +216,7 @@ class TensorNameMap:
  "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
  "encoder.layers.{bid}.self_attention.dense", # chatglm
  "transformer.layers.{bid}.attn.out_proj", # openelm
+ "transformer.h.{bid}.attn.attention.out_proj", # exaone
  ),

  # Attention output norm
@@ -201,7 +228,7 @@ class TensorNameMap:
  ),

  MODEL_TENSOR.ATTN_POST_NORM: (
- "model.layers.{bid}.post_attention_layernorm", # gemma2
+ "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
  ),

  # Rotary embeddings
@@ -215,10 +242,10 @@ class TensorNameMap:
  # Feed-forward norm
  MODEL_TENSOR.FFN_NORM: (
  "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
- "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
+ "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
  "h.{bid}.post_attention_layernorm", # bloom
  "transformer.blocks.{bid}.norm_2", # mpt
- "model.layers.{bid}.post_attention_layernorm", # llama-hf
+ "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe phimoe
  "layers.{bid}.ffn_norm", # llama-pth
  "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
  "model.layers.{bid}.ln2", # yi
@@ -236,21 +263,26 @@ class TensorNameMap:

  # Post feed-forward norm
  MODEL_TENSOR.FFN_POST_NORM: (
- "model.layers.{bid}.post_feedforward_layernorm", # gemma2
+ "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
  ),

  MODEL_TENSOR.FFN_GATE_INP: (
- "layers.{bid}.feed_forward.gate", # mixtral
- "model.layers.{bid}.block_sparse_moe.gate", # mixtral
- "model.layers.{bid}.mlp.gate", # qwen2moe
- "transformer.decoder_layer.{bid}.router", # Grok
- "transformer.blocks.{bid}.ffn.router.layer", # dbrx
+ "layers.{bid}.feed_forward.gate", # mixtral
+ "model.layers.{bid}.block_sparse_moe.gate", # mixtral phimoe
+ "model.layers.{bid}.mlp.gate", # qwen2moe olmoe
+ "transformer.decoder_layer.{bid}.router", # Grok
+ "transformer.blocks.{bid}.ffn.router.layer", # dbrx
+ "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
  ),

  MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
  "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
  ),

+ MODEL_TENSOR.FFN_EXP_PROBS_B: (
+ "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
+ ),
+
  # Feed-forward up
  MODEL_TENSOR.FFN_UP: (
  "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
@@ -258,7 +290,7 @@ class TensorNameMap:
  "transformer.blocks.{bid}.ffn.up_proj", # mpt
  "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
  "h.{bid}.mlp.dense_h_to_4h", # bloom
- "model.layers.{bid}.mlp.up_proj", # llama-hf refact
+ "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
  "layers.{bid}.feed_forward.w3", # llama-pth
  "encoder.layer.{bid}.intermediate.dense", # bert
  "transformer.h.{bid}.mlp.fc_in", # gpt-j
@@ -277,18 +309,20 @@ class TensorNameMap:
  "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
  "model.layers.{bid}.residual_mlp.w3", # arctic
  "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
+ "transformer.h.{bid}.mlp.c_fc_1", # exaone
  ),

  MODEL_TENSOR.FFN_UP_EXP: (
- "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
- "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
+ "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
+ "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
+ "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
  ),

  MODEL_TENSOR.FFN_UP_SHEXP: (
  "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
- "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
+ "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
  ),

  # AWQ-activation gate
@@ -298,7 +332,7 @@ class TensorNameMap:

  # Feed-forward gate
  MODEL_TENSOR.FFN_GATE: (
- "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
+ "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
  "layers.{bid}.feed_forward.w1", # llama-pth
  "transformer.h.{bid}.mlp.w2", # qwen
  "transformer.h.{bid}.mlp.c_fc2", # jais
@@ -308,18 +342,20 @@ class TensorNameMap:
  "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
  "transformer.h.{bid}.mlp.linear_1", # refact
  "model.layers.{bid}.residual_mlp.w1", # arctic
+ "transformer.h.{bid}.mlp.c_fc_0", # exaone
  ),

  MODEL_TENSOR.FFN_GATE_EXP: (
- "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
- "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
+ "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
+ "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
+ "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
  ),

  MODEL_TENSOR.FFN_GATE_SHEXP: (
  "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
- "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
+ "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
  ),

  # Feed-forward down
@@ -329,7 +365,7 @@ class TensorNameMap:
  "transformer.blocks.{bid}.ffn.down_proj", # mpt
  "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
  "h.{bid}.mlp.dense_4h_to_h", # bloom
- "model.layers.{bid}.mlp.down_proj", # llama-hf
+ "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
  "layers.{bid}.feed_forward.w2", # llama-pth
  "encoder.layer.{bid}.output.dense", # bert
  "transformer.h.{bid}.mlp.fc_out", # gpt-j
@@ -347,24 +383,27 @@ class TensorNameMap:
  "model.layers.{bid}.residual_mlp.w2", # arctic
  "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
  "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
+ "model.layers.h.{bid}.mlp.c_proj", # exaone
  ),

  MODEL_TENSOR.FFN_DOWN_EXP: (
- "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
- "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
- "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
+ "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
+ "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
+ "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
+ "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
+ "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
  ),

  MODEL_TENSOR.FFN_DOWN_SHEXP: (
  "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
- "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
+ "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
  ),

  MODEL_TENSOR.ATTN_Q_NORM: (
  "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
  "model.layers.{bid}.self_attn.q_layernorm", # persimmon
- "model.layers.{bid}.self_attn.q_norm", # cohere
+ "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
  "transformer.blocks.{bid}.attn.q_ln", # sea-lion
  "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
  "transformer.layers.{bid}.attn.q_norm", # openelm
@@ -373,7 +412,7 @@ class TensorNameMap:
  MODEL_TENSOR.ATTN_K_NORM: (
  "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
  "model.layers.{bid}.self_attn.k_layernorm", # persimmon
- "model.layers.{bid}.self_attn.k_norm", # cohere
+ "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
  "transformer.blocks.{bid}.attn.k_ln", # sea-lion
  "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
  "transformer.layers.{bid}.attn.k_norm", # openelm
@@ -426,6 +465,114 @@ class TensorNameMap:
  "backbone.layers.{bid}.mixer.out_proj",
  ),

+ MODEL_TENSOR.TIME_MIX_W1: (
+ "rwkv.blocks.{bid}.attention.time_maa_w1", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_w1", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_W2: (
+ "rwkv.blocks.{bid}.attention.time_maa_w2", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_w2", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_X: (
+ "rwkv.blocks.{bid}.attention.time_maa_x", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_x", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_K: (
+ "rwkv.blocks.{bid}.attention.time_maa_k", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_k", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_V: (
+ "rwkv.blocks.{bid}.attention.time_maa_v", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_v", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_R: (
+ "rwkv.blocks.{bid}.attention.time_maa_r", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_r", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_G: (
+ "rwkv.blocks.{bid}.attention.time_maa_g", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_g", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LERP_W: (
+ "rwkv.blocks.{bid}.attention.time_maa_w", # rwkv v6
+ "model.layers.{bid}.self_attn.time_maa_w", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_FIRST: (
+ "rwkv.blocks.{bid}.attention.time_faaaa", # rwkv v6
+ ),
+
+ MODEL_TENSOR.TIME_MIX_DECAY: (
+ "rwkv.blocks.{bid}.attention.time_decay", # rwkv v6
+ "model.layers.{bid}.self_attn.time_decay", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_DECAY_W1: (
+ "rwkv.blocks.{bid}.attention.time_decay_w1", # rwkv v6
+ "model.layers.{bid}.self_attn.time_decay_w1", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_DECAY_W2: (
+ "rwkv.blocks.{bid}.attention.time_decay_w2", # rwkv v6
+ "model.layers.{bid}.self_attn.time_decay_w2", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_KEY: (
+ "rwkv.blocks.{bid}.attention.key", # rwkv
+ "model.layers.{bid}.self_attn.k_proj", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_VALUE: (
+ "rwkv.blocks.{bid}.attention.value", # rwkv
+ "model.layers.{bid}.self_attn.v_proj", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE: (
+ "rwkv.blocks.{bid}.attention.receptance", # rwkv
+ "model.layers.{bid}.self_attn.q_proj", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_GATE: (
+ "rwkv.blocks.{bid}.attention.gate", # rwkv
+ "model.layers.{bid}.self_attn.gate", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.TIME_MIX_LN: (
+ "rwkv.blocks.{bid}.attention.ln_x", # rwkv
+ ),
+
+ MODEL_TENSOR.TIME_MIX_OUTPUT: (
+ "rwkv.blocks.{bid}.attention.output", # rwkv
+ "model.layers.{bid}.self_attn.o_proj", # rwkv6qwen2
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_LERP_K: (
+ "rwkv.blocks.{bid}.feed_forward.time_maa_k", # rwkv v6
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_LERP_R: (
+ "rwkv.blocks.{bid}.feed_forward.time_maa_r", # rwkv v6
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_KEY: (
+ "rwkv.blocks.{bid}.feed_forward.key", # rwkv
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: (
+ "rwkv.blocks.{bid}.feed_forward.receptance", # rwkv
+ ),
+
+ MODEL_TENSOR.CHANNEL_MIX_VALUE: (
+ "rwkv.blocks.{bid}.feed_forward.value", # rwkv
+ ),
+
  MODEL_TENSOR.ATTN_Q_A: (
  "model.layers.{bid}.self_attn.q_a_proj", # deepseek2
  ),
@@ -568,9 +715,81 @@ class TensorNameMap:
  "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
  ),

+ ############################################################################
+ # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
  MODEL_TENSOR.ENC_OUTPUT_NORM: (
  "encoder.final_layer_norm", # t5
  ),
+
+ MODEL_TENSOR.CLS: (
+ "classifier", # jina
+ "classifier.dense", # roberta
+ ),
+
+ MODEL_TENSOR.CLS_OUT: (
+ "classifier.out_proj", # roberta
+ ),
+ #############################################################################
+
+ MODEL_TENSOR.CONVNEXT_DW: (
+ "backbone.convnext.{bid}.dwconv", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.CONVNEXT_NORM: (
+ "backbone.convnext.{bid}.norm", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.CONVNEXT_PW1: (
+ "backbone.convnext.{bid}.pwconv1", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.CONVNEXT_PW2: (
+ "backbone.convnext.{bid}.pwconv2", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.CONVNEXT_GAMMA: (
+ "backbone.convnext.{bid}.gamma", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_CONV1: (
+ "backbone.posnet.{bid}.conv1", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_CONV2: (
+ "backbone.posnet.{bid}.conv2", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_NORM: (
+ "backbone.posnet.{bid}.norm", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_NORM1: (
+ "backbone.posnet.{bid}.norm1", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_NORM2: (
+ "backbone.posnet.{bid}.norm2", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_NORM: (
+ "backbone.posnet.{bid}.norm", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_Q: (
+ "backbone.posnet.{bid}.q", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_K: (
+ "backbone.posnet.{bid}.k", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_V: (
+ "backbone.posnet.{bid}.v", # wavtokenizer
+ ),
+
+ MODEL_TENSOR.POSNET_ATTN_OUT: (
+ "backbone.posnet.{bid}.proj_out", # wavtokenizer
+ ),
  }

  # architecture-specific block mappings
@@ -646,4 +865,4 @@ class TensorNameMap:


  def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
- return TensorNameMap(arch, n_blocks)
+ return TensorNameMap(arch, n_blocks)
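Note (not part of the diff): the entries added above extend TensorNameMap, which the bundled convert scripts (e.g. convert_hf_to_gguf.py) use to translate checkpoint tensor names into canonical GGUF tensor names. A minimal illustrative sketch of that lookup, assuming the bundled gguf-py directory is on sys.path so that "import gguf" resolves to this package's copy, and that the usual gguf-py API (get_tensor_name_map, TensorNameMap.get_name) is unchanged:

import gguf  # assumes bigdl/cpp/gguf-py is on sys.path, as the convert scripts arrange

# Build the name map for a llama-style architecture with 32 blocks.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, 32)

# Checkpoint tensor names are resolved with optional suffix stripping;
# the outputs below are what the llama mapping is expected to yield.
print(tmap.get_name("model.embed_tokens.weight", try_suffixes=(".weight", ".bias")))
# expected: "token_embd.weight"
print(tmap.get_name("model.layers.0.self_attn.q_proj.weight", try_suffixes=(".weight", ".bias")))
# expected: "blk.0.attn_q.weight"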
bigdl/cpp/gguf-py/gguf/utility.py
@@ -47,7 +47,7 @@ def size_label(total_params: int, shared_params: int, expert_params: int, expert


  def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
- # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+ # Reference: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#gguf-naming-convention

  if base_name is not None:
  name = base_name.strip().replace(' ', '-').replace('/', '-')
@@ -66,4 +66,4 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st

  kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

- return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
+ return f"{name}{parameters}{finetune}{version}{encoding}{kind}"