bigdl-core-cpp 2.1.0b20230202__py3-none-win_amd64.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between package versions.
Files changed (61)
  1. bigdl/cpp/__init__.py +0 -0
  2. bigdl/cpp/convert-hf-to-gguf.py +2858 -0
  3. bigdl/cpp/convert.py +1714 -0
  4. bigdl/cpp/gguf-py/__init__.py +0 -0
  5. bigdl/cpp/gguf-py/gguf/__init__.py +7 -0
  6. bigdl/cpp/gguf-py/gguf/constants.py +1033 -0
  7. bigdl/cpp/gguf-py/gguf/gguf.py +15 -0
  8. bigdl/cpp/gguf-py/gguf/gguf_reader.py +296 -0
  9. bigdl/cpp/gguf-py/gguf/gguf_writer.py +554 -0
  10. bigdl/cpp/gguf-py/gguf/lazy.py +236 -0
  11. bigdl/cpp/gguf-py/gguf/py.typed +0 -0
  12. bigdl/cpp/gguf-py/gguf/quants.py +123 -0
  13. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +463 -0
  14. bigdl/cpp/gguf-py/gguf/vocab.py +165 -0
  15. bigdl/cpp/libs/baby-llama.exe +0 -0
  16. bigdl/cpp/libs/batched-bench.exe +0 -0
  17. bigdl/cpp/libs/batched.exe +0 -0
  18. bigdl/cpp/libs/beam-search.exe +0 -0
  19. bigdl/cpp/libs/benchmark.exe +0 -0
  20. bigdl/cpp/libs/common.lib +0 -0
  21. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  22. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  23. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  24. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  25. bigdl/cpp/libs/embedding.exe +0 -0
  26. bigdl/cpp/libs/export-lora.exe +0 -0
  27. bigdl/cpp/libs/finetune.exe +0 -0
  28. bigdl/cpp/libs/ggml_shared.dll +0 -0
  29. bigdl/cpp/libs/gguf.exe +0 -0
  30. bigdl/cpp/libs/gritlm.exe +0 -0
  31. bigdl/cpp/libs/imatrix.exe +0 -0
  32. bigdl/cpp/libs/infill.exe +0 -0
  33. bigdl/cpp/libs/llama-bench.exe +0 -0
  34. bigdl/cpp/libs/llama.dll +0 -0
  35. bigdl/cpp/libs/llava-cli.exe +0 -0
  36. bigdl/cpp/libs/llava_shared.dll +0 -0
  37. bigdl/cpp/libs/lookahead.exe +0 -0
  38. bigdl/cpp/libs/lookup.exe +0 -0
  39. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  40. bigdl/cpp/libs/main.exe +0 -0
  41. bigdl/cpp/libs/ollama.exe +0 -0
  42. bigdl/cpp/libs/parallel.exe +0 -0
  43. bigdl/cpp/libs/passkey.exe +0 -0
  44. bigdl/cpp/libs/perplexity.exe +0 -0
  45. bigdl/cpp/libs/q8dot.exe +0 -0
  46. bigdl/cpp/libs/quantize-stats.exe +0 -0
  47. bigdl/cpp/libs/quantize.exe +0 -0
  48. bigdl/cpp/libs/save-load-state.exe +0 -0
  49. bigdl/cpp/libs/server.exe +0 -0
  50. bigdl/cpp/libs/simple.exe +0 -0
  51. bigdl/cpp/libs/speculative.exe +0 -0
  52. bigdl/cpp/libs/tokenize.exe +0 -0
  53. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  54. bigdl/cpp/libs/vdot.exe +0 -0
  55. bigdl_core_cpp-2.1.0b20230202.data/scripts/init-llama-cpp.bat +19 -0
  56. bigdl_core_cpp-2.1.0b20230202.data/scripts/init-llama-cpp.ps1 +13 -0
  57. bigdl_core_cpp-2.1.0b20230202.data/scripts/init-ollama.bat +13 -0
  58. bigdl_core_cpp-2.1.0b20230202.dist-info/METADATA +18 -0
  59. bigdl_core_cpp-2.1.0b20230202.dist-info/RECORD +61 -0
  60. bigdl_core_cpp-2.1.0b20230202.dist-info/WHEEL +5 -0
  61. bigdl_core_cpp-2.1.0b20230202.dist-info/top_level.txt +1 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py
@@ -0,0 +1,463 @@
+ from __future__ import annotations
+
+ from typing import Sequence
+
+ from .constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS, TENSOR_NAMES
+
+
+ class TensorNameMap:
+     mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
+         # Token embeddings
+         MODEL_TENSOR.TOKEN_EMBD: (
+             "gpt_neox.embed_in", # gptneox
+             "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
+             "transformer.word_embeddings", # falcon
+             "word_embeddings", # bloom
+             "model.embed_tokens", # llama-hf
+             "tok_embeddings", # llama-pth
+             "embeddings.word_embeddings", # bert nomic-bert
+             "language_model.embedding.word_embeddings", # persimmon
+             "wte", # gpt2
+             "transformer.embd.wte", # phi2
+             "model.tok_embeddings", # internlm2
+             "model.embedding", # mamba-qbert
+             "backbone.embedding", # mamba
+             "backbone.embeddings", # mamba-hf
+             "transformer.in_out_embed", # Grok
+         ),
+
+         # Token type embeddings
+         MODEL_TENSOR.TOKEN_TYPES: (
+             "embeddings.token_type_embeddings", # bert nomic-bert
+         ),
+
+         # Normalization of token embeddings
+         MODEL_TENSOR.TOKEN_EMBD_NORM: (
+             "word_embeddings_layernorm", # bloom
+             "embeddings.LayerNorm", # bert
+             "emb_ln", # nomic-bert
+         ),
+
+         # Position embeddings
+         MODEL_TENSOR.POS_EMBD: (
+             "transformer.wpe", # gpt2
+             "embeddings.position_embeddings", # bert
+             "wpe", # gpt2
+         ),
+
+         # Output
+         MODEL_TENSOR.OUTPUT: (
+             "embed_out", # gptneox
+             "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
+             "output", # llama-pth bloom internlm2
+             "word_embeddings_for_head", # persimmon
+             "lm_head.linear", # phi2
+         ),
+
+         # Output norm
+         MODEL_TENSOR.OUTPUT_NORM: (
+             "gpt_neox.final_layer_norm", # gptneox
+             "transformer.ln_f", # gpt2 gpt-j falcon
+             "model.norm", # llama-hf baichuan internlm2
+             "norm", # llama-pth
+             "transformer.norm_f", # mpt dbrx
+             "ln_f", # refact bloom qwen gpt2
+             "language_model.encoder.final_layernorm", # persimmon
+             "model.final_layernorm", # persimmon
+             "lm_head.ln", # phi2
+             "model.norm_f", # mamba-qbert
+             "backbone.norm_f", # mamba
+             "transformer.rms_norm", # Grok
+         ),
+
+         # Rope frequencies
+         MODEL_TENSOR.ROPE_FREQS: (
+             "rope.freqs", # llama-pth
+         ),
+     }
+
+     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
+         # Attention norm
+         MODEL_TENSOR.ATTN_NORM: (
+             "gpt_neox.layers.{bid}.input_layernorm", # gptneox
+             "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
+             "transformer.blocks.{bid}.norm_1", # mpt
+             "transformer.h.{bid}.input_layernorm", # falcon7b
+             "h.{bid}.input_layernorm", # bloom
+             "transformer.h.{bid}.ln_mlp", # falcon40b
+             "model.layers.{bid}.input_layernorm", # llama-hf
+             "layers.{bid}.attention_norm", # llama-pth
+             "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
+             "model.layers.{bid}.ln1", # yi
+             "h.{bid}.ln_1", # gpt2
+             "transformer.h.{bid}.ln", # phi2
+             "model.layers.layers.{bid}.norm", # plamo
+             "model.layers.{bid}.attention_norm", # internlm2
+             "model.layers.{bid}.norm", # mamba-qbert
+             "backbone.layers.{bid}.norm", # mamba
+             "transformer.decoder_layer.{bid}.rms_norm", # Grok
+             "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
+         ),
+
+         # Attention norm 2
+         MODEL_TENSOR.ATTN_NORM_2: (
+             "transformer.h.{bid}.ln_attn", # falcon40b
+         ),
+
+         # Attention query-key-value
+         MODEL_TENSOR.ATTN_QKV: (
+             "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
+             "transformer.h.{bid}.attn.c_attn", # gpt2 qwen
+             "transformer.blocks.{bid}.attn.Wqkv", # mpt
+             "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
+             "transformer.h.{bid}.self_attention.query_key_value", # falcon
+             "h.{bid}.self_attention.query_key_value", # bloom
+             "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
+             "model.layers.{bid}.self_attn.query_key_value", # persimmon
+             "h.{bid}.attn.c_attn", # gpt2
+             "transformer.h.{bid}.mixer.Wqkv", # phi2
+             "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
+             "model.layers.{bid}.self_attn.qkv_proj" # phi3
+         ),
+
+         # Attention query
+         MODEL_TENSOR.ATTN_Q: (
+             "model.layers.{bid}.self_attn.q_proj", # llama-hf
+             "layers.{bid}.attention.wq", # llama-pth
+             "encoder.layer.{bid}.attention.self.query", # bert
+             "transformer.h.{bid}.attn.q_proj", # gpt-j
+             "model.layers.layers.{bid}.self_attn.q_proj", # plamo
+             "model.layers.{bid}.attention.wq", # internlm2
+             "transformer.decoder_layer.{bid}.multi_head_attention.query" # Grok
+         ),
+
+         # Attention key
+         MODEL_TENSOR.ATTN_K: (
+             "model.layers.{bid}.self_attn.k_proj", # llama-hf
+             "layers.{bid}.attention.wk", # llama-pth
+             "encoder.layer.{bid}.attention.self.key", # bert
+             "transformer.h.{bid}.attn.k_proj", # gpt-j
+             "transformer.h.{bid}.attn.k", # refact
+             "model.layers.layers.{bid}.self_attn.k_proj", # plamo
+             "model.layers.{bid}.attention.wk", # internlm2
+             "transformer.decoder_layer.{bid}.multi_head_attention.key" # Grok
+         ),
+
+         # Attention value
+         MODEL_TENSOR.ATTN_V: (
+             "model.layers.{bid}.self_attn.v_proj", # llama-hf
+             "layers.{bid}.attention.wv", # llama-pth
+             "encoder.layer.{bid}.attention.self.value", # bert
+             "transformer.h.{bid}.attn.v_proj", # gpt-j
+             "transformer.h.{bid}.attn.v", # refact
+             "model.layers.layers.{bid}.self_attn.v_proj", # plamo
+             "model.layers.{bid}.attention.wv", # internlm2
+             "transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok
+         ),
+
+         # Attention output
+         MODEL_TENSOR.ATTN_OUT: (
+             "gpt_neox.layers.{bid}.attention.dense", # gptneox
+             "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
+             "transformer.blocks.{bid}.attn.out_proj", # mpt
+             "transformer.h.{bid}.self_attention.dense", # falcon
+             "h.{bid}.self_attention.dense", # bloom
+             "model.layers.{bid}.self_attn.o_proj", # llama-hf
+             "layers.{bid}.attention.wo", # llama-pth
+             "encoder.layer.{bid}.attention.output.dense", # bert
+             "transformer.h.{bid}.attn.out_proj", # gpt-j
+             "language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
+             "model.layers.{bid}.self_attn.dense", # persimmon
+             "h.{bid}.attn.c_proj", # gpt2
+             "transformer.h.{bid}.mixer.out_proj", # phi2
+             "model.layers.layers.{bid}.self_attn.o_proj", # plamo
+             "model.layers.{bid}.attention.wo", # internlm2
+             "encoder.layers.{bid}.attn.out_proj", # nomic-bert
+             "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
+             "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
+         ),
+
+         # Attention output norm
+         MODEL_TENSOR.ATTN_OUT_NORM: (
+             "encoder.layer.{bid}.attention.output.LayerNorm", # bert
+             "encoder.layers.{bid}.norm1", # nomic-bert
+             "transformer.decoder_layer.{bid}.rms_norm_1", # Grok
+             "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
+         ),
+
+         # Rotary embeddings
+         MODEL_TENSOR.ATTN_ROT_EMBD: (
+             "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
+             "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
+             "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
+             "transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
+         ),
+
+         # Feed-forward norm
+         MODEL_TENSOR.FFN_NORM: (
+             "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
+             "transformer.h.{bid}.ln_2", # gpt2 refact qwen
+             "h.{bid}.post_attention_layernorm", # bloom
+             "transformer.blocks.{bid}.norm_2", # mpt
+             "model.layers.{bid}.post_attention_layernorm", # llama-hf
+             "layers.{bid}.ffn_norm", # llama-pth
+             "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
+             "model.layers.{bid}.ln2", # yi
+             "h.{bid}.ln_2", # gpt2
+             "model.layers.{bid}.ffn_norm", # internlm2
+             "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
+         ),
+
+         MODEL_TENSOR.FFN_GATE_INP: (
+             "layers.{bid}.feed_forward.gate", # mixtral
+             "model.layers.{bid}.block_sparse_moe.gate", # mixtral
+             "model.layers.{bid}.mlp.gate", # qwen2moe
+             "transformer.decoder_layer.{bid}.router", # Grok
+             "transformer.blocks.{bid}.ffn.router.layer", # dbrx
+         ),
+
+         MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
+             "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
+         ),
+
+         # Feed-forward up
+         MODEL_TENSOR.FFN_UP: (
+             "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
+             "transformer.h.{bid}.mlp.c_fc", # gpt2
+             "transformer.blocks.{bid}.ffn.up_proj", # mpt
+             "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
+             "h.{bid}.mlp.dense_h_to_4h", # bloom
+             "model.layers.{bid}.mlp.up_proj", # llama-hf refact
+             "layers.{bid}.feed_forward.w3", # llama-pth
+             "encoder.layer.{bid}.intermediate.dense", # bert
+             "transformer.h.{bid}.mlp.fc_in", # gpt-j
+             "transformer.h.{bid}.mlp.linear_3", # refact
+             "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
+             "model.layers.{bid}.mlp.dense_h_to_4h", # persimmon
+             "transformer.h.{bid}.mlp.w1", # qwen
+             "h.{bid}.mlp.c_fc", # gpt2
+             "transformer.h.{bid}.mlp.fc1", # phi2
+             "model.layers.{bid}.mlp.fc1", # phi2
+             "model.layers.{bid}.mlp.gate_up_proj", # phi3
+             "model.layers.layers.{bid}.mlp.up_proj", # plamo
+             "model.layers.{bid}.feed_forward.w3", # internlm2
+             "encoder.layers.{bid}.mlp.fc11", # nomic-bert
+             "model.layers.{bid}.mlp.c_fc", # starcoder2
+             "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
+             "model.layers.{bid}.residual_mlp.w3", # arctic
+         ),
+
+         MODEL_TENSOR.FFN_UP_EXP: (
+             "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
+             "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
+             "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
+             "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe (merged)
+         ),
+
+         MODEL_TENSOR.FFN_UP_SHEXP: (
+             "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
+         ),
+
+         # AWQ-activation gate
+         MODEL_TENSOR.FFN_ACT: (
+             "transformer.blocks.{bid}.ffn.act", # mpt
+         ),
+
+         # Feed-forward gate
+         MODEL_TENSOR.FFN_GATE: (
+             "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
+             "layers.{bid}.feed_forward.w1", # llama-pth
+             "transformer.h.{bid}.mlp.w2", # qwen
+             "model.layers.layers.{bid}.mlp.gate_proj", # plamo
+             "model.layers.{bid}.feed_forward.w1", # internlm2
+             "encoder.layers.{bid}.mlp.fc12", # nomic-bert
+             "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
+             "transformer.h.{bid}.mlp.linear_1", # refact
+             "model.layers.{bid}.residual_mlp.w1", # arctic
+         ),
+
+         MODEL_TENSOR.FFN_GATE_EXP: (
+             "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
+             "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
+             "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
+             "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe (merged)
+         ),
+
+         MODEL_TENSOR.FFN_GATE_SHEXP: (
+             "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
+         ),
+
+         # Feed-forward down
+         MODEL_TENSOR.FFN_DOWN: (
+             "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
+             "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
+             "transformer.blocks.{bid}.ffn.down_proj", # mpt
+             "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
+             "h.{bid}.mlp.dense_4h_to_h", # bloom
+             "model.layers.{bid}.mlp.down_proj", # llama-hf
+             "layers.{bid}.feed_forward.w2", # llama-pth
+             "encoder.layer.{bid}.output.dense", # bert
+             "transformer.h.{bid}.mlp.fc_out", # gpt-j
+             "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
+             "model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
+             "h.{bid}.mlp.c_proj", # gpt2
+             "transformer.h.{bid}.mlp.fc2", # phi2
+             "model.layers.{bid}.mlp.fc2", # phi2
+             "model.layers.layers.{bid}.mlp.down_proj", # plamo
+             "model.layers.{bid}.feed_forward.w2", # internlm2
+             "encoder.layers.{bid}.mlp.fc2", # nomic-bert
+             "model.layers.{bid}.mlp.c_proj", # starcoder2
+             "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
+             "model.layers.{bid}.residual_mlp.w2", # arctic
+         ),
+
+         MODEL_TENSOR.FFN_DOWN_EXP: (
+             "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
+             "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
+             "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
+             "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe (merged)
+         ),
+
+         MODEL_TENSOR.FFN_DOWN_SHEXP: (
+             "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
+         ),
+
+         MODEL_TENSOR.ATTN_Q_NORM: (
+             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
+             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
+             "model.layers.{bid}.self_attn.q_norm", # cohere
+             "transformer.blocks.{bid}.attn.q_ln", # sea-lion
+             "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
+         ),
+
+         MODEL_TENSOR.ATTN_K_NORM: (
+             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
+             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
+             "model.layers.{bid}.self_attn.k_norm", # cohere
+             "transformer.blocks.{bid}.attn.k_ln", # sea-lion
+             "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
+         ),
+
+         MODEL_TENSOR.ROPE_FREQS: (
+             "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
+         ),
+
+         MODEL_TENSOR.LAYER_OUT_NORM: (
+             "encoder.layer.{bid}.output.LayerNorm", # bert
+             "encoder.layers.{bid}.norm2", # nomic-bert
+             "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
+             "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
+         ),
+
+         MODEL_TENSOR.SSM_IN: (
+             "model.layers.{bid}.in_proj",
+             "backbone.layers.{bid}.mixer.in_proj",
+         ),
+
+         MODEL_TENSOR.SSM_CONV1D: (
+             "model.layers.{bid}.conv1d",
+             "backbone.layers.{bid}.mixer.conv1d",
+         ),
+
+         MODEL_TENSOR.SSM_X: (
+             "model.layers.{bid}.x_proj",
+             "backbone.layers.{bid}.mixer.x_proj",
+         ),
+
+         MODEL_TENSOR.SSM_DT: (
+             "model.layers.{bid}.dt_proj",
+             "backbone.layers.{bid}.mixer.dt_proj",
+         ),
+
+         MODEL_TENSOR.SSM_A: (
+             "model.layers.{bid}.A_log",
+             "backbone.layers.{bid}.mixer.A_log",
+         ),
+
+         MODEL_TENSOR.SSM_D: (
+             "model.layers.{bid}.D",
+             "backbone.layers.{bid}.mixer.D",
+         ),
+
+         MODEL_TENSOR.SSM_OUT: (
+             "model.layers.{bid}.out_proj",
+             "backbone.layers.{bid}.mixer.out_proj",
+         ),
+     }
+
+     # architecture-specific block mappings
+     arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
+         MODEL_ARCH.ARCTIC: {
+             MODEL_TENSOR.FFN_NORM: (
+                 "model.layers.{bid}.residual_layernorm",
+             ),
+             MODEL_TENSOR.FFN_NORM_EXP: (
+                 "model.layers.{bid}.post_attention_layernorm",
+             ),
+         },
+     }
+
+     mapping: dict[str, tuple[MODEL_TENSOR, str]]
+
+     def __init__(self, arch: MODEL_ARCH, n_blocks: int):
+         self.mapping = {}
+         for tensor, keys in self.mappings_cfg.items():
+             if tensor not in MODEL_TENSORS[arch]:
+                 continue
+             tensor_name = TENSOR_NAMES[tensor]
+             self.mapping[tensor_name] = (tensor, tensor_name)
+             for key in keys:
+                 self.mapping[key] = (tensor, tensor_name)
+         if arch in self.arch_block_mappings_cfg:
+             self.block_mappings_cfg.update(self.arch_block_mappings_cfg[arch])
+         for bid in range(n_blocks):
+             for tensor, keys in self.block_mappings_cfg.items():
+                 if tensor not in MODEL_TENSORS[arch]:
+                     continue
+                 # TODO: make this configurable
+                 n_experts = 128
+                 for xid in range(n_experts):
+                     tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
+                     self.mapping[tensor_name] = (tensor, tensor_name)
+                     for key in keys:
+                         key = key.format(bid = bid, xid = xid)
+                         self.mapping[key] = (tensor, tensor_name)
+
+     def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
+         result = self.mapping.get(key)
+         if result is not None:
+             return result
+         for suffix in try_suffixes:
+             if key.endswith(suffix):
+                 result = self.mapping.get(key[:-len(suffix)])
+                 if result is not None:
+                     return result[0], result[1] + suffix
+         return None
+
+     def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
+         result = self.get_type_and_name(key, try_suffixes = try_suffixes)
+         if result is None:
+             return None
+         return result[1]
+
+     def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None:
+         result = self.get_type_and_name(key, try_suffixes = try_suffixes)
+         if result is None:
+             return None
+         return result[0]
+
+     def __getitem__(self, key: str) -> str:
+         try:
+             return self.mapping[key][1]
+         except KeyError:
+             raise KeyError(key)
+
+     def __contains__(self, key: str) -> bool:
+         return key in self.mapping
+
+     def __repr__(self) -> str:
+         return repr(self.mapping)
+
+
+ def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
+     return TensorNameMap(arch, n_blocks)
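
For orientation, a minimal usage sketch of the mapping above follows (illustrative only, not part of the wheel). It assumes the bundled gguf-py directory is on sys.path so the package imports as gguf, and that MODEL_ARCH.LLAMA and the canonical names in TENSOR_NAMES are defined in constants.py, which this diff lists but does not show.

from gguf.constants import MODEL_ARCH
from gguf.tensor_mapping import get_tensor_name_map

# Build the name map for a hypothetical 32-block LLaMA-style model.
# MODEL_ARCH.LLAMA is assumed to exist in constants.py (only MODEL_ARCH.ARCTIC
# is visible in this diff).
tmap = get_tensor_name_map(MODEL_ARCH.LLAMA, n_blocks=32)

# Translate a Hugging Face checkpoint tensor name into its canonical GGUF name.
# try_suffixes strips a trailing ".weight"/".bias" before the lookup and
# re-appends it to the result, so one mapping entry covers both.
name = tmap.get_name("model.layers.0.self_attn.q_proj.weight",
                     try_suffixes=(".weight", ".bias"))
print(name)  # prints the canonical name defined by TENSOR_NAMES in constants.py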
bigdl/cpp/gguf-py/gguf/vocab.py
@@ -0,0 +1,165 @@
+ from __future__ import annotations
+
+ import logging
+ import json
+ import os
+ from pathlib import Path
+ from typing import Any, Callable, Sequence, Mapping, Iterable
+
+ from .gguf_writer import GGUFWriter
+
+ logger = logging.getLogger(__name__)
+
+
+ class SpecialVocab:
+     merges: list[str]
+     add_special_token: dict[str, bool]
+     special_token_ids: dict[str, int]
+     chat_template: str | Sequence[Mapping[str, str]] | None
+
+     def __init__(
+         self, path: str | os.PathLike[str], load_merges: bool = False,
+         special_token_types: Iterable[str] | None = None,
+         n_vocab: int | None = None,
+     ):
+         self.special_token_ids = {}
+         self.add_special_token = {}
+         self.n_vocab = n_vocab
+         self.load_merges = load_merges
+         self.merges = []
+         self.chat_template = None
+         if special_token_types is not None:
+             self.special_token_types = special_token_types
+         else:
+             self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask')
+         self._load(Path(path))
+
+     def __repr__(self) -> str:
+         return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format(
+             len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset",
+         )
+
+     def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
+         if self.merges:
+             if not quiet:
+                 logger.info(f'Adding {len(self.merges)} merge(s).')
+             gw.add_token_merges(self.merges)
+         elif self.load_merges:
+             logger.warning('Adding merges requested but no merges found, output may be non-functional.')
+         for typ, tokid in self.special_token_ids.items():
+             id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
+             if id_handler is None:
+                 logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
+                 continue
+             if not quiet:
+                 logger.info(f'Setting special token type {typ} to {tokid}')
+             id_handler(tokid)
+         for typ, value in self.add_special_token.items():
+             add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
+             if add_handler is None:
+                 logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
+                 continue
+             if not quiet:
+                 logger.info(f'Setting add_{typ}_token to {value}')
+             add_handler(value)
+         if self.chat_template is not None:
+             if not quiet:
+                 logger.info(f'Setting chat_template to {self.chat_template}')
+             gw.add_chat_template(self.chat_template)
+
+     def _load(self, path: Path) -> None:
+         self._try_load_from_tokenizer_json(path)
+         self._try_load_from_config_json(path)
+         if self.load_merges and not self.merges:
+             self._try_load_merges_txt(path)
+
+     def _try_load_merges_txt(self, path: Path) -> bool:
+         merges_file = path / 'merges.txt'
+         if not merges_file.is_file():
+             return False
+         with open(merges_file, 'r', encoding = 'utf-8') as fp:
+             first_line = next(fp, '').strip()
+             if not first_line.startswith('#'):
+                 fp.seek(0)
+                 line_num = 0
+             else:
+                 line_num = 1
+             merges = []
+             for line in fp:
+                 line_num += 1
+                 line = line.strip()
+                 if not line:
+                     continue
+                 parts = line.split(None, 3)
+                 if len(parts) != 2:
+                     logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
+                     continue
+                 merges.append(f'{parts[0]} {parts[1]}')
+         self.merges = merges
+         return True
+
+     def _set_special_token(self, typ: str, tid: Any) -> None:
+         if not isinstance(tid, int):
+             return
+         if tid < 0:
+             raise ValueError(f'invalid value for special token type {typ}: {tid}')
+         if self.n_vocab is None or tid < self.n_vocab:
+             if typ in self.special_token_ids:
+                 return
+             self.special_token_ids[typ] = tid
+             return
+         logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
+
+     def _try_load_from_tokenizer_json(self, path: Path) -> bool:
+         tokenizer_file = path / 'tokenizer.json'
+         if tokenizer_file.is_file():
+             with open(tokenizer_file, encoding = 'utf-8') as f:
+                 tokenizer = json.load(f)
+             if self.load_merges:
+                 merges = tokenizer.get('model', {}).get('merges')
+                 if isinstance(merges, list) and merges and isinstance(merges[0], str):
+                     self.merges = merges
+             added_tokens = tokenizer.get('added_tokens', {})
+         else:
+             added_tokens = {}
+         tokenizer_config_file = path / 'tokenizer_config.json'
+         if not tokenizer_config_file.is_file():
+             return True
+         with open(tokenizer_config_file, encoding = 'utf-8') as f:
+             tokenizer_config = json.load(f)
+         chat_template = tokenizer_config.get('chat_template')
+         if chat_template is None or isinstance(chat_template, (str, list)):
+             self.chat_template = chat_template
+         else:
+             logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
+         for typ in self.special_token_types:
+             add_entry = tokenizer_config.get(f'add_{typ}_token')
+             if isinstance(add_entry, bool):
+                 self.add_special_token[typ] = add_entry
+             entry = tokenizer_config.get(f'{typ}_token')
+             if isinstance(entry, str):
+                 tc_content = entry
+             elif isinstance(entry, dict):
+                 entry_content = entry.get('content')
+                 if not isinstance(entry_content, str):
+                     continue
+                 tc_content = entry_content
+             else:
+                 continue
+             # We only need the first match here.
+             maybe_token_id = next(
+                 (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
+                 None,
+             )
+             self._set_special_token(typ, maybe_token_id)
+         return True
+
+     def _try_load_from_config_json(self, path: Path) -> bool:
+         config_file = path / 'config.json'
+         if not config_file.is_file():
+             return False
+         with open(config_file, encoding = 'utf-8') as f:
+             config = json.load(f)
+         for typ in self.special_token_types:
+             self._set_special_token(typ, config.get(f'{typ}_token_id'))
+         return True
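
A similar sketch for SpecialVocab (illustrative only). It assumes a Hugging Face-style checkpoint directory containing tokenizer.json, tokenizer_config.json and/or config.json; GGUFWriter comes from gguf_writer.py, which this diff lists but does not show, so the writer instance is treated as given.

from gguf.vocab import SpecialVocab

# Collect BPE merges, special-token ids (bos/eos/unk/...), add_*_token flags
# and the chat template from a hypothetical model directory.
special_vocab = SpecialVocab(
    "path/to/model-dir",  # directory of the checkpoint, not a single file
    load_merges=True,     # also read merges from tokenizer.json or merges.txt
    n_vocab=32000,        # token ids >= n_vocab are skipped with a warning
)
print(special_vocab)      # e.g. <SpecialVocab with N merges, special tokens {...}, ...>

# When serializing, hand everything to the GGUF writer in one call:
# special_vocab.add_to_gguf(writer)  # writer: a gguf.gguf_writer.GGUFWriter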
The remaining changed files are binaries (executables, DLLs, and libraries) and have no textual diff.