bigdl-core-cpp 2.5.0b20240421__py3-none-win_amd64.whl → 2.5.0b20240423__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. bigdl/cpp/convert.py +276 -189
  2. bigdl/cpp/gguf-py/__init__.py +0 -0
  3. bigdl/cpp/gguf-py/gguf/__init__.py +5 -0
  4. bigdl/cpp/gguf-py/gguf/constants.py +943 -0
  5. bigdl/cpp/gguf-py/gguf/gguf.py +15 -0
  6. bigdl/cpp/gguf-py/gguf/gguf_reader.py +279 -0
  7. bigdl/cpp/gguf-py/gguf/gguf_writer.py +518 -0
  8. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +434 -0
  9. bigdl/cpp/gguf-py/gguf/vocab.py +181 -0
  10. bigdl/cpp/libs/baby-llama.exe +0 -0
  11. bigdl/cpp/libs/batched-bench.exe +0 -0
  12. bigdl/cpp/libs/batched.exe +0 -0
  13. bigdl/cpp/libs/beam-search.exe +0 -0
  14. bigdl/cpp/libs/benchmark.exe +0 -0
  15. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  16. bigdl/cpp/libs/embedding.exe +0 -0
  17. bigdl/cpp/libs/export-lora.exe +0 -0
  18. bigdl/cpp/libs/finetune.exe +0 -0
  19. bigdl/cpp/libs/gguf.exe +0 -0
  20. bigdl/cpp/libs/gritlm.exe +0 -0
  21. bigdl/cpp/libs/imatrix.exe +0 -0
  22. bigdl/cpp/libs/infill.exe +0 -0
  23. bigdl/cpp/libs/llama-bench.exe +0 -0
  24. bigdl/cpp/libs/llava-cli.exe +0 -0
  25. bigdl/cpp/libs/lookahead.exe +0 -0
  26. bigdl/cpp/libs/lookup.exe +0 -0
  27. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  28. bigdl/cpp/libs/main.exe +0 -0
  29. bigdl/cpp/libs/ollama.exe +0 -0
  30. bigdl/cpp/libs/parallel.exe +0 -0
  31. bigdl/cpp/libs/passkey.exe +0 -0
  32. bigdl/cpp/libs/perplexity.exe +0 -0
  33. bigdl/cpp/libs/q8dot.exe +0 -0
  34. bigdl/cpp/libs/quantize-stats.exe +0 -0
  35. bigdl/cpp/libs/quantize.exe +0 -0
  36. bigdl/cpp/libs/save-load-state.exe +0 -0
  37. bigdl/cpp/libs/server.exe +0 -0
  38. bigdl/cpp/libs/simple.exe +0 -0
  39. bigdl/cpp/libs/speculative.exe +0 -0
  40. bigdl/cpp/libs/tokenize.exe +0 -0
  41. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  42. bigdl/cpp/libs/vdot.exe +0 -0
  43. {bigdl_core_cpp-2.5.0b20240421.data → bigdl_core_cpp-2.5.0b20240423.data}/scripts/init-llama-cpp.bat +1 -0
  44. {bigdl_core_cpp-2.5.0b20240421.dist-info → bigdl_core_cpp-2.5.0b20240423.dist-info}/METADATA +3 -3
  45. bigdl_core_cpp-2.5.0b20240423.dist-info/RECORD +50 -0
  46. bigdl_core_cpp-2.5.0b20240421.dist-info/RECORD +0 -42
  47. {bigdl_core_cpp-2.5.0b20240421.data → bigdl_core_cpp-2.5.0b20240423.data}/scripts/init-llama-cpp.ps1 +0 -0
  48. {bigdl_core_cpp-2.5.0b20240421.data → bigdl_core_cpp-2.5.0b20240423.data}/scripts/init-ollama.bat +0 -0
  49. {bigdl_core_cpp-2.5.0b20240421.dist-info → bigdl_core_cpp-2.5.0b20240423.dist-info}/WHEEL +0 -0
  50. {bigdl_core_cpp-2.5.0b20240421.dist-info → bigdl_core_cpp-2.5.0b20240423.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py ADDED
@@ -0,0 +1,434 @@
+ from __future__ import annotations
+
+ from typing import Sequence
+
+ from .constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS, TENSOR_NAMES
+
+
+ class TensorNameMap:
+     mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
+         # Token embeddings
+         MODEL_TENSOR.TOKEN_EMBD: (
+             "gpt_neox.embed_in",  # gptneox
+             "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx
+             "transformer.word_embeddings",  # falcon
+             "word_embeddings",  # bloom
+             "model.embed_tokens",  # llama-hf
+             "tok_embeddings",  # llama-pth
+             "embeddings.word_embeddings",  # bert nomic-bert
+             "language_model.embedding.word_embeddings",  # persimmon
+             "wte",  # gpt2
+             "transformer.embd.wte",  # phi2
+             "model.tok_embeddings",  # internlm2
+             "model.embedding",  # mamba-qbert
+             "backbone.embedding",  # mamba
+             "backbone.embeddings",  # mamba-hf
+             "transformer.in_out_embed",  # Grok
+         ),
+
+         # Token type embeddings
+         MODEL_TENSOR.TOKEN_TYPES: (
+             "embeddings.token_type_embeddings",  # bert nomic-bert
+         ),
+
+         # Normalization of token embeddings
+         MODEL_TENSOR.TOKEN_EMBD_NORM: (
+             "word_embeddings_layernorm",  # bloom
+             "embeddings.LayerNorm",  # bert
+             "emb_ln",  # nomic-bert
+         ),
+
+         # Position embeddings
+         MODEL_TENSOR.POS_EMBD: (
+             "transformer.wpe",  # gpt2
+             "embeddings.position_embeddings",  # bert
+             "wpe",  # gpt2
+         ),
+
+         # Output
+         MODEL_TENSOR.OUTPUT: (
+             "embed_out",  # gptneox
+             "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
+             "output",  # llama-pth bloom internlm2
+             "word_embeddings_for_head",  # persimmon
+             "lm_head.linear",  # phi2
+         ),
+
+         # Output norm
+         MODEL_TENSOR.OUTPUT_NORM: (
+             "gpt_neox.final_layer_norm",  # gptneox
+             "transformer.ln_f",  # gpt2 gpt-j falcon
+             "model.norm",  # llama-hf baichuan internlm2
+             "norm",  # llama-pth
+             "transformer.norm_f",  # mpt dbrx
+             "ln_f",  # refact bloom qwen gpt2
+             "language_model.encoder.final_layernorm",  # persimmon
+             "model.final_layernorm",  # persimmon
+             "lm_head.ln",  # phi2
+             "model.norm_f",  # mamba-qbert
+             "backbone.norm_f",  # mamba
+             "transformer.rms_norm",  # Grok
+         ),
+
+         # Rope frequencies
+         MODEL_TENSOR.ROPE_FREQS: (
+             "rope.freqs",  # llama-pth
+         ),
+     }
+
+     block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
+         # Attention norm
+         MODEL_TENSOR.ATTN_NORM: (
+             "gpt_neox.layers.{bid}.input_layernorm",  # gptneox
+             "transformer.h.{bid}.ln_1",  # gpt2 gpt-j refact qwen
+             "transformer.blocks.{bid}.norm_1",  # mpt
+             "transformer.h.{bid}.input_layernorm",  # falcon7b
+             "h.{bid}.input_layernorm",  # bloom
+             "transformer.h.{bid}.ln_mlp",  # falcon40b
+             "model.layers.{bid}.input_layernorm",  # llama-hf
+             "layers.{bid}.attention_norm",  # llama-pth
+             "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
+             "model.layers.{bid}.ln1",  # yi
+             "h.{bid}.ln_1",  # gpt2
+             "transformer.h.{bid}.ln",  # phi2
+             "model.layers.layers.{bid}.norm",  # plamo
+             "model.layers.{bid}.attention_norm",  # internlm2
+             "model.layers.{bid}.norm",  # mamba-qbert
+             "backbone.layers.{bid}.norm",  # mamba
+             "transformer.decoder_layer.{bid}.rms_norm",  # Grok
+             "transformer.blocks.{bid}.norm_attn_norm.norm_1",  # dbrx
+         ),
+
+         # Attention norm 2
+         MODEL_TENSOR.ATTN_NORM_2: (
+             "transformer.h.{bid}.ln_attn",  # falcon40b
+         ),
+
+         # Attention query-key-value
+         MODEL_TENSOR.ATTN_QKV: (
+             "gpt_neox.layers.{bid}.attention.query_key_value",  # gptneox
+             "transformer.h.{bid}.attn.c_attn",  # gpt2 qwen
+             "transformer.blocks.{bid}.attn.Wqkv",  # mpt
+             "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv",  # dbrx
+             "transformer.h.{bid}.self_attention.query_key_value",  # falcon
+             "h.{bid}.self_attention.query_key_value",  # bloom
+             "language_model.encoder.layers.{bid}.self_attention.query_key_value",  # persimmon
+             "model.layers.{bid}.self_attn.query_key_value",  # persimmon
+             "h.{bid}.attn.c_attn",  # gpt2
+             "transformer.h.{bid}.mixer.Wqkv",  # phi2
+             "encoder.layers.{bid}.attn.Wqkv",  # nomic-bert
+         ),
+
+         # Attention query
+         MODEL_TENSOR.ATTN_Q: (
+             "model.layers.{bid}.self_attn.q_proj",  # llama-hf
+             "layers.{bid}.attention.wq",  # llama-pth
+             "encoder.layer.{bid}.attention.self.query",  # bert
+             "transformer.h.{bid}.attn.q_proj",  # gpt-j
+             "model.layers.layers.{bid}.self_attn.q_proj",  # plamo
+             "model.layers.{bid}.attention.wq",  # internlm2
+             "transformer.decoder_layer.{bid}.multi_head_attention.query"  # Grok
+         ),
+
+         # Attention key
+         MODEL_TENSOR.ATTN_K: (
+             "model.layers.{bid}.self_attn.k_proj",  # llama-hf
+             "layers.{bid}.attention.wk",  # llama-pth
+             "encoder.layer.{bid}.attention.self.key",  # bert
+             "transformer.h.{bid}.attn.k_proj",  # gpt-j
+             "model.layers.layers.{bid}.self_attn.k_proj",  # plamo
+             "model.layers.{bid}.attention.wk",  # internlm2
+             "transformer.decoder_layer.{bid}.multi_head_attention.key"  # Grok
+         ),
+
+         # Attention value
+         MODEL_TENSOR.ATTN_V: (
+             "model.layers.{bid}.self_attn.v_proj",  # llama-hf
+             "layers.{bid}.attention.wv",  # llama-pth
+             "encoder.layer.{bid}.attention.self.value",  # bert
+             "transformer.h.{bid}.attn.v_proj",  # gpt-j
+             "model.layers.layers.{bid}.self_attn.v_proj",  # plamo
+             "model.layers.{bid}.attention.wv",  # internlm2
+             "transformer.decoder_layer.{bid}.multi_head_attention.value"  # Grok
+         ),
+
+         # Attention output
+         MODEL_TENSOR.ATTN_OUT: (
+             "gpt_neox.layers.{bid}.attention.dense",  # gptneox
+             "transformer.h.{bid}.attn.c_proj",  # gpt2 refact qwen
+             "transformer.blocks.{bid}.attn.out_proj",  # mpt
+             "transformer.h.{bid}.self_attention.dense",  # falcon
+             "h.{bid}.self_attention.dense",  # bloom
+             "model.layers.{bid}.self_attn.o_proj",  # llama-hf
+             "layers.{bid}.attention.wo",  # llama-pth
+             "encoder.layer.{bid}.attention.output.dense",  # bert
+             "transformer.h.{bid}.attn.out_proj",  # gpt-j
+             "language_model.encoder.layers.{bid}.self_attention.dense",  # persimmon
+             "model.layers.{bid}.self_attn.dense",  # persimmon
+             "h.{bid}.attn.c_proj",  # gpt2
+             "transformer.h.{bid}.mixer.out_proj",  # phi2
+             "model.layers.layers.{bid}.self_attn.o_proj",  # plamo
+             "model.layers.{bid}.attention.wo",  # internlm2
+             "encoder.layers.{bid}.attn.out_proj",  # nomic-bert
+             "transformer.decoder_layer.{bid}.multi_head_attention.linear",  # Grok
+             "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj",  # dbrx
+         ),
+
+         # Attention output norm
+         MODEL_TENSOR.ATTN_OUT_NORM: (
+             "encoder.layer.{bid}.attention.output.LayerNorm",  # bert
+             "encoder.layers.{bid}.norm1",  # nomic-bert
+             "transformer.decoder_layer.{bid}.rms_norm_1",  # Grok
+             "transformer.blocks.{bid}.norm_attn_norm.norm_2",  # dbrx
+         ),
+
+         # Rotary embeddings
+         MODEL_TENSOR.ATTN_ROT_EMBD: (
+             "model.layers.{bid}.self_attn.rotary_emb.inv_freq",  # llama-hf
+             "layers.{bid}.attention.inner_attention.rope.freqs",  # llama-pth
+             "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq",  # plamo
+             "transformer.h.{bid}.attn.rotary_emb.inv_freq",  # codeshell
+         ),
+
+         # Feed-forward norm
+         MODEL_TENSOR.FFN_NORM: (
+             "gpt_neox.layers.{bid}.post_attention_layernorm",  # gptneox
+             "transformer.h.{bid}.ln_2",  # gpt2 refact qwen
+             "h.{bid}.post_attention_layernorm",  # bloom
+             "transformer.blocks.{bid}.norm_2",  # mpt
+             "model.layers.{bid}.post_attention_layernorm",  # llama-hf
+             "layers.{bid}.ffn_norm",  # llama-pth
+             "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
+             "model.layers.{bid}.ln2",  # yi
+             "h.{bid}.ln_2",  # gpt2
+             "model.layers.{bid}.ffn_norm",  # internlm2
+             "transformer.decoder_layer.{bid}.rms_norm_2",  # Grok
+         ),
+
+         MODEL_TENSOR.FFN_GATE_INP: (
+             "layers.{bid}.feed_forward.gate",  # mixtral
+             "model.layers.{bid}.block_sparse_moe.gate",  # mixtral
+             "model.layers.{bid}.mlp.gate",  # qwen2moe
+             "transformer.decoder_layer.{bid}.router",  # Grok
+             "transformer.blocks.{bid}.ffn.router.layer",  # dbrx
+         ),
+
+         MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
+             "model.layers.{bid}.mlp.shared_expert_gate",  # qwen2moe
+         ),
+
+         # Feed-forward up
+         MODEL_TENSOR.FFN_UP: (
+             "gpt_neox.layers.{bid}.mlp.dense_h_to_4h",  # gptneox
+             "transformer.h.{bid}.mlp.c_fc",  # gpt2
+             "transformer.blocks.{bid}.ffn.up_proj",  # mpt
+             "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
+             "h.{bid}.mlp.dense_h_to_4h",  # bloom
+             "model.layers.{bid}.mlp.up_proj",  # llama-hf refact
+             "layers.{bid}.feed_forward.w3",  # llama-pth
+             "encoder.layer.{bid}.intermediate.dense",  # bert
+             "transformer.h.{bid}.mlp.fc_in",  # gpt-j
+             "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
+             "model.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
+             "transformer.h.{bid}.mlp.w1",  # qwen
+             "h.{bid}.mlp.c_fc",  # gpt2
+             "transformer.h.{bid}.mlp.fc1",  # phi2
+             "model.layers.{bid}.mlp.fc1",  # phi2
+             "model.layers.layers.{bid}.mlp.up_proj",  # plamo
+             "model.layers.{bid}.feed_forward.w3",  # internlm2
+             "encoder.layers.{bid}.mlp.fc11",  # nomic-bert
+             "model.layers.{bid}.mlp.c_fc",  # starcoder2
+         ),
+
+         MODEL_TENSOR.FFN_UP_EXP: (
+             "layers.{bid}.feed_forward.experts.w3",  # mixtral (merged)
+             "transformer.decoder_layer.{bid}.moe.linear_v",  # Grok (merged)
+             "transformer.blocks.{bid}.ffn.experts.mlp.v1",  # dbrx
+             "model.layers.{bid}.mlp.experts.up_proj",  # qwen2moe (merged)
+         ),
+
+         MODEL_TENSOR.FFN_UP_SHEXP: (
+             "model.layers.{bid}.mlp.shared_expert.up_proj",  # qwen2moe
+         ),
+
+         # AWQ-activation gate
+         MODEL_TENSOR.FFN_ACT: (
+             "transformer.blocks.{bid}.ffn.act",  # mpt
+         ),
+
+         # Feed-forward gate
+         MODEL_TENSOR.FFN_GATE: (
+             "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact
+             "layers.{bid}.feed_forward.w1",  # llama-pth
+             "transformer.h.{bid}.mlp.w2",  # qwen
+             "model.layers.layers.{bid}.mlp.gate_proj",  # plamo
+             "model.layers.{bid}.feed_forward.w1",  # internlm2
+             "encoder.layers.{bid}.mlp.fc12",  # nomic-bert
+         ),
+
+         MODEL_TENSOR.FFN_GATE_EXP: (
+             "layers.{bid}.feed_forward.experts.w1",  # mixtral (merged)
+             "transformer.decoder_layer.{bid}.moe.linear",  # Grok (merged)
+             "transformer.blocks.{bid}.ffn.experts.mlp.w1",  # dbrx
+             "model.layers.{bid}.mlp.experts.gate_proj",  # qwen2moe (merged)
+         ),
+
+         MODEL_TENSOR.FFN_GATE_SHEXP: (
+             "model.layers.{bid}.mlp.shared_expert.gate_proj",  # qwen2moe
+         ),
+
+         # Feed-forward down
+         MODEL_TENSOR.FFN_DOWN: (
+             "gpt_neox.layers.{bid}.mlp.dense_4h_to_h",  # gptneox
+             "transformer.h.{bid}.mlp.c_proj",  # gpt2 refact qwen
+             "transformer.blocks.{bid}.ffn.down_proj",  # mpt
+             "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
+             "h.{bid}.mlp.dense_4h_to_h",  # bloom
+             "model.layers.{bid}.mlp.down_proj",  # llama-hf
+             "layers.{bid}.feed_forward.w2",  # llama-pth
+             "encoder.layer.{bid}.output.dense",  # bert
+             "transformer.h.{bid}.mlp.fc_out",  # gpt-j
+             "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h",  # persimmon
+             "model.layers.{bid}.mlp.dense_4h_to_h",  # persimmon
+             "h.{bid}.mlp.c_proj",  # gpt2
+             "transformer.h.{bid}.mlp.fc2",  # phi2
+             "model.layers.{bid}.mlp.fc2",  # phi2
+             "model.layers.layers.{bid}.mlp.down_proj",  # plamo
+             "model.layers.{bid}.feed_forward.w2",  # internlm2
+             "encoder.layers.{bid}.mlp.fc2",  # nomic-bert
+             "model.layers.{bid}.mlp.c_proj",  # starcoder2
+         ),
+
+         MODEL_TENSOR.FFN_DOWN_EXP: (
+             "layers.{bid}.feed_forward.experts.w2",  # mixtral (merged)
+             "transformer.decoder_layer.{bid}.moe.linear_1",  # Grok (merged)
+             "transformer.blocks.{bid}.ffn.experts.mlp.w2",  # dbrx
+             "model.layers.{bid}.mlp.experts.down_proj",  # qwen2moe (merged)
+         ),
+
+         MODEL_TENSOR.FFN_DOWN_SHEXP: (
+             "model.layers.{bid}.mlp.shared_expert.down_proj",  # qwen2moe
+         ),
+
+         MODEL_TENSOR.ATTN_Q_NORM: (
+             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
+             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
+             "model.layers.{bid}.self_attn.q_norm",  # cohere
+             "transformer.blocks.{bid}.attn.q_ln",  # sea-lion
+         ),
+
+         MODEL_TENSOR.ATTN_K_NORM: (
+             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
+             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
+             "model.layers.{bid}.self_attn.k_norm",  # cohere
+             "transformer.blocks.{bid}.attn.k_ln",  # sea-lion
+         ),
+
+         MODEL_TENSOR.ROPE_FREQS: (
+             "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq",  # persimmon
+         ),
+
+         MODEL_TENSOR.LAYER_OUT_NORM: (
+             "encoder.layer.{bid}.output.LayerNorm",  # bert
+             "encoder.layers.{bid}.norm2",  # nomic-bert
+             "transformer.decoder_layer.{bid}.rms_norm_3",  # Grok
+         ),
+
+         MODEL_TENSOR.SSM_IN: (
+             "model.layers.{bid}.in_proj",
+             "backbone.layers.{bid}.mixer.in_proj",
+         ),
+
+         MODEL_TENSOR.SSM_CONV1D: (
+             "model.layers.{bid}.conv1d",
+             "backbone.layers.{bid}.mixer.conv1d",
+         ),
+
+         MODEL_TENSOR.SSM_X: (
+             "model.layers.{bid}.x_proj",
+             "backbone.layers.{bid}.mixer.x_proj",
+         ),
+
+         MODEL_TENSOR.SSM_DT: (
+             "model.layers.{bid}.dt_proj",
+             "backbone.layers.{bid}.mixer.dt_proj",
+         ),
+
+         MODEL_TENSOR.SSM_A: (
+             "model.layers.{bid}.A_log",
+             "backbone.layers.{bid}.mixer.A_log",
+         ),
+
+         MODEL_TENSOR.SSM_D: (
+             "model.layers.{bid}.D",
+             "backbone.layers.{bid}.mixer.D",
+         ),
+
+         MODEL_TENSOR.SSM_OUT: (
+             "model.layers.{bid}.out_proj",
+             "backbone.layers.{bid}.mixer.out_proj",
+         ),
+     }
+
+     mapping: dict[str, tuple[MODEL_TENSOR, str]]
+
+     def __init__(self, arch: MODEL_ARCH, n_blocks: int):
+         self.mapping = {}
+         for tensor, keys in self.mappings_cfg.items():
+             if tensor not in MODEL_TENSORS[arch]:
+                 continue
+             tensor_name = TENSOR_NAMES[tensor]
+             self.mapping[tensor_name] = (tensor, tensor_name)
+             for key in keys:
+                 self.mapping[key] = (tensor, tensor_name)
+         for bid in range(n_blocks):
+             for tensor, keys in self.block_mappings_cfg.items():
+                 if tensor not in MODEL_TENSORS[arch]:
+                     continue
+                 # TODO: make this configurable
+                 n_experts = 60
+                 for xid in range(n_experts):
+                     tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
+                     self.mapping[tensor_name] = (tensor, tensor_name)
+                     for key in keys:
+                         key = key.format(bid = bid, xid = xid)
+                         self.mapping[key] = (tensor, tensor_name)
+
+     def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
+         result = self.mapping.get(key)
+         if result is not None:
+             return result
+         for suffix in try_suffixes:
+             if key.endswith(suffix):
+                 result = self.mapping.get(key[:-len(suffix)])
+                 if result is not None:
+                     return result[0], result[1] + suffix
+         return None
+
+     def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
+         result = self.get_type_and_name(key, try_suffixes = try_suffixes)
+         if result is None:
+             return None
+         return result[1]
+
+     def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None:
+         result = self.get_type_and_name(key, try_suffixes = try_suffixes)
+         if result is None:
+             return None
+         return result[0]
+
+     def __getitem__(self, key: str) -> str:
+         try:
+             return self.mapping[key][1]
+         except KeyError:
+             raise KeyError(key)
+
+     def __contains__(self, key: str) -> bool:
+         return key in self.mapping
+
+     def __repr__(self) -> str:
+         return repr(self.mapping)
+
+
+ def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
+     return TensorNameMap(arch, n_blocks)
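Usage note (not part of the diff): tensor_mapping.py resolves architecture-specific checkpoint tensor names (Hugging Face or original "pth" layouts) to canonical GGUF tensor names. A minimal sketch, assuming the gguf-py directory is importable as the gguf package and that MODEL_ARCH.LLAMA is defined in the new constants.py:

from gguf.constants import MODEL_ARCH
from gguf.tensor_mapping import get_tensor_name_map

# Build the lookup table for a hypothetical 32-block llama-style checkpoint.
tmap = get_tensor_name_map(MODEL_ARCH.LLAMA, n_blocks=32)

# get_name() tries each suffix: it strips the suffix, looks up the base name,
# and re-appends the suffix to the resolved GGUF name.
gguf_name = tmap.get_name("model.layers.0.self_attn.q_proj.weight",
                          try_suffixes=(".weight", ".bias"))
print(gguf_name)  # expected to resolve to something like "blk.0.attn_q.weight"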
bigdl/cpp/gguf-py/gguf/vocab.py ADDED
@@ -0,0 +1,181 @@
+ from __future__ import annotations
+
+ import json
+ import os
+ import sys
+ from pathlib import Path
+ from typing import Any, Callable
+
+ from .gguf_writer import GGUFWriter
+
+
+ class SpecialVocab:
+     merges: list[str]
+     add_special_token: dict[str, bool]
+     special_token_ids: dict[str, int]
+     chat_template: str | None
+
+     def __init__(
+         self, path: str | os.PathLike[str], load_merges: bool = False,
+         special_token_types: tuple[str, ...] | None = None,
+         n_vocab: int | None = None,
+     ):
+         self.special_token_ids = {}
+         self.add_special_token = {}
+         self.n_vocab = n_vocab
+         self.load_merges = load_merges
+         self.merges = []
+         self.chat_template = None
+         if special_token_types is not None:
+             self.special_token_types = special_token_types
+         else:
+             self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask')
+         self._load(Path(path))
+
+     def __repr__(self) -> str:
+         return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format(
+             len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset",
+         )
+
+     def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
+         if self.merges:
+             if not quiet:
+                 print(f'gguf: Adding {len(self.merges)} merge(s).')
+             gw.add_token_merges(self.merges)
+         elif self.load_merges:
+             print(
+                 'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.',
+                 file = sys.stderr,
+             )
+         for typ, tokid in self.special_token_ids.items():
+             id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
+             if id_handler is None:
+                 print(
+                     f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
+                     file = sys.stderr,
+                 )
+                 continue
+             if not quiet:
+                 print(f'gguf: Setting special token type {typ} to {tokid}')
+             id_handler(tokid)
+         for typ, value in self.add_special_token.items():
+             add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
+             if add_handler is None:
+                 print(
+                     f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping',
+                     file = sys.stderr,
+                 )
+                 continue
+             if not quiet:
+                 print(f'gguf: Setting add_{typ}_token to {value}')
+             add_handler(value)
+         if self.chat_template is not None:
+             if not quiet:
+                 print(f'gguf: Setting chat_template to {self.chat_template}')
+             gw.add_chat_template(self.chat_template)
+
+     def _load(self, path: Path) -> None:
+         self._try_load_from_tokenizer_json(path)
+         self._try_load_from_config_json(path)
+         if self.load_merges and not self.merges:
+             self._try_load_merges_txt(path)
+
+     def _try_load_merges_txt(self, path: Path) -> bool:
+         merges_file = path / 'merges.txt'
+         if not merges_file.is_file():
+             return False
+         with open(merges_file, 'r', encoding = 'utf-8') as fp:
+             first_line = next(fp, '').strip()
+             if not first_line.startswith('#'):
+                 fp.seek(0)
+                 line_num = 0
+             else:
+                 line_num = 1
+             merges = []
+             for line in fp:
+                 line_num += 1
+                 line = line.strip()
+                 if not line:
+                     continue
+                 parts = line.split(None, 3)
+                 if len(parts) != 2:
+                     print(
+                         f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring',
+                         file = sys.stderr,
+                     )
+                     continue
+                 merges.append(f'{parts[0]} {parts[1]}')
+         self.merges = merges
+         return True
+
+     def _set_special_token(self, typ: str, tid: Any) -> None:
+         if not isinstance(tid, int):
+             return
+         if tid < 0:
+             raise ValueError(f'invalid value for special token type {typ}: {tid}')
+         if self.n_vocab is None or tid < self.n_vocab:
+             if typ in self.special_token_ids:
+                 return
+             self.special_token_ids[typ] = tid
+             return
+         print(
+             f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
+             file = sys.stderr,
+         )
+
+     def _try_load_from_tokenizer_json(self, path: Path) -> bool:
+         tokenizer_file = path / 'tokenizer.json'
+         if tokenizer_file.is_file():
+             with open(tokenizer_file, encoding = 'utf-8') as f:
+                 tokenizer = json.load(f)
+             if self.load_merges:
+                 merges = tokenizer.get('model', {}).get('merges')
+                 if isinstance(merges, list) and merges and isinstance(merges[0], str):
+                     self.merges = merges
+             added_tokens = tokenizer.get('added_tokens', {})
+         else:
+             added_tokens = {}
+         tokenizer_config_file = path / 'tokenizer_config.json'
+         if not tokenizer_config_file.is_file():
+             return True
+         with open(tokenizer_config_file, encoding = 'utf-8') as f:
+             tokenizer_config = json.load(f)
+         chat_template = tokenizer_config.get('chat_template')
+         if chat_template is None or isinstance(chat_template, (str, list)):
+             self.chat_template = chat_template
+         else:
+             print(
+                 f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring',
+                 file = sys.stderr
+             )
+         for typ in self.special_token_types:
+             add_entry = tokenizer_config.get(f'add_{typ}_token')
+             if isinstance(add_entry, bool):
+                 self.add_special_token[typ] = add_entry
+             entry = tokenizer_config.get(f'{typ}_token')
+             if isinstance(entry, str):
+                 tc_content = entry
+             elif isinstance(entry, dict):
+                 entry_content = entry.get('content')
+                 if not isinstance(entry_content, str):
+                     continue
+                 tc_content = entry_content
+             else:
+                 continue
+             # We only need the first match here.
+             maybe_token_id = next(
+                 (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
+                 None,
+             )
+             self._set_special_token(typ, maybe_token_id)
+         return True
+
+     def _try_load_from_config_json(self, path: Path) -> bool:
+         config_file = path / 'config.json'
+         if not config_file.is_file():
+             return False
+         with open(config_file, encoding = 'utf-8') as f:
+             config = json.load(f)
+         for typ in self.special_token_types:
+             self._set_special_token(typ, config.get(f'{typ}_token_id'))
+         return True
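Usage note (not part of the diff): vocab.py gathers special-token metadata (bos/eos/unk/... ids, add_*_token flags, BPE merges, chat template) from tokenizer.json, tokenizer_config.json, config.json, or merges.txt and writes it through a GGUFWriter. A rough sketch of how a conversion script might drive it; the model directory, vocabulary size, and the GGUFWriter arguments (output path plus architecture name, as in upstream gguf-py) are illustrative assumptions:

from gguf.gguf_writer import GGUFWriter
from gguf.vocab import SpecialVocab

# Hypothetical paths and sizes, for illustration only.
special_vocab = SpecialVocab("path/to/hf-model", load_merges=True, n_vocab=32000)
print(special_vocab)  # reports merge count and which special tokens were found

writer = GGUFWriter("model.gguf", "llama")
special_vocab.add_to_gguf(writer)  # emits token ids, add_*_token flags, chat template
# A full conversion would then add tensors and call the writer's write/close methods.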
The rest of this listing is the per-file output for the rebuilt executables under bigdl/cpp/libs/ (baby-llama.exe through vdot.exe, items 10-42 in the file list above); the registry diff reports each of them only as a changed binary file, with no content shown.