bigdl-core-cpp 2.1.0b2__py3-none-win_amd64.whl → 2.1.0b20240820.post1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. bigdl/cpp/convert-hf-to-gguf.py +1174 -314
  2. bigdl/cpp/gguf-py/gguf/__init__.py +2 -0
  3. bigdl/cpp/gguf-py/gguf/constants.py +463 -167
  4. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  5. bigdl/cpp/gguf-py/gguf/gguf_reader.py +29 -8
  6. bigdl/cpp/gguf-py/gguf/gguf_writer.py +475 -156
  7. bigdl/cpp/gguf-py/gguf/lazy.py +24 -49
  8. bigdl/cpp/gguf-py/gguf/metadata.py +503 -0
  9. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +209 -23
  10. bigdl/cpp/gguf-py/gguf/utility.py +69 -0
  11. bigdl/cpp/libs/baby-llama.exe +0 -0
  12. bigdl/cpp/libs/batched-bench.exe +0 -0
  13. bigdl/cpp/libs/batched.exe +0 -0
  14. bigdl/cpp/libs/beam-search.exe +0 -0
  15. bigdl/cpp/libs/benchmark.exe +0 -0
  16. bigdl/cpp/libs/common.lib +0 -0
  17. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  18. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  19. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  20. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  21. bigdl/cpp/libs/embedding.exe +0 -0
  22. bigdl/cpp/libs/export-lora.exe +0 -0
  23. bigdl/cpp/libs/finetune.exe +0 -0
  24. bigdl/cpp/libs/ggml_shared.dll +0 -0
  25. bigdl/cpp/libs/gguf.exe +0 -0
  26. bigdl/cpp/libs/gritlm.exe +0 -0
  27. bigdl/cpp/libs/imatrix.exe +0 -0
  28. bigdl/cpp/libs/infill.exe +0 -0
  29. bigdl/cpp/libs/llama-bench.exe +0 -0
  30. bigdl/cpp/libs/llama.dll +0 -0
  31. bigdl/cpp/libs/llava-cli.exe +0 -0
  32. bigdl/cpp/libs/llava_shared.dll +0 -0
  33. bigdl/cpp/libs/lookahead.exe +0 -0
  34. bigdl/cpp/libs/lookup.exe +0 -0
  35. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  36. bigdl/cpp/libs/main.exe +0 -0
  37. bigdl/cpp/libs/ollama.exe +0 -0
  38. bigdl/cpp/libs/parallel.exe +0 -0
  39. bigdl/cpp/libs/passkey.exe +0 -0
  40. bigdl/cpp/libs/perplexity.exe +0 -0
  41. bigdl/cpp/libs/q8dot.exe +0 -0
  42. bigdl/cpp/libs/quantize-stats.exe +0 -0
  43. bigdl/cpp/libs/quantize.exe +0 -0
  44. bigdl/cpp/libs/save-load-state.exe +0 -0
  45. bigdl/cpp/libs/server.exe +0 -0
  46. bigdl/cpp/libs/simple.exe +0 -0
  47. bigdl/cpp/libs/speculative.exe +0 -0
  48. bigdl/cpp/libs/tokenize.exe +0 -0
  49. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  50. bigdl/cpp/libs/vdot.exe +0 -0
  51. {bigdl_core_cpp-2.1.0b2.dist-info → bigdl_core_cpp-2.1.0b20240820.post1.dist-info}/METADATA +8 -8
  52. bigdl_core_cpp-2.1.0b20240820.post1.dist-info/RECORD +63 -0
  53. {bigdl_core_cpp-2.1.0b2.dist-info → bigdl_core_cpp-2.1.0b20240820.post1.dist-info}/WHEEL +1 -1
  54. bigdl_core_cpp-2.1.0b2.dist-info/RECORD +0 -61
  55. {bigdl_core_cpp-2.1.0b2.data → bigdl_core_cpp-2.1.0b20240820.post1.data}/scripts/init-llama-cpp.bat +0 -0
  56. {bigdl_core_cpp-2.1.0b2.data → bigdl_core_cpp-2.1.0b20240820.post1.data}/scripts/init-llama-cpp.ps1 +0 -0
  57. {bigdl_core_cpp-2.1.0b2.data → bigdl_core_cpp-2.1.0b20240820.post1.data}/scripts/init-ollama.bat +0 -0
  58. {bigdl_core_cpp-2.1.0b2.dist-info → bigdl_core_cpp-2.1.0b20240820.post1.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py CHANGED
@@ -10,7 +10,7 @@ class TensorNameMap:
         # Token embeddings
         MODEL_TENSOR.TOKEN_EMBD: (
             "gpt_neox.embed_in", # gptneox
-            "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
+            "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
             "transformer.word_embeddings", # falcon
             "word_embeddings", # bloom
             "model.embed_tokens", # llama-hf
@@ -24,6 +24,9 @@ class TensorNameMap:
             "backbone.embedding", # mamba
             "backbone.embeddings", # mamba-hf
             "transformer.in_out_embed", # Grok
+            "embedding.word_embeddings", # chatglm
+            "transformer.token_embeddings", # openelm
+            "shared", # t5
         ),
 
         # Token type embeddings
@@ -36,6 +39,7 @@ class TensorNameMap:
             "word_embeddings_layernorm", # bloom
             "embeddings.LayerNorm", # bert
             "emb_ln", # nomic-bert
+            "transformer.norm", # openelm
         ),
 
         # Position embeddings
@@ -48,16 +52,17 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out", # gptneox
-            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
+            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
             "output", # llama-pth bloom internlm2
             "word_embeddings_for_head", # persimmon
             "lm_head.linear", # phi2
+            "output_layer", # chatglm
         ),
 
         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm", # gptneox
-            "transformer.ln_f", # gpt2 gpt-j falcon
+            "transformer.ln_f", # gpt2 gpt-j falcon jais
             "model.norm", # llama-hf baichuan internlm2
             "norm", # llama-pth
             "transformer.norm_f", # mpt dbrx
@@ -68,11 +73,14 @@ class TensorNameMap:
             "model.norm_f", # mamba-qbert
             "backbone.norm_f", # mamba
             "transformer.rms_norm", # Grok
+            "encoder.final_layernorm", # chatglm
+            "transformer.norm", # openelm
         ),
 
         # Rope frequencies
         MODEL_TENSOR.ROPE_FREQS: (
             "rope.freqs", # llama-pth
+            "rotary_pos_emb.inv_freq", # chatglm
         ),
     }
 
@@ -80,7 +88,7 @@ class TensorNameMap:
         # Attention norm
         MODEL_TENSOR.ATTN_NORM: (
             "gpt_neox.layers.{bid}.input_layernorm", # gptneox
-            "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
+            "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
             "transformer.blocks.{bid}.norm_1", # mpt
             "transformer.h.{bid}.input_layernorm", # falcon7b
             "h.{bid}.input_layernorm", # bloom
@@ -97,17 +105,20 @@ class TensorNameMap:
             "backbone.layers.{bid}.norm", # mamba
             "transformer.decoder_layer.{bid}.rms_norm", # Grok
             "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
+            "encoder.layers.{bid}.input_layernorm", # chatglm
+            "transformer.layers.{bid}.attn_norm", # openelm
         ),
 
         # Attention norm 2
         MODEL_TENSOR.ATTN_NORM_2: (
             "transformer.h.{bid}.ln_attn", # falcon40b
+            "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
         ),
 
         # Attention query-key-value
         MODEL_TENSOR.ATTN_QKV: (
             "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
-            "transformer.h.{bid}.attn.c_attn", # gpt2 qwen
+            "transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
             "transformer.blocks.{bid}.attn.Wqkv", # mpt
             "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
             "transformer.h.{bid}.self_attention.query_key_value", # falcon
@@ -117,7 +128,9 @@ class TensorNameMap:
             "h.{bid}.attn.c_attn", # gpt2
             "transformer.h.{bid}.mixer.Wqkv", # phi2
             "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
-            "model.layers.{bid}.self_attn.qkv_proj" # phi3
+            "model.layers.{bid}.self_attn.qkv_proj", # phi3
+            "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
+            "transformer.layers.{bid}.attn.qkv_proj", # openelm
         ),
 
         # Attention query
@@ -128,7 +141,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.q_proj", # gpt-j
             "model.layers.layers.{bid}.self_attn.q_proj", # plamo
             "model.layers.{bid}.attention.wq", # internlm2
-            "transformer.decoder_layer.{bid}.multi_head_attention.query" # Grok
+            "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
         ),
 
         # Attention key
@@ -140,7 +153,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.k", # refact
             "model.layers.layers.{bid}.self_attn.k_proj", # plamo
             "model.layers.{bid}.attention.wk", # internlm2
-            "transformer.decoder_layer.{bid}.multi_head_attention.key" # Grok
+            "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
         ),
 
         # Attention value
@@ -158,7 +171,7 @@ class TensorNameMap:
         # Attention output
         MODEL_TENSOR.ATTN_OUT: (
             "gpt_neox.layers.{bid}.attention.dense", # gptneox
-            "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
+            "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
             "transformer.blocks.{bid}.attn.out_proj", # mpt
             "transformer.h.{bid}.self_attention.dense", # falcon
             "h.{bid}.self_attention.dense", # bloom
@@ -175,6 +188,8 @@ class TensorNameMap:
             "encoder.layers.{bid}.attn.out_proj", # nomic-bert
             "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
             "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
+            "encoder.layers.{bid}.self_attention.dense", # chatglm
+            "transformer.layers.{bid}.attn.out_proj", # openelm
         ),
 
         # Attention output norm
@@ -185,6 +200,10 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
         ),
 
+        MODEL_TENSOR.ATTN_POST_NORM: (
+            "model.layers.{bid}.post_attention_layernorm", # gemma2
+        ),
+
         # Rotary embeddings
         MODEL_TENSOR.ATTN_ROT_EMBD: (
             "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
@@ -196,7 +215,7 @@ class TensorNameMap:
         # Feed-forward norm
         MODEL_TENSOR.FFN_NORM: (
             "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
-            "transformer.h.{bid}.ln_2", # gpt2 refact qwen
+            "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
             "h.{bid}.post_attention_layernorm", # bloom
             "transformer.blocks.{bid}.norm_2", # mpt
             "model.layers.{bid}.post_attention_layernorm", # llama-hf
@@ -206,6 +225,18 @@ class TensorNameMap:
             "h.{bid}.ln_2", # gpt2
             "model.layers.{bid}.ffn_norm", # internlm2
             "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
+            "encoder.layers.{bid}.post_attention_layernorm", # chatglm
+            "transformer.layers.{bid}.ffn_norm", # openelm
+        ),
+
+        # Pre feed-forward norm
+        MODEL_TENSOR.FFN_PRE_NORM: (
+            "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
+        ),
+
+        # Post feed-forward norm
+        MODEL_TENSOR.FFN_POST_NORM: (
+            "model.layers.{bid}.post_feedforward_layernorm", # gemma2
         ),
 
         MODEL_TENSOR.FFN_GATE_INP: (
@@ -223,7 +254,7 @@ class TensorNameMap:
         # Feed-forward up
         MODEL_TENSOR.FFN_UP: (
             "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
-            "transformer.h.{bid}.mlp.c_fc", # gpt2
+            "transformer.h.{bid}.mlp.c_fc", # gpt2 jais
             "transformer.blocks.{bid}.ffn.up_proj", # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
             "h.{bid}.mlp.dense_h_to_4h", # bloom
@@ -245,6 +276,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.c_fc", # starcoder2
             "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
             "model.layers.{bid}.residual_mlp.w3", # arctic
+            "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
@@ -256,6 +288,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.FFN_UP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
         ),
 
         # AWQ-activation gate
@@ -268,6 +301,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
             "layers.{bid}.feed_forward.w1", # llama-pth
             "transformer.h.{bid}.mlp.w2", # qwen
+            "transformer.h.{bid}.mlp.c_fc2", # jais
             "model.layers.layers.{bid}.mlp.gate_proj", # plamo
             "model.layers.{bid}.feed_forward.w1", # internlm2
             "encoder.layers.{bid}.mlp.fc12", # nomic-bert
@@ -285,12 +319,13 @@ class TensorNameMap:
 
         MODEL_TENSOR.FFN_GATE_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
         ),
 
         # Feed-forward down
         MODEL_TENSOR.FFN_DOWN: (
             "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
-            "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
+            "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
             "transformer.blocks.{bid}.ffn.down_proj", # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
             "h.{bid}.mlp.dense_4h_to_h", # bloom
@@ -308,7 +343,10 @@ class TensorNameMap:
             "encoder.layers.{bid}.mlp.fc2", # nomic-bert
             "model.layers.{bid}.mlp.c_proj", # starcoder2
             "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
+            "transformer.layers.{bid}.ffn.proj_2", # openelm
             "model.layers.{bid}.residual_mlp.w2", # arctic
+            "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
+            "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -320,6 +358,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.FFN_DOWN_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
@@ -327,7 +366,8 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
             "model.layers.{bid}.self_attn.q_norm", # cohere
             "transformer.blocks.{bid}.attn.q_ln", # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
+            "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
+            "transformer.layers.{bid}.attn.q_norm", # openelm
         ),
 
         MODEL_TENSOR.ATTN_K_NORM: (
@@ -335,7 +375,8 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
             "model.layers.{bid}.self_attn.k_norm", # cohere
             "transformer.blocks.{bid}.attn.k_ln", # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
+            "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
+            "transformer.layers.{bid}.attn.k_norm", # openelm
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
@@ -347,6 +388,7 @@ class TensorNameMap:
             "encoder.layers.{bid}.norm2", # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
             "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
+            "encoder.layer.{bid}.layer_norm_2" # jina-v2-code
         ),
 
         MODEL_TENSOR.SSM_IN: (
@@ -383,6 +425,152 @@ class TensorNameMap:
             "model.layers.{bid}.out_proj",
             "backbone.layers.{bid}.mixer.out_proj",
         ),
+
+        MODEL_TENSOR.ATTN_Q_A: (
+            "model.layers.{bid}.self_attn.q_a_proj", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_Q_B: (
+            "model.layers.{bid}.self_attn.q_b_proj", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_KV_A_MQA: (
+            "model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_KV_B: (
+            "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_Q_A_NORM: (
+            "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_KV_A_NORM: (
+            "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_SUB_NORM: (
+            "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
+        ),
+
+        MODEL_TENSOR.FFN_SUB_NORM: (
+            "model.layers.{bid}.mlp.ffn_layernorm", # bitnet
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_NORM: (
+            "decoder.block.{bid}.layer.0.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_Q: (
+            "decoder.block.{bid}.layer.0.SelfAttention.q", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_K: (
+            "decoder.block.{bid}.layer.0.SelfAttention.k", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_V: (
+            "decoder.block.{bid}.layer.0.SelfAttention.v", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_OUT: (
+            "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_REL_B: (
+            "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
+            "decoder.block.{bid}.layer.1.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_K: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_V: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
+        ),
+
+        MODEL_TENSOR.DEC_FFN_NORM: (
+            "decoder.block.{bid}.layer.2.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.DEC_FFN_GATE: (
+            "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
+        ),
+
+        MODEL_TENSOR.DEC_FFN_UP: (
+            "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
+            "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
+        ),
+
+        MODEL_TENSOR.DEC_FFN_DOWN: (
+            "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
+        ),
+
+        MODEL_TENSOR.DEC_OUTPUT_NORM: (
+            "decoder.final_layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_NORM: (
+            "encoder.block.{bid}.layer.0.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_Q: (
+            "encoder.block.{bid}.layer.0.SelfAttention.q", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_K: (
+            "encoder.block.{bid}.layer.0.SelfAttention.k", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_V: (
+            "encoder.block.{bid}.layer.0.SelfAttention.v", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_OUT: (
+            "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_REL_B: (
+            "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
+        ),
+
+        MODEL_TENSOR.ENC_FFN_NORM: (
+            "encoder.block.{bid}.layer.1.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.ENC_FFN_GATE: (
+            "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
+        ),
+
+        MODEL_TENSOR.ENC_FFN_UP: (
+            "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
+            "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
+        ),
+
+        MODEL_TENSOR.ENC_FFN_DOWN: (
+            "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
+        ),
+
+        MODEL_TENSOR.ENC_OUTPUT_NORM: (
+            "encoder.final_layer_norm", # t5
+        ),
     }
 
     # architecture-specific block mappings
@@ -414,14 +602,12 @@ class TensorNameMap:
             for tensor, keys in self.block_mappings_cfg.items():
                 if tensor not in MODEL_TENSORS[arch]:
                     continue
-                # TODO: make this configurable
-                n_experts = 128
-                for xid in range(n_experts):
-                    tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
-                    self.mapping[tensor_name] = (tensor, tensor_name)
-                    for key in keys:
-                        key = key.format(bid = bid, xid = xid)
-                        self.mapping[key] = (tensor, tensor_name)
+
+                tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
+                self.mapping[tensor_name] = (tensor, tensor_name)
+                for key in keys:
+                    key = key.format(bid = bid)
+                    self.mapping[key] = (tensor, tensor_name)
 
     def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
         result = self.mapping.get(key)
@@ -460,4 +646,4 @@ class TensorNameMap:
 
 
 def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
-    return TensorNameMap(arch, n_blocks)
+    return TensorNameMap(arch, n_blocks)
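For orientation, the mapping table above is consumed through get_tensor_name_map(arch, n_blocks), which expands every {bid} template and lets converter scripts translate checkpoint tensor names into GGUF names. A minimal usage sketch, assuming the bundled gguf-py package is importable; the llama-style tensor name below is illustrative, not taken from this diff:

import gguf

# Build the lookup for a 32-block llama-architecture model.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, n_blocks=32)

# Translate a Hugging Face tensor name; try_suffixes strips a trailing
# ".weight"/".bias" before the lookup and re-appends it to the result.
gguf_name = tmap.get_name("model.layers.0.self_attn.q_proj.weight",
                          try_suffixes=(".weight", ".bias"))
print(gguf_name)  # expected: "blk.0.attn_q.weight"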
bigdl/cpp/gguf-py/gguf/utility.py ADDED
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+from typing import Literal
+
+
+def fill_templated_filename(filename: str, output_type: str | None) -> str:
+    # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
+    ftype_lowercase: str = output_type.lower() if output_type is not None else ""
+    ftype_uppercase: str = output_type.upper() if output_type is not None else ""
+    return filename.format(ftype_lowercase,
+                           outtype=ftype_lowercase, ftype=ftype_lowercase,
+                           OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
+
+
+def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str:
+    if model_params_count > 1e12 :
+        # Trillions Of Parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9 :
+        # Billions Of Parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6 :
+        # Millions Of Parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands Of Parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+
+    fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0)
+
+    return f"{scaled_model_params:.{fix}f}{scale_suffix}"
+
+
+def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str:
+
+    if expert_count > 0:
+        pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2)
+        size_class = f"{expert_count}x{pretty_size}"
+    else:
+        size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2)
+
+    return size_class
+
+
+def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
+    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+
+    if base_name is not None:
+        name = base_name.strip().replace(' ', '-').replace('/', '-')
+    elif model_name is not None:
+        name = model_name.strip().replace(' ', '-').replace('/', '-')
+    else:
+        name = "ggml-model"
+
+    parameters = f"-{size_label}" if size_label is not None else ""
+
+    finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else ""
+
+    version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
+
+    encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else ""
+
+    kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
+
+    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
bigdl/cpp/libs/* CHANGED
Binary files (items 11–50 in the file list above, including common.lib, gguf.exe, gritlm.exe, infill.exe, llama.dll, lookup.exe, main.exe, ollama.exe, q8dot.exe, server.exe, simple.exe and vdot.exe) changed; no textual diff is shown for binaries.
{bigdl_core_cpp-2.1.0b2.dist-info → bigdl_core_cpp-2.1.0b20240820.post1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bigdl-core-cpp
-Version: 2.1.0b2
+Version: 2.1.0b20240820.post1
 Summary: Large Language Model Develop Toolkit
 Author: BigDL Authors
 License: Apache License, Version 2.0
@@ -8,11 +8,11 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: Implementation :: CPython
-Requires-Dist: torch ==2.2.0
-Requires-Dist: numpy ==1.26.4
-Requires-Dist: transformers <5.0.0,>=4.35.2
-Requires-Dist: sentencepiece ~=0.1.98
-Requires-Dist: accelerate ==0.21.0
-Requires-Dist: protobuf <5.0.0,>=4.21.0
-Requires-Dist: gguf >=0.1.0
+Requires-Dist: torch==2.2.0
+Requires-Dist: numpy==1.26.4
+Requires-Dist: transformers<5.0.0,>=4.35.2
+Requires-Dist: sentencepiece~=0.1.98
+Requires-Dist: accelerate==0.21.0
+Requires-Dist: protobuf<5.0.0,>=4.21.0
+Requires-Dist: gguf>=0.1.0
 
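Note that the Requires-Dist changes above only normalize whitespace in the PEP 508 specifiers; the resolved dependencies are identical. Pinning the new build would look like this (illustrative command):

pip install bigdl-core-cpp==2.1.0b20240820.post1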