bigdl-core-cpp 2.1.0b2__py3-none-win_amd64.whl → 2.1.0b20240820.post1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. bigdl/cpp/convert-hf-to-gguf.py +1174 -314
  2. bigdl/cpp/gguf-py/gguf/__init__.py +2 -0
  3. bigdl/cpp/gguf-py/gguf/constants.py +463 -167
  4. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  5. bigdl/cpp/gguf-py/gguf/gguf_reader.py +29 -8
  6. bigdl/cpp/gguf-py/gguf/gguf_writer.py +475 -156
  7. bigdl/cpp/gguf-py/gguf/lazy.py +24 -49
  8. bigdl/cpp/gguf-py/gguf/metadata.py +503 -0
  9. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +209 -23
  10. bigdl/cpp/gguf-py/gguf/utility.py +69 -0
  11. bigdl/cpp/libs/baby-llama.exe +0 -0
  12. bigdl/cpp/libs/batched-bench.exe +0 -0
  13. bigdl/cpp/libs/batched.exe +0 -0
  14. bigdl/cpp/libs/beam-search.exe +0 -0
  15. bigdl/cpp/libs/benchmark.exe +0 -0
  16. bigdl/cpp/libs/common.lib +0 -0
  17. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  18. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  19. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  20. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  21. bigdl/cpp/libs/embedding.exe +0 -0
  22. bigdl/cpp/libs/export-lora.exe +0 -0
  23. bigdl/cpp/libs/finetune.exe +0 -0
  24. bigdl/cpp/libs/ggml_shared.dll +0 -0
  25. bigdl/cpp/libs/gguf.exe +0 -0
  26. bigdl/cpp/libs/gritlm.exe +0 -0
  27. bigdl/cpp/libs/imatrix.exe +0 -0
  28. bigdl/cpp/libs/infill.exe +0 -0
  29. bigdl/cpp/libs/llama-bench.exe +0 -0
  30. bigdl/cpp/libs/llama.dll +0 -0
  31. bigdl/cpp/libs/llava-cli.exe +0 -0
  32. bigdl/cpp/libs/llava_shared.dll +0 -0
  33. bigdl/cpp/libs/lookahead.exe +0 -0
  34. bigdl/cpp/libs/lookup.exe +0 -0
  35. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  36. bigdl/cpp/libs/main.exe +0 -0
  37. bigdl/cpp/libs/ollama.exe +0 -0
  38. bigdl/cpp/libs/parallel.exe +0 -0
  39. bigdl/cpp/libs/passkey.exe +0 -0
  40. bigdl/cpp/libs/perplexity.exe +0 -0
  41. bigdl/cpp/libs/q8dot.exe +0 -0
  42. bigdl/cpp/libs/quantize-stats.exe +0 -0
  43. bigdl/cpp/libs/quantize.exe +0 -0
  44. bigdl/cpp/libs/save-load-state.exe +0 -0
  45. bigdl/cpp/libs/server.exe +0 -0
  46. bigdl/cpp/libs/simple.exe +0 -0
  47. bigdl/cpp/libs/speculative.exe +0 -0
  48. bigdl/cpp/libs/tokenize.exe +0 -0
  49. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  50. bigdl/cpp/libs/vdot.exe +0 -0
  51. {bigdl_core_cpp-2.1.0b2.dist-info → bigdl_core_cpp-2.1.0b20240820.post1.dist-info}/METADATA +8 -8
  52. bigdl_core_cpp-2.1.0b20240820.post1.dist-info/RECORD +63 -0
  53. {bigdl_core_cpp-2.1.0b2.dist-info → bigdl_core_cpp-2.1.0b20240820.post1.dist-info}/WHEEL +1 -1
  54. bigdl_core_cpp-2.1.0b2.dist-info/RECORD +0 -61
  55. {bigdl_core_cpp-2.1.0b2.data → bigdl_core_cpp-2.1.0b20240820.post1.data}/scripts/init-llama-cpp.bat +0 -0
  56. {bigdl_core_cpp-2.1.0b2.data → bigdl_core_cpp-2.1.0b20240820.post1.data}/scripts/init-llama-cpp.ps1 +0 -0
  57. {bigdl_core_cpp-2.1.0b2.data → bigdl_core_cpp-2.1.0b20240820.post1.data}/scripts/init-ollama.bat +0 -0
  58. {bigdl_core_cpp-2.1.0b2.dist-info → bigdl_core_cpp-2.1.0b20240820.post1.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py CHANGED
@@ -10,7 +10,7 @@ class TensorNameMap:
         # Token embeddings
         MODEL_TENSOR.TOKEN_EMBD: (
             "gpt_neox.embed_in", # gptneox
-            "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
+            "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
             "transformer.word_embeddings", # falcon
             "word_embeddings", # bloom
             "model.embed_tokens", # llama-hf
@@ -24,6 +24,9 @@ class TensorNameMap:
             "backbone.embedding", # mamba
             "backbone.embeddings", # mamba-hf
             "transformer.in_out_embed", # Grok
+            "embedding.word_embeddings", # chatglm
+            "transformer.token_embeddings", # openelm
+            "shared", # t5
         ),
 
         # Token type embeddings
@@ -36,6 +39,7 @@ class TensorNameMap:
             "word_embeddings_layernorm", # bloom
             "embeddings.LayerNorm", # bert
             "emb_ln", # nomic-bert
+            "transformer.norm", # openelm
         ),
 
         # Position embeddings
@@ -48,16 +52,17 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out", # gptneox
-            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
+            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
             "output", # llama-pth bloom internlm2
             "word_embeddings_for_head", # persimmon
             "lm_head.linear", # phi2
+            "output_layer", # chatglm
         ),
 
         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm", # gptneox
-            "transformer.ln_f", # gpt2 gpt-j falcon
+            "transformer.ln_f", # gpt2 gpt-j falcon jais
             "model.norm", # llama-hf baichuan internlm2
             "norm", # llama-pth
             "transformer.norm_f", # mpt dbrx
@@ -68,11 +73,14 @@ class TensorNameMap:
             "model.norm_f", # mamba-qbert
             "backbone.norm_f", # mamba
             "transformer.rms_norm", # Grok
+            "encoder.final_layernorm", # chatglm
+            "transformer.norm", # openelm
         ),
 
         # Rope frequencies
         MODEL_TENSOR.ROPE_FREQS: (
             "rope.freqs", # llama-pth
+            "rotary_pos_emb.inv_freq", # chatglm
         ),
     }
 
@@ -80,7 +88,7 @@ class TensorNameMap:
         # Attention norm
         MODEL_TENSOR.ATTN_NORM: (
             "gpt_neox.layers.{bid}.input_layernorm", # gptneox
-            "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
+            "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
             "transformer.blocks.{bid}.norm_1", # mpt
             "transformer.h.{bid}.input_layernorm", # falcon7b
             "h.{bid}.input_layernorm", # bloom
@@ -97,17 +105,20 @@ class TensorNameMap:
             "backbone.layers.{bid}.norm", # mamba
             "transformer.decoder_layer.{bid}.rms_norm", # Grok
             "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
+            "encoder.layers.{bid}.input_layernorm", # chatglm
+            "transformer.layers.{bid}.attn_norm", # openelm
         ),
 
         # Attention norm 2
         MODEL_TENSOR.ATTN_NORM_2: (
             "transformer.h.{bid}.ln_attn", # falcon40b
+            "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
         ),
 
         # Attention query-key-value
         MODEL_TENSOR.ATTN_QKV: (
             "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
-            "transformer.h.{bid}.attn.c_attn", # gpt2 qwen
+            "transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
             "transformer.blocks.{bid}.attn.Wqkv", # mpt
             "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
             "transformer.h.{bid}.self_attention.query_key_value", # falcon
@@ -117,7 +128,9 @@ class TensorNameMap:
             "h.{bid}.attn.c_attn", # gpt2
             "transformer.h.{bid}.mixer.Wqkv", # phi2
             "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
-            "model.layers.{bid}.self_attn.qkv_proj" # phi3
+            "model.layers.{bid}.self_attn.qkv_proj", # phi3
+            "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
+            "transformer.layers.{bid}.attn.qkv_proj", # openelm
         ),
 
         # Attention query
@@ -128,7 +141,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.q_proj", # gpt-j
             "model.layers.layers.{bid}.self_attn.q_proj", # plamo
             "model.layers.{bid}.attention.wq", # internlm2
-            "transformer.decoder_layer.{bid}.multi_head_attention.query" # Grok
+            "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
         ),
 
         # Attention key
@@ -140,7 +153,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.k", # refact
             "model.layers.layers.{bid}.self_attn.k_proj", # plamo
             "model.layers.{bid}.attention.wk", # internlm2
-            "transformer.decoder_layer.{bid}.multi_head_attention.key" # Grok
+            "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
         ),
 
         # Attention value
@@ -158,7 +171,7 @@ class TensorNameMap:
         # Attention output
         MODEL_TENSOR.ATTN_OUT: (
             "gpt_neox.layers.{bid}.attention.dense", # gptneox
-            "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
+            "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
             "transformer.blocks.{bid}.attn.out_proj", # mpt
             "transformer.h.{bid}.self_attention.dense", # falcon
             "h.{bid}.self_attention.dense", # bloom
@@ -175,6 +188,8 @@ class TensorNameMap:
             "encoder.layers.{bid}.attn.out_proj", # nomic-bert
             "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
             "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
+            "encoder.layers.{bid}.self_attention.dense", # chatglm
+            "transformer.layers.{bid}.attn.out_proj", # openelm
         ),
 
         # Attention output norm
@@ -185,6 +200,10 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
         ),
 
+        MODEL_TENSOR.ATTN_POST_NORM: (
+            "model.layers.{bid}.post_attention_layernorm", # gemma2
+        ),
+
         # Rotary embeddings
         MODEL_TENSOR.ATTN_ROT_EMBD: (
             "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
@@ -196,7 +215,7 @@ class TensorNameMap:
         # Feed-forward norm
         MODEL_TENSOR.FFN_NORM: (
             "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
-            "transformer.h.{bid}.ln_2", # gpt2 refact qwen
+            "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
             "h.{bid}.post_attention_layernorm", # bloom
             "transformer.blocks.{bid}.norm_2", # mpt
             "model.layers.{bid}.post_attention_layernorm", # llama-hf
@@ -206,6 +225,18 @@ class TensorNameMap:
             "h.{bid}.ln_2", # gpt2
             "model.layers.{bid}.ffn_norm", # internlm2
             "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
+            "encoder.layers.{bid}.post_attention_layernorm", # chatglm
+            "transformer.layers.{bid}.ffn_norm", # openelm
+        ),
+
+        # Pre feed-forward norm
+        MODEL_TENSOR.FFN_PRE_NORM: (
+            "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
+        ),
+
+        # Post feed-forward norm
+        MODEL_TENSOR.FFN_POST_NORM: (
+            "model.layers.{bid}.post_feedforward_layernorm", # gemma2
         ),
 
         MODEL_TENSOR.FFN_GATE_INP: (
@@ -223,7 +254,7 @@ class TensorNameMap:
         # Feed-forward up
         MODEL_TENSOR.FFN_UP: (
             "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
-            "transformer.h.{bid}.mlp.c_fc", # gpt2
+            "transformer.h.{bid}.mlp.c_fc", # gpt2 jais
             "transformer.blocks.{bid}.ffn.up_proj", # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
             "h.{bid}.mlp.dense_h_to_4h", # bloom
@@ -245,6 +276,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.c_fc", # starcoder2
             "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
             "model.layers.{bid}.residual_mlp.w3", # arctic
+            "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
@@ -256,6 +288,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.FFN_UP_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
         ),
 
         # AWQ-activation gate
@@ -268,6 +301,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
             "layers.{bid}.feed_forward.w1", # llama-pth
             "transformer.h.{bid}.mlp.w2", # qwen
+            "transformer.h.{bid}.mlp.c_fc2", # jais
             "model.layers.layers.{bid}.mlp.gate_proj", # plamo
             "model.layers.{bid}.feed_forward.w1", # internlm2
             "encoder.layers.{bid}.mlp.fc12", # nomic-bert
@@ -285,12 +319,13 @@ class TensorNameMap:
 
         MODEL_TENSOR.FFN_GATE_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
         ),
 
         # Feed-forward down
         MODEL_TENSOR.FFN_DOWN: (
             "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
-            "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
+            "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
             "transformer.blocks.{bid}.ffn.down_proj", # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
             "h.{bid}.mlp.dense_4h_to_h", # bloom
@@ -308,7 +343,10 @@ class TensorNameMap:
             "encoder.layers.{bid}.mlp.fc2", # nomic-bert
             "model.layers.{bid}.mlp.c_proj", # starcoder2
             "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
+            "transformer.layers.{bid}.ffn.proj_2", # openelm
             "model.layers.{bid}.residual_mlp.w2", # arctic
+            "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
+            "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -320,6 +358,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.FFN_DOWN_SHEXP: (
             "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
+            "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
@@ -327,7 +366,8 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
             "model.layers.{bid}.self_attn.q_norm", # cohere
             "transformer.blocks.{bid}.attn.q_ln", # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
+            "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
+            "transformer.layers.{bid}.attn.q_norm", # openelm
         ),
 
         MODEL_TENSOR.ATTN_K_NORM: (
@@ -335,7 +375,8 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
             "model.layers.{bid}.self_attn.k_norm", # cohere
             "transformer.blocks.{bid}.attn.k_ln", # sea-lion
-            "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
+            "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
+            "transformer.layers.{bid}.attn.k_norm", # openelm
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
@@ -347,6 +388,7 @@ class TensorNameMap:
             "encoder.layers.{bid}.norm2", # nomic-bert
             "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
             "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
+            "encoder.layer.{bid}.layer_norm_2" # jina-v2-code
         ),
 
         MODEL_TENSOR.SSM_IN: (
@@ -383,6 +425,152 @@ class TensorNameMap:
             "model.layers.{bid}.out_proj",
             "backbone.layers.{bid}.mixer.out_proj",
         ),
+
+        MODEL_TENSOR.ATTN_Q_A: (
+            "model.layers.{bid}.self_attn.q_a_proj", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_Q_B: (
+            "model.layers.{bid}.self_attn.q_b_proj", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_KV_A_MQA: (
+            "model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_KV_B: (
+            "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_Q_A_NORM: (
+            "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_KV_A_NORM: (
+            "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_SUB_NORM: (
+            "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
+        ),
+
+        MODEL_TENSOR.FFN_SUB_NORM: (
+            "model.layers.{bid}.mlp.ffn_layernorm", # bitnet
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_NORM: (
+            "decoder.block.{bid}.layer.0.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_Q: (
+            "decoder.block.{bid}.layer.0.SelfAttention.q", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_K: (
+            "decoder.block.{bid}.layer.0.SelfAttention.k", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_V: (
+            "decoder.block.{bid}.layer.0.SelfAttention.v", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_OUT: (
+            "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
+        ),
+
+        MODEL_TENSOR.DEC_ATTN_REL_B: (
+            "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
+            "decoder.block.{bid}.layer.1.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_K: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_V: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
+        ),
+
+        MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
+            "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
+        ),
+
+        MODEL_TENSOR.DEC_FFN_NORM: (
+            "decoder.block.{bid}.layer.2.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.DEC_FFN_GATE: (
+            "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
+        ),
+
+        MODEL_TENSOR.DEC_FFN_UP: (
+            "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
+            "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
+        ),
+
+        MODEL_TENSOR.DEC_FFN_DOWN: (
+            "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
+        ),
+
+        MODEL_TENSOR.DEC_OUTPUT_NORM: (
+            "decoder.final_layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_NORM: (
+            "encoder.block.{bid}.layer.0.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_Q: (
+            "encoder.block.{bid}.layer.0.SelfAttention.q", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_K: (
+            "encoder.block.{bid}.layer.0.SelfAttention.k", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_V: (
+            "encoder.block.{bid}.layer.0.SelfAttention.v", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_OUT: (
+            "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
+        ),
+
+        MODEL_TENSOR.ENC_ATTN_REL_B: (
+            "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
+        ),
+
+        MODEL_TENSOR.ENC_FFN_NORM: (
+            "encoder.block.{bid}.layer.1.layer_norm", # t5
+        ),
+
+        MODEL_TENSOR.ENC_FFN_GATE: (
+            "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
+        ),
+
+        MODEL_TENSOR.ENC_FFN_UP: (
+            "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
+            "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
+        ),
+
+        MODEL_TENSOR.ENC_FFN_DOWN: (
+            "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
+        ),
+
+        MODEL_TENSOR.ENC_OUTPUT_NORM: (
+            "encoder.final_layer_norm", # t5
+        ),
     }
 
     # architecture-specific block mappings
@@ -414,14 +602,12 @@ class TensorNameMap:
             for tensor, keys in self.block_mappings_cfg.items():
                 if tensor not in MODEL_TENSORS[arch]:
                     continue
-                # TODO: make this configurable
-                n_experts = 128
-                for xid in range(n_experts):
-                    tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
-                    self.mapping[tensor_name] = (tensor, tensor_name)
-                    for key in keys:
-                        key = key.format(bid = bid, xid = xid)
-                        self.mapping[key] = (tensor, tensor_name)
+
+                tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
+                self.mapping[tensor_name] = (tensor, tensor_name)
+                for key in keys:
+                    key = key.format(bid = bid)
+                    self.mapping[key] = (tensor, tensor_name)
 
     def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
         result = self.mapping.get(key)
@@ -460,4 +646,4 @@ class TensorNameMap:
 
 
 def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
-    return TensorNameMap(arch, n_blocks)
+    return TensorNameMap(arch, n_blocks)
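For orientation, the mapping table above is consumed through get_tensor_name_map(arch, n_blocks), which expands every {bid} template and lets converter scripts translate checkpoint tensor names into GGUF names. A minimal usage sketch, assuming the bundled gguf-py package is importable; the llama-style tensor name below is illustrative, not taken from this diff:

import gguf

# Build the lookup for a 32-block llama-architecture model.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, n_blocks=32)

# Translate a Hugging Face tensor name; try_suffixes strips a trailing
# ".weight"/".bias" before the lookup and re-appends it to the result.
gguf_name = tmap.get_name("model.layers.0.self_attn.q_proj.weight",
                          try_suffixes=(".weight", ".bias"))
print(gguf_name)  # expected: "blk.0.attn_q.weight"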
bigdl/cpp/gguf-py/gguf/utility.py ADDED
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+from typing import Literal
+
+
+def fill_templated_filename(filename: str, output_type: str | None) -> str:
+    # Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
+    ftype_lowercase: str = output_type.lower() if output_type is not None else ""
+    ftype_uppercase: str = output_type.upper() if output_type is not None else ""
+    return filename.format(ftype_lowercase,
+                           outtype=ftype_lowercase, ftype=ftype_lowercase,
+                           OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
+
+
+def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str:
+    if model_params_count > 1e12 :
+        # Trillions Of Parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9 :
+        # Billions Of Parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6 :
+        # Millions Of Parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands Of Parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+
+    fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0)
+
+    return f"{scaled_model_params:.{fix}f}{scale_suffix}"
+
+
+def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str:
+
+    if expert_count > 0:
+        pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2)
+        size_class = f"{expert_count}x{pretty_size}"
+    else:
+        size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2)
+
+    return size_class
+
+
+def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
+    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+
+    if base_name is not None:
+        name = base_name.strip().replace(' ', '-').replace('/', '-')
+    elif model_name is not None:
+        name = model_name.strip().replace(' ', '-').replace('/', '-')
+    else:
+        name = "ggml-model"
+
+    parameters = f"-{size_label}" if size_label is not None else ""
+
+    finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else ""
+
+    version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
+
+    encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else ""
+
+    kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
+
+    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
bigdl/cpp/libs/* CHANGED
Binary files (items 11–50 in the file list above, including common.lib, gguf.exe, gritlm.exe, infill.exe, llama.dll, lookup.exe, main.exe, ollama.exe, q8dot.exe, server.exe, simple.exe and vdot.exe) changed; no textual diff is shown for binaries.
{bigdl_core_cpp-2.1.0b2.dist-info → bigdl_core_cpp-2.1.0b20240820.post1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bigdl-core-cpp
-Version: 2.1.0b2
+Version: 2.1.0b20240820.post1
 Summary: Large Language Model Develop Toolkit
 Author: BigDL Authors
 License: Apache License, Version 2.0
@@ -8,11 +8,11 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: Implementation :: CPython
-Requires-Dist: torch ==2.2.0
-Requires-Dist: numpy ==1.26.4
-Requires-Dist: transformers <5.0.0,>=4.35.2
-Requires-Dist: sentencepiece ~=0.1.98
-Requires-Dist: accelerate ==0.21.0
-Requires-Dist: protobuf <5.0.0,>=4.21.0
-Requires-Dist: gguf >=0.1.0
+Requires-Dist: torch==2.2.0
+Requires-Dist: numpy==1.26.4
+Requires-Dist: transformers<5.0.0,>=4.35.2
+Requires-Dist: sentencepiece~=0.1.98
+Requires-Dist: accelerate==0.21.0
+Requires-Dist: protobuf<5.0.0,>=4.21.0
+Requires-Dist: gguf>=0.1.0
 
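Note that the Requires-Dist changes above only normalize whitespace in the PEP 508 specifiers; the resolved dependencies are identical. Pinning the new build would look like this (illustrative command):

pip install bigdl-core-cpp==2.1.0b20240820.post1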