PyPI - bigdl-core-cpp - Versions diffs - 2.6.0b20250204__py3-none-win_amd64.whl → 2.6.0b20250204.post0__py3-none-win_amd64.whl - Mend

bigdl-core-cpp 2.6.0b20250204__py3-none-win_amd64.whl → 2.6.0b20250204.post0__py3-none-win_amd64.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (47)

bigdl/cpp/convert_hf_to_gguf.py +99 -44
bigdl/cpp/convert_hf_to_gguf_update.py +4 -1
bigdl/cpp/convert_lora_to_gguf.py +41 -11
bigdl/cpp/gguf-py/gguf/constants.py +79 -18
bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
bigdl/cpp/gguf-py/gguf/gguf_writer.py +36 -12
bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
bigdl/cpp/gguf-py/gguf/tensor_mapping.py +17 -15
bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
bigdl/cpp/libs/common.lib +0 -0
bigdl/cpp/libs/ggml-cpu.dll +0 -0
bigdl/cpp/libs/ggml-sycl.dll +0 -0
bigdl/cpp/libs/ggml.dll +0 -0
bigdl/cpp/libs/llama-batched.exe +0 -0
bigdl/cpp/libs/llama-bench.exe +0 -0
bigdl/cpp/libs/llama-cli.exe +0 -0
bigdl/cpp/libs/llama-embedding.exe +0 -0
bigdl/cpp/libs/llama-gguf.exe +0 -0
bigdl/cpp/libs/llama-llava-cli.exe +0 -0
bigdl/cpp/libs/llama-lookup.exe +0 -0
bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
bigdl/cpp/libs/llama-perplexity.exe +0 -0
bigdl/cpp/libs/llama-quantize.exe +0 -0
bigdl/cpp/libs/llama-server.exe +0 -0
bigdl/cpp/libs/llama-simple.exe +0 -0
bigdl/cpp/libs/llama-speculative.exe +0 -0
bigdl/cpp/libs/llama-tokenize.exe +0 -0
bigdl/cpp/libs/llama.dll +0 -0
bigdl/cpp/libs/llava_shared.dll +0 -0
bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
bigdl/cpp/libs/ollama.exe +0 -0
bigdl/cpp/libs/ollama_ggml.dll +0 -0
bigdl/cpp/libs/ollama_llama.dll +0 -0
bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
{bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-ollama.bat +1 -1
{bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/METADATA +1 -1
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD +54 -0
bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/ipex_llm/ollama_llama_server.exe +0 -0
bigdl_core_cpp-2.6.0b20250204.dist-info/RECORD +0 -50
{bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.bat +0 -0
{bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.ps1 +0 -0
{bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/WHEEL +0 -0
{bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/top_level.txt +0 -0

bigdl/cpp/gguf-py/gguf/gguf_writer.py CHANGED Viewed

@@ -568,6 +568,9 @@ class GGUFWriter:
     def add_base_model_organization(self, source_id: int, organization: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
+    def add_base_model_description(self, source_id: int, description: str) -> None:
+        self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
     def add_base_model_url(self, source_id: int, url: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
@@ -580,15 +583,42 @@ class GGUFWriter:
     def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
+    def add_dataset_count(self, source_count: int) -> None:
+        self.add_uint32(Keys.General.DATASET_COUNT, source_count)
+    def add_dataset_name(self, source_id: int, name: str) -> None:
+        self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
+    def add_dataset_author(self, source_id: int, author: str) -> None:
+        self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
+    def add_dataset_version(self, source_id: int, version: str) -> None:
+        self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
+    def add_dataset_organization(self, source_id: int, organization: str) -> None:
+        self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
+    def add_dataset_description(self, source_id: int, description: str) -> None:
+        self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
+    def add_dataset_url(self, source_id: int, url: str) -> None:
+        self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
+    def add_dataset_doi(self, source_id: int, doi: str) -> None:
+        self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
+    def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
+        self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
+    def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
+        self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
     def add_tags(self, tags: Sequence[str]) -> None:
         self.add_array(Keys.General.TAGS, tags)
     def add_languages(self, languages: Sequence[str]) -> None:
         self.add_array(Keys.General.LANGUAGES, languages)
-    def add_datasets(self, datasets: Sequence[str]) -> None:
-        self.add_array(Keys.General.DATASETS, datasets)
     def add_tensor_data_layout(self, layout: str) -> None:
         self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
@@ -721,6 +751,9 @@ class GGUFWriter:
     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
+    def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
+        self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
     def add_rope_freq_base(self, value: float) -> None:
         self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
@@ -843,15 +876,6 @@ class GGUFWriter:
         self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
-    def add_prefix_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
-    def add_suffix_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
-    def add_middle_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
     def add_eot_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOT_ID, id)

bigdl/cpp/gguf-py/gguf/metadata.py CHANGED Viewed

@@ -41,7 +41,7 @@ class Metadata:
     base_models: Optional[list[dict]] = None
     tags: Optional[list[str]] = None
     languages: Optional[list[str]] = None
-    datasets: Optional[list[str]] = None
+    datasets: Optional[list[dict]] = None
     @staticmethod
     def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
@@ -91,9 +91,11 @@ class Metadata:
         # Base Models is received here as an array of models
         metadata.base_models     = metadata_override.get("general.base_models",        metadata.base_models)
+        # Datasets is received here as an array of datasets
+        metadata.datasets        = metadata_override.get("general.datasets",           metadata.datasets)
         metadata.tags            = metadata_override.get(Keys.General.TAGS,            metadata.tags)
         metadata.languages       = metadata_override.get(Keys.General.LANGUAGES,       metadata.languages)
-        metadata.datasets        = metadata_override.get(Keys.General.DATASETS,        metadata.datasets)
         # Direct Metadata Override (via direct cli argument)
         if model_name is not None:
@@ -346,12 +348,12 @@ class Metadata:
             use_model_card_metadata("author", "model_creator")
             use_model_card_metadata("basename", "model_type")
-            if "base_model" in model_card:
+            if "base_model" in model_card or "base_models" in model_card or "base_model_sources" in model_card:
                 # This represents the parent models that this is based on
                 # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
                 # Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
                 metadata_base_models = []
-                base_model_value = model_card.get("base_model", None)
+                base_model_value = model_card.get("base_model", model_card.get("base_models", model_card.get("base_model_sources", None)))
                 if base_model_value is not None:
                     if isinstance(base_model_value, str):
@@ -364,18 +366,106 @@ class Metadata:
                 for model_id in metadata_base_models:
                     # NOTE: model size of base model is assumed to be similar to the size of the current model
-                    model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
                     base_model = {}
-                    if model_full_name_component is not None:
-                        base_model["name"] = Metadata.id_to_title(model_full_name_component)
-                    if org_component is not None:
-                        base_model["organization"] = Metadata.id_to_title(org_component)
-                    if version is not None:
-                        base_model["version"] = version
-                    if org_component is not None and model_full_name_component is not None:
-                        base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
+                    if isinstance(model_id, str):
+                        if model_id.startswith("http://") or model_id.startswith("https://") or model_id.startswith("ssh://"):
+                            base_model["repo_url"] = model_id
+                            # Check if Hugging Face ID is present in URL
+                            if "huggingface.co" in model_id:
+                                match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", model_id)
+                                if match:
+                                    model_id_component = match.group(1)
+                                    model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id_component, total_params)
+                                    # Populate model dictionary with extracted components
+                                    if model_full_name_component is not None:
+                                        base_model["name"] = Metadata.id_to_title(model_full_name_component)
+                                    if org_component is not None:
+                                        base_model["organization"] = Metadata.id_to_title(org_component)
+                                    if version is not None:
+                                        base_model["version"] = version
+                        else:
+                            # Likely a Hugging Face ID
+                            model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
+                            # Populate model dictionary with extracted components
+                            if model_full_name_component is not None:
+                                base_model["name"] = Metadata.id_to_title(model_full_name_component)
+                            if org_component is not None:
+                                base_model["organization"] = Metadata.id_to_title(org_component)
+                            if version is not None:
+                                base_model["version"] = version
+                            if org_component is not None and model_full_name_component is not None:
+                                base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
+                    elif isinstance(model_id, dict):
+                        base_model = model_id
+                    else:
+                        logger.error(f"base model entry '{str(model_id)}' not in a known format")
                     metadata.base_models.append(base_model)
+            if "datasets" in model_card or "dataset" in model_card or "dataset_sources" in model_card:
+                # This represents the datasets that this was trained from
+                metadata_datasets = []
+                dataset_value = model_card.get("datasets", model_card.get("dataset", model_card.get("dataset_sources", None)))
+                if dataset_value is not None:
+                    if isinstance(dataset_value, str):
+                        metadata_datasets.append(dataset_value)
+                    elif isinstance(dataset_value, list):
+                        metadata_datasets.extend(dataset_value)
+                if metadata.datasets is None:
+                    metadata.datasets = []
+                for dataset_id in metadata_datasets:
+                    # NOTE: model size of base model is assumed to be similar to the size of the current model
+                    dataset = {}
+                    if isinstance(dataset_id, str):
+                        if dataset_id.startswith(("http://", "https://", "ssh://")):
+                            dataset["repo_url"] = dataset_id
+                            # Check if Hugging Face ID is present in URL
+                            if "huggingface.co" in dataset_id:
+                                match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", dataset_id)
+                                if match:
+                                    dataset_id_component = match.group(1)
+                                    dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id_component, total_params)
+                                    # Populate dataset dictionary with extracted components
+                                    if dataset_name_component is not None:
+                                        dataset["name"] = Metadata.id_to_title(dataset_name_component)
+                                    if org_component is not None:
+                                        dataset["organization"] = Metadata.id_to_title(org_component)
+                                    if version is not None:
+                                        dataset["version"] = version
+                        else:
+                            # Likely a Hugging Face ID
+                            dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id, total_params)
+                            # Populate dataset dictionary with extracted components
+                            if dataset_name_component is not None:
+                                dataset["name"] = Metadata.id_to_title(dataset_name_component)
+                            if org_component is not None:
+                                dataset["organization"] = Metadata.id_to_title(org_component)
+                            if version is not None:
+                                dataset["version"] = version
+                            if org_component is not None and dataset_name_component is not None:
+                                dataset["repo_url"] = f"https://huggingface.co/{org_component}/{dataset_name_component}"
+                    elif isinstance(dataset_id, dict):
+                        dataset = dataset_id
+                    else:
+                        logger.error(f"dataset entry '{str(dataset_id)}' not in a known format")
+                    metadata.datasets.append(dataset)
             use_model_card_metadata("license", "license")
             use_model_card_metadata("license_name", "license_name")
             use_model_card_metadata("license_link", "license_link")
@@ -386,9 +476,6 @@ class Metadata:
             use_array_model_card_metadata("languages", "languages")
             use_array_model_card_metadata("languages", "language")
-            use_array_model_card_metadata("datasets", "datasets")
-            use_array_model_card_metadata("datasets", "dataset")
         # Hugging Face Parameter Heuristics
         ####################################
@@ -458,7 +545,10 @@ class Metadata:
             gguf_writer.add_size_label(self.size_label)
         if self.license is not None:
-            gguf_writer.add_license(self.license)
+            if isinstance(self.license, list):
+                gguf_writer.add_license(",".join(self.license))
+            else:
+                gguf_writer.add_license(self.license)
         if self.license_name is not None:
             gguf_writer.add_license_name(self.license_name)
         if self.license_link is not None:
@@ -493,6 +583,8 @@ class Metadata:
                     gguf_writer.add_base_model_version(key, base_model_entry["version"])
                 if "organization" in base_model_entry:
                     gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
+                if "description" in base_model_entry:
+                    gguf_writer.add_base_model_description(key, base_model_entry["description"])
                 if "url" in base_model_entry:
                     gguf_writer.add_base_model_url(key, base_model_entry["url"])
                 if "doi" in base_model_entry:
@@ -502,9 +594,29 @@ class Metadata:
                 if "repo_url" in base_model_entry:
                     gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
+        if self.datasets is not None:
+            gguf_writer.add_dataset_count(len(self.datasets))
+            for key, dataset_entry in enumerate(self.datasets):
+                if "name" in dataset_entry:
+                    gguf_writer.add_dataset_name(key, dataset_entry["name"])
+                if "author" in dataset_entry:
+                    gguf_writer.add_dataset_author(key, dataset_entry["author"])
+                if "version" in dataset_entry:
+                    gguf_writer.add_dataset_version(key, dataset_entry["version"])
+                if "organization" in dataset_entry:
+                    gguf_writer.add_dataset_organization(key, dataset_entry["organization"])
+                if "description" in dataset_entry:
+                    gguf_writer.add_dataset_description(key, dataset_entry["description"])
+                if "url" in dataset_entry:
+                    gguf_writer.add_dataset_url(key, dataset_entry["url"])
+                if "doi" in dataset_entry:
+                    gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
+                if "uuid" in dataset_entry:
+                    gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
+                if "repo_url" in dataset_entry:
+                    gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])
         if self.tags is not None:
             gguf_writer.add_tags(self.tags)
         if self.languages is not None:
             gguf_writer.add_languages(self.languages)
-        if self.datasets is not None:
-            gguf_writer.add_datasets(self.datasets)

bigdl/cpp/gguf-py/gguf/tensor_mapping.py CHANGED Viewed

@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",                           # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",               # falcon
             "word_embeddings",                           # bloom
-            "model.embed_tokens",                        # llama-hf nemotron olmoe
+            "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2
             "tok_embeddings",                            # llama-pth
             "embeddings.word_embeddings",                # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -54,7 +54,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",                 # gptneox
-            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe
+            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
             "output",                    # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",            # phi2
@@ -66,7 +66,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",               # gptneox
             "transformer.ln_f",                        # gpt2 gpt-j falcon jais exaone
-            "model.norm",                              # llama-hf baichuan internlm2 olmoe
+            "model.norm",                              # llama-hf baichuan internlm2 olmoe olmo2
             "norm",                                    # llama-pth
             "transformer.norm_f",                      # mpt dbrx
             "ln_f",                                    # refact bloom qwen gpt2
@@ -145,7 +145,8 @@ class TensorNameMap:
         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",                       # llama-hf nemotron olmoe
+            "model.layers.{bid}.self_attn.q_proj",                       # llama-hf nemotron olmoe olmo2
+            "model.layers.{bid}.self_attn.q_proj_no_perm",               # llama-custom
             "layers.{bid}.attention.wq",                                 # llama-pth
             "encoder.layer.{bid}.attention.self.query",                  # bert
             "transformer.h.{bid}.attn.q_proj",                           # gpt-j
@@ -157,7 +158,8 @@ class TensorNameMap:
         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",                     # llama-hf nemotron olmoe
+            "model.layers.{bid}.self_attn.k_proj",                     # llama-hf nemotron olmoe olmo2
+            "model.layers.{bid}.self_attn.k_proj_no_perm",             # llama-custom
             "layers.{bid}.attention.wk",                               # llama-pth
             "encoder.layer.{bid}.attention.self.key",                  # bert
             "transformer.h.{bid}.attn.k_proj",                         # gpt-j
@@ -170,7 +172,7 @@ class TensorNameMap:
         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",                       # llama-hf nemotron olmoe
+            "model.layers.{bid}.self_attn.v_proj",                       # llama-hf nemotron olmoe olmo2
             "layers.{bid}.attention.wv",                                 # llama-pth
             "encoder.layer.{bid}.attention.self.value",                  # bert
             "transformer.h.{bid}.attn.v_proj",                           # gpt-j
@@ -188,7 +190,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",                       # mpt
             "transformer.h.{bid}.self_attention.dense",                     # falcon
             "h.{bid}.self_attention.dense",                                 # bloom
-            "model.layers.{bid}.self_attn.o_proj",                          # llama-hf nemotron olmoe
+            "model.layers.{bid}.self_attn.o_proj",                          # llama-hf nemotron olmoe olmo2
             "layers.{bid}.attention.wo",                                    # llama-pth
             "encoder.layer.{bid}.attention.output.dense",                   # bert
             "transformer.h.{bid}.attn.out_proj",                            # gpt-j
@@ -215,7 +217,7 @@ class TensorNameMap:
         ),
         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm",     # gemma2
+            "model.layers.{bid}.post_attention_layernorm",     # gemma2 olmo2
         ),
         # Rotary embeddings
@@ -250,7 +252,7 @@ class TensorNameMap:
         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm", # gemma2
+            "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
         ),
         MODEL_TENSOR.FFN_GATE_INP: (
@@ -273,7 +275,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj",                   # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h",                  # falcon
             "h.{bid}.mlp.dense_h_to_4h",                              # bloom
-            "model.layers.{bid}.mlp.up_proj",                         # llama-hf refact nemotron
+            "model.layers.{bid}.mlp.up_proj",                         # llama-hf refact nemotron olmo2
             "layers.{bid}.feed_forward.w3",                           # llama-pth
             "encoder.layer.{bid}.intermediate.dense",                 # bert
             "transformer.h.{bid}.mlp.fc_in",                          # gpt-j
@@ -314,7 +316,7 @@ class TensorNameMap:
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj",           # llama-hf refact
+            "model.layers.{bid}.mlp.gate_proj",           # llama-hf refact olmo2
             "layers.{bid}.feed_forward.w1",               # llama-pth
             "transformer.h.{bid}.mlp.w2",                 # qwen
             "transformer.h.{bid}.mlp.c_fc2",              # jais
@@ -346,7 +348,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj",                 # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h",                  # falcon
             "h.{bid}.mlp.dense_4h_to_h",                              # bloom
-            "model.layers.{bid}.mlp.down_proj",                       # llama-hf nemotron
+            "model.layers.{bid}.mlp.down_proj",                       # llama-hf nemotron olmo2
             "layers.{bid}.feed_forward.w2",                           # llama-pth
             "encoder.layer.{bid}.output.dense",                       # bert
             "transformer.h.{bid}.mlp.fc_out",                         # gpt-j
@@ -383,7 +385,7 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",                       # persimmon
-            "model.layers.{bid}.self_attn.q_norm",                            # cohere olmoe chameleon
+            "model.layers.{bid}.self_attn.q_norm",                            # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.q_ln",                             # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",                # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",                           # openelm
@@ -392,7 +394,7 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",                       # persimmon
-            "model.layers.{bid}.self_attn.k_norm",                            # cohere olmoe chameleon
+            "model.layers.{bid}.self_attn.k_norm",                            # cohere olmoe chameleon olmo2
             "transformer.blocks.{bid}.attn.k_ln",                             # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",                # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",                           # openelm
@@ -766,4 +768,4 @@ class TensorNameMap:
 def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
-    return TensorNameMap(arch, n_blocks)
+    return TensorNameMap(arch, n_blocks)

bigdl/cpp/gguf-py/gguf/vocab.py CHANGED Viewed

@@ -122,8 +122,30 @@ class SpecialVocab:
                 tokenizer = json.load(f)
             if self.load_merges:
                 merges = tokenizer.get('model', {}).get('merges')
-                if isinstance(merges, list) and merges and isinstance(merges[0], str):
-                    self.merges = merges
+                if isinstance(merges, list) and merges:
+                    if isinstance(merges[0], str):
+                        self.merges = merges
+                    elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
+                        # New format since transformers 4.45 to support spaces in merges
+                        # ref: https://github.com/ggerganov/llama.cpp/issues/9692
+                        # TODO: internally store as the new format instead of converting to old
+                        if any(' ' in s for pair in merges for s in pair):
+                            logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
+                        self.merges = [
+                            ' '.join(
+                                [
+                                    # ensure the spaces are properly encoded
+                                    ''.join(
+                                        chr(ord(c) + 256) if c == ' ' else c
+                                        for c in part
+                                    )
+                                    for part in pair
+                                ]
+                            )
+                            for pair in merges
+                        ]
+                    else:
+                        raise ValueError("Unknown tokenizer merges format")
             added_tokens = tokenizer.get('added_tokens', {})
         else:
             added_tokens = {}

bigdl/cpp/libs/common.lib CHANGED Viewed

Binary file

bigdl/cpp/libs/ggml-cpu.dll ADDED Viewed

Binary file

bigdl/cpp/libs/ggml-sycl.dll ADDED Viewed

Binary file

bigdl/cpp/libs/ggml.dll CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-batched.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-bench.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-cli.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-embedding.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-gguf.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-llava-cli.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-lookup.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-ls-sycl-device.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-minicpmv-cli.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-perplexity.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-quantize.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-server.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-simple.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-speculative.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama-tokenize.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/llama.dll CHANGED Viewed

Binary file

bigdl/cpp/libs/llava_shared.dll CHANGED Viewed

Binary file

bigdl/cpp/libs/ollama-ggml-base.dll ADDED Viewed

Binary file

bigdl/cpp/libs/ollama-ggml-cpu.dll ADDED Viewed

Binary file

bigdl/cpp/libs/ollama-ggml-sycl.dll ADDED Viewed

Binary file

bigdl/cpp/libs/ollama.exe CHANGED Viewed

Binary file

bigdl/cpp/libs/ollama_ggml.dll CHANGED Viewed

Binary file

bigdl/cpp/libs/ollama_llama.dll CHANGED Viewed

Binary file

bigdl/cpp/libs/ollama_llava_shared.dll CHANGED Viewed

Binary file

{bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-ollama.bat RENAMED Viewed

@@ -6,7 +6,7 @@ set "cpp_dir=%cpp_dir:~0,-1%"
 set "lib_dir=%cpp_dir%\libs"
 :: Create symlinks for DLLs and EXE
-for %%f in (ollama.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll libc++.dll) do (
+for %%f in (ollama.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll ollama-ggml-base.dll ollama-ggml-cpu.dll ollama-ggml-sycl.dll libc++.dll) do (
     if exist "%cd%\%%f" del /f "%cd%\%%f"
     mklink "%cd%\%%f" "%lib_dir%\%%f"
 )

{bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: bigdl-core-cpp
-Version: 2.6.0b20250204
+Version: 2.6.0b20250204.post0
 Summary: Large Language Model Develop Toolkit
 Author: BigDL Authors
 License: Apache License, Version 2.0

bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,54 @@
+bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/convert_hf_to_gguf.py,sha256=HVJ6axht-K3DAmmkVkC1T1_vZJ8FprUwfNpXJKnLsUQ,210336
+bigdl/cpp/convert_hf_to_gguf_update.py,sha256=4A9Q4oLh5tZAovmgKgS7bVlcCqcVm4j0SIwGK_lww9s,17004
+bigdl/cpp/convert_llama_ggml_to_gguf.py,sha256=0dKjRhmFzvWV4e-cuLmaeW14JrWUtZwerBmz8mYyMvI,19556
+bigdl/cpp/convert_lora_to_gguf.py,sha256=b2CUmTK-ztrJE_50DzsXK3SRZshr_LYFyUPn0UDnkiA,17270
+bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/__init__.py,sha256=h5GWs6SMXYR8giWZ7MTZzAc3hYsIJF-HAkdxtgXLOPo,228
+bigdl/cpp/gguf-py/gguf/constants.py,sha256=hN1QPaL74Ef8iJXFE2hlRJmG1w43a2E6HwB0xCxp8ic,61425
+bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
+bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=PUrx08ZwaUOz1gLw5JQ459Hi7JIeCdlHgZX7wXcTqbI,12702
+bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=e-8gwsdq0sipd8zzrXvvtVWV7mCaQD9fRsCn6_67CNs,38541
+bigdl/cpp/gguf-py/gguf/lazy.py,sha256=YIYxGBWD-oKXU4HOvpHs9eiEn81HUgeSmt1mmHJlbdM,8814
+bigdl/cpp/gguf-py/gguf/metadata.py,sha256=oBTb4DXi_h1L_gYm8x_JRVuEPR4GHlVHuM-iN0OxWoY,33244
+bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/quants.py,sha256=2z6vcK-kBefqZbYNmSEVmdZF_tXHeVb5NC6jCbBdgKc,62040
+bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=tUkpDyeMXwf9M1Cx1HL2g6mMavngsj0UYkeWMaOPjQU,35561
+bigdl/cpp/gguf-py/gguf/utility.py,sha256=LAwLstUlosYQ1oX9LlJZ-1uCmwyNtOFcJfXpkLnju0k,3003
+bigdl/cpp/gguf-py/gguf/vocab.py,sha256=ulUC8XudFDuZC2SNKGNQpsU2KFP_YhkyNLIWV16jG6I,20816
+bigdl/cpp/libs/common.lib,sha256=ELuS4xoqHJYsjeta8zWKSOKDZHqrnAwuWa3blD7O4F8,3955904
+bigdl/cpp/libs/ggml-cpu.dll,sha256=WxBkX7Smps1FOcCaDfzxMAKlUgLTqfKvVb7kEt2GAdo,475648
+bigdl/cpp/libs/ggml-sycl.dll,sha256=y0xMUN5r3kn1idTnWep_HOSp3OEMByiq-MYfFUl5Fms,5280768
+bigdl/cpp/libs/ggml.dll,sha256=A8buG4Wm9lJ4byLJAaqfrjacjigfq5aLuoOMU3Gs0Lw,113152
+bigdl/cpp/libs/libc++.dll,sha256=U0TVK2WfFQIJPP6Bz9SeJmgskm2iqZWJorx_DGdfKIw,1561600
+bigdl/cpp/libs/llama-batched.exe,sha256=OXRvTB7SgBF3kSzluFaZ9T6AYLcRIJ-3fDGBQYMSADM,847872
+bigdl/cpp/libs/llama-bench.exe,sha256=MZM1q--5V9DMr_rQjRpe54Hjbj1hzh6B87QqA_cHS0s,277504
+bigdl/cpp/libs/llama-cli.exe,sha256=SbG_NSl6VY7ChmwRD2RYgxO-QI8tiTfzeoj3tq8WkHE,922624
+bigdl/cpp/libs/llama-embedding.exe,sha256=msgusVBW43uLEBxq26Vjg8KKoiHPBGNiCvQR4s7HnfE,870400
+bigdl/cpp/libs/llama-gguf.exe,sha256=ijuXVYdZSeRcDr35M-DmKS5sJQ0Ab_n_b0z5SQU_x9k,58880
+bigdl/cpp/libs/llama-llava-cli.exe,sha256=fqgtEg_c2uXUTgmGQWJ9pFP0ie75_GkswP56qBWGZ4w,1109504
+bigdl/cpp/libs/llama-lookup.exe,sha256=QCPdyiI8AsFFXjAnX1GohxOiFKqPRLoL7_G9K2ihN7Y,904704
+bigdl/cpp/libs/llama-ls-sycl-device.exe,sha256=IfpDycbUBZgmohCtMUNI3k5hDpg1QkqimyVuYPMk_dw,10240
+bigdl/cpp/libs/llama-minicpmv-cli.exe,sha256=j-Tk3T7Kb6N0ccR2F-UTBPuxjFYHfSs5WbfSc9HAGhQ,1107456
+bigdl/cpp/libs/llama-perplexity.exe,sha256=ZEEjyN3JnTjLBwb2DtM4SOHFbgH-HzdfJlKZX_sQIhw,990720
+bigdl/cpp/libs/llama-quantize.exe,sha256=5bKWjuDw3kbjCuwN0iPOobph6rfU-j0os_dYOFG0zHI,122880
+bigdl/cpp/libs/llama-server.exe,sha256=32cg0YZkZTJQ24Vst7kO3RhCDN1BoOrUQUwBVPJgUes,2148864
+bigdl/cpp/libs/llama-simple.exe,sha256=nIQ7VJ_I1Fqc9gsCDt_BVx8M-lIiLh_byycNPzH6lqw,62464
+bigdl/cpp/libs/llama-speculative.exe,sha256=xeDoKmoJPae-r1FBTApS-z5K1AhWA13lbpUQdqGjSpg,907264
+bigdl/cpp/libs/llama-tokenize.exe,sha256=cJLtVEuYfli42CJYGet0beV6m6pW7r2m44QtgOqlRt0,88576
+bigdl/cpp/libs/llama.dll,sha256=2Wgwo2UBczq2s6SBP9TBZMxJMmr_P8CfobB0NTlSkis,1404928
+bigdl/cpp/libs/llava_shared.dll,sha256=vBVJuzZbXjf0W14a5ZO415Wj1-qiwT_Qcex-v71RjVQ,365056
+bigdl/cpp/libs/ollama-ggml-base.dll,sha256=2yGQxfoge3KDesOWcwa8ncFMvyvg7Jd8pl_pvMN6w5w,459776
+bigdl/cpp/libs/ollama-ggml-cpu.dll,sha256=iQEVXQK_tHmGY4UpMrPPj6G9ssbPxHeJQR0s5rqG6cU,475648
+bigdl/cpp/libs/ollama-ggml-sycl.dll,sha256=Y_0PbkDf48ypCVA71QLXap_Wgbd5n76m4gKQEt64NFk,5280768
+bigdl/cpp/libs/ollama.exe,sha256=tTdj_U3k2XHx4R2e7Z_tTOaqnOtQAJQqWNdo6oxTnHU,25979904
+bigdl/cpp/libs/ollama_ggml.dll,sha256=TPiPOCTK2Lfcf4X03NIVwiYTMPeYAKX9VTmeI9b3RKc,113152
+bigdl/cpp/libs/ollama_llama.dll,sha256=UGD_a9ok5m0vnLGtF2cSHwnQJRAV6XaGDBMMdIT0g00,1427968
+bigdl/cpp/libs/ollama_llava_shared.dll,sha256=3e7S_cVWEvfUUEUYWUmTpeDsMS7Wr5_Hofc4lAUmK_w,365056
+bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-llama-cpp.bat,sha256=U0h6RifZxL3GGJp-0dxdZapQIvXUATSj644CURJL-lg,751
+bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
+bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-ollama.bat,sha256=DKWd9wFMT7hcNVQViMxG3CnqF4iZXCh2BmuS3zQjAuw,642
+bigdl_core_cpp-2.6.0b20250204.post0.dist-info/METADATA,sha256=mLLb2aUYHIfuJCKqAxZ2Wy1s8hPohkWVoh8m2gGrn90,756
+bigdl_core_cpp-2.6.0b20250204.post0.dist-info/WHEEL,sha256=2wr--P33L_Xt79Mrb57-zn6CrTlNaEVHEwbOduMxJRg,97
+bigdl_core_cpp-2.6.0b20250204.post0.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
+bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD,,

bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/ipex_llm/ollama_llama_server.exe DELETED Viewed

Binary file